@docverse-pdf/server 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/gotenberg.d.ts +71 -0
- package/dist/gotenberg.js +142 -0
- package/dist/index.d.ts +3 -3
- package/dist/index.js +2 -2
- package/dist/wordToPDF.d.ts +22 -14
- package/dist/wordToPDF.js +26 -31
- package/package.json +2 -2
- package/dist/unoServer.d.ts +0 -86
- package/dist/unoServer.js +0 -378
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
/** Construction-time settings accepted by `GotenbergClient`. All fields are optional. */
export interface GotenbergOptions {
    /** Root URL of the Gotenberg service, for example `http://localhost:3000`. */
    url?: string;
    /** Timeout applied to each request, in milliseconds (default: 60_000). */
    timeout?: number;
    /** Basic Auth credentials, for deployments where Gotenberg is protected. */
    basicAuth?: {
        username: string;
        password: string;
    };
    /** Additional headers attached to every request (tracing headers and the like). */
    headers?: Record<string, string>;
}
|
|
14
|
+
/** Per-conversion settings forwarded to Gotenberg's LibreOffice route. */
export interface GotenbergConvertOptions {
    /**
     * Filename reported to Gotenberg for the uploaded document. Gotenberg
     * picks the LibreOffice filter from the file extension, so the extension
     * has to match the real format (e.g. `input.docx`, `input.pptx`).
     */
    filename?: string;
    /** Request PDF/A compliant output in the given flavour. */
    pdfa?: 'PDF/A-1a' | 'PDF/A-2b' | 'PDF/A-3b';
    /** Request PDF/UA (accessibility) compliant output. */
    pdfua?: boolean;
    /** Render pages in landscape orientation. */
    landscape?: boolean;
    /** Page range in LibreOffice's native syntax, e.g. "1-3,5". */
    nativePageRanges?: string;
    /** Metadata to write into the PDF (title, author, subject, keywords, creator, ...). */
    metadata?: Record<string, string>;
    /** Timeout for this call only, in ms; the client-level timeout applies when omitted. */
    timeout?: number;
}
|
|
34
|
+
/** One input document for `GotenbergClient.convertMany`: raw bytes plus the name to upload them under. */
export interface GotenbergFile {
    /** Raw document bytes. */
    buffer: Buffer | Uint8Array;
    /** Upload filename; its extension should match the document's real format. */
    filename: string;
}
|
|
38
|
+
/**
 * Thin HTTP client for the Gotenberg document conversion service.
 *
 * There is no `start()` / `stop()` lifecycle: an instance only remembers its
 * base URL, default timeout, optional Basic Auth credentials and extra
 * headers. Instances are cheap; reusing one across requests lets the
 * runtime's HTTP keep-alive pool be shared.
 */
export declare class GotenbergClient {
    private readonly url;
    private readonly timeout;
    private readonly basicAuth?;
    private readonly extraHeaders;
    /** @param options Client settings; every field has a default. */
    constructor(options?: GotenbergOptions);
    /** The base URL with any trailing slashes removed. */
    get baseUrl(): string;
    /**
     * Probes Gotenberg's `/health` endpoint.
     *
     * @param timeoutMs Time allowed for the probe, in milliseconds.
     * @returns `true` for any 2xx response, `false` otherwise — network
     *   errors and timeouts included.
     */
    ping(timeoutMs?: number): Promise<boolean>;
    /**
     * Converts one document (DOCX, ODT, XLSX, PPTX, RTF, ...) to PDF through
     * Gotenberg's LibreOffice route.
     *
     * @param input Raw document bytes.
     * @param filenameOrOptions Either the upload filename or a full options object.
     * @returns The PDF bytes.
     */
    convert(input: Buffer | Uint8Array, filenameOrOptions?: string | GotenbergConvertOptions): Promise<Buffer>;
    /**
     * Converts several documents in a single request. Whenever more than one
     * file is supplied the client sends the `merge=true` form field, so the
     * result is one concatenated PDF. There is currently no option to turn
     * merging off (e.g. to receive zipped per-file output).
     *
     * @param files Documents to convert; at least one is required.
     * @param options Conversion settings applied to the whole request.
     * @returns The PDF bytes.
     */
    convertMany(files: GotenbergFile[], options?: GotenbergConvertOptions): Promise<Buffer>;
    private buildHeaders;
}
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
// ═══════════════════════════════════════════════
|
|
2
|
+
// GOTENBERG HTTP CLIENT
|
|
3
|
+
// ═══════════════════════════════════════════════
|
|
4
|
+
//
|
|
5
|
+
// Gotenberg (https://gotenberg.dev) is a stateless HTTP API that wraps
|
|
6
|
+
// LibreOffice and Chromium for document→PDF conversion. Running it as a
|
|
7
|
+
// sidecar container lets the server SDK stay a thin HTTP client — no
|
|
8
|
+
// local `soffice` process, no warm-pool management, no port juggling.
|
|
9
|
+
//
|
|
10
|
+
// Minimal docker-compose example:
|
|
11
|
+
// services:
|
|
12
|
+
// gotenberg:
|
|
13
|
+
// image: gotenberg/gotenberg:8
|
|
14
|
+
// ports: ["3000:3000"]
|
|
15
|
+
//
|
|
16
|
+
// Then on the Node side:
|
|
17
|
+
// const gotenberg = new GotenbergClient({ url: 'http://gotenberg:3000' });
|
|
18
|
+
// const pdf = await gotenberg.convert(docxBuffer, 'report.docx');
|
|
19
|
+
/**
 * Thin HTTP client for the Gotenberg document conversion service.
 *
 * There is no `start()` / `stop()` lifecycle: an instance only remembers its
 * base URL, default timeout, optional Basic Auth credentials and extra
 * headers, and issues `fetch` requests. Instances are cheap; reuse one across
 * requests so the runtime's HTTP keep-alive pool is shared.
 */
export class GotenbergClient {
    url;
    timeout;
    basicAuth;
    extraHeaders;
    /**
     * @param options Client settings. `url` defaults to
     *   `http://localhost:3000`, `timeout` to 60_000 ms.
     */
    constructor(options = {}) {
        // Strip trailing slashes so route paths can be appended verbatim.
        this.url = (options.url ?? 'http://localhost:3000').replace(/\/+$/, '');
        this.timeout = options.timeout ?? 60_000;
        this.basicAuth = options.basicAuth;
        // Shallow copy: later mutation of the caller's object must not leak in.
        this.extraHeaders = { ...(options.headers ?? {}) };
    }
    /** Returns the normalized base URL (trailing slashes stripped). */
    get baseUrl() {
        return this.url;
    }
    /**
     * Health check against Gotenberg's `/health` endpoint.
     *
     * @param timeoutMs Time allowed for the probe, in milliseconds.
     * @returns `true` on any 2xx, `false` otherwise (including network
     *   errors and timeouts). Never rejects.
     */
    async ping(timeoutMs = 2_000) {
        const controller = new AbortController();
        const timer = setTimeout(() => controller.abort(), timeoutMs);
        try {
            const res = await fetch(`${this.url}/health`, {
                method: 'GET',
                headers: this.buildHeaders(),
                signal: controller.signal,
            });
            return res.ok;
        }
        catch {
            // Deliberate best-effort: any failure simply means "not healthy".
            return false;
        }
        finally {
            clearTimeout(timer);
        }
    }
    /**
     * Convert a single document (DOCX, ODT, XLSX, PPTX, RTF, ...) to PDF via
     * Gotenberg's LibreOffice route.
     *
     * @param input Raw document bytes.
     * @param filenameOrOptions Upload filename, or a full options object.
     *   The filename defaults to `input.docx`.
     * @returns The PDF bytes.
     */
    async convert(input, filenameOrOptions = 'input.docx') {
        const opts = typeof filenameOrOptions === 'string'
            ? { filename: filenameOrOptions }
            // Tolerate an explicit `null` instead of crashing on `null.filename`.
            : (filenameOrOptions ?? {});
        return this.convertMany([{ buffer: input, filename: opts.filename ?? 'input.docx' }], opts);
    }
    /**
     * Convert multiple documents in a single request. When more than one file
     * is supplied the `merge=true` form field is sent, so the result is one
     * concatenated PDF; there is currently no option to disable merging.
     *
     * @param files Documents to convert; at least one is required.
     * @param options Conversion settings applied to the whole request.
     * @returns The PDF bytes.
     * @throws Error when `files` is empty, when Gotenberg answers with a
     *   non-2xx status (message includes status and response body), or when
     *   the request exceeds the timeout (original abort error in `cause`).
     */
    async convertMany(files, options = {}) {
        if (files.length === 0) {
            throw new Error('GotenbergClient.convertMany: at least one file is required');
        }
        const form = new FormData();
        for (const f of files) {
            // A Blob carries the raw bytes with no text encoding. It accepts a
            // Buffer/Uint8Array directly, so no intermediate copy is needed.
            // Gotenberg inspects the filename's extension to pick the
            // LibreOffice filter, so it must match the real format.
            form.append('files', new Blob([f.buffer]), f.filename);
        }
        if (options.pdfa)
            form.append('pdfa', options.pdfa);
        if (options.pdfua)
            form.append('pdfua', 'true');
        if (options.landscape)
            form.append('landscape', 'true');
        if (options.nativePageRanges)
            form.append('nativePageRanges', options.nativePageRanges);
        if (files.length > 1)
            form.append('merge', 'true');
        if (options.metadata && Object.keys(options.metadata).length > 0) {
            form.append('metadata', JSON.stringify(options.metadata));
        }
        const timeout = options.timeout ?? this.timeout;
        const controller = new AbortController();
        const timer = setTimeout(() => controller.abort(), timeout);
        try {
            const res = await fetch(`${this.url}/forms/libreoffice/convert`, {
                method: 'POST',
                body: form,
                headers: this.buildHeaders(),
                signal: controller.signal,
            });
            if (!res.ok) {
                const bodyText = await res.text().catch(() => '');
                throw new Error(`Gotenberg returned ${res.status} ${res.statusText}: ${bodyText || '(empty body)'}`);
            }
            // Reading the body stays inside the try so the timeout covers it too.
            const arrayBuf = await res.arrayBuffer();
            return Buffer.from(arrayBuf);
        }
        catch (err) {
            // `err` may be any thrown value, so read `name` defensively.
            if (err?.name === 'AbortError') {
                throw new Error(`Gotenberg request timed out after ${timeout}ms`, { cause: err });
            }
            throw err;
        }
        finally {
            clearTimeout(timer);
        }
    }
    /**
     * Builds the headers for one request: the configured extra headers plus,
     * when Basic Auth is configured, an `Authorization` header.
     */
    buildHeaders() {
        const headers = { ...this.extraHeaders };
        if (this.basicAuth) {
            // Header names are case-insensitive. Drop any caller-supplied
            // variant (e.g. `authorization`) so exactly one value is sent
            // instead of two that fetch would join with a comma.
            for (const key of Object.keys(headers)) {
                if (key.toLowerCase() === 'authorization') {
                    delete headers[key];
                }
            }
            const token = Buffer.from(`${this.basicAuth.username}:${this.basicAuth.password}`).toString('base64');
            headers['Authorization'] = `Basic ${token}`;
        }
        return headers;
    }
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
export { PDFDocument } from './PDFDocument.js';
|
|
2
2
|
export type { PageSize, TextBlock, SearchResult, AnnotationInfo, PageObjectInfo, FormField, Bookmark, RenderOptions, ImageInfo, TextObjectStyle, } from './PDFDocument.js';
|
|
3
3
|
export { flattenXFDF, mergeXFDFIntoPDF } from './flattenXFDF.js';
|
|
4
|
-
export { wordToPDF, LibreOfficePool,
|
|
4
|
+
export { wordToPDF, LibreOfficePool, resetSharedGotenbergClient } from './wordToPDF.js';
|
|
5
5
|
export type { ConvertOptions, PoolOptions } from './wordToPDF.js';
|
|
6
|
-
export {
|
|
7
|
-
export type {
|
|
6
|
+
export { GotenbergClient } from './gotenberg.js';
|
|
7
|
+
export type { GotenbergOptions, GotenbergConvertOptions, GotenbergFile, } from './gotenberg.js';
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
export { PDFDocument } from './PDFDocument.js';
|
|
2
2
|
export { flattenXFDF, mergeXFDFIntoPDF } from './flattenXFDF.js';
|
|
3
|
-
export { wordToPDF, LibreOfficePool,
|
|
4
|
-
export {
|
|
3
|
+
export { wordToPDF, LibreOfficePool, resetSharedGotenbergClient } from './wordToPDF.js';
|
|
4
|
+
export { GotenbergClient } from './gotenberg.js';
|
package/dist/wordToPDF.d.ts
CHANGED
|
@@ -1,21 +1,27 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import type {
|
|
1
|
+
import { GotenbergClient } from './gotenberg.js';
|
|
2
|
+
import type { GotenbergOptions, GotenbergConvertOptions } from './gotenberg.js';
|
|
3
3
|
export interface ConvertOptions {
|
|
4
4
|
format?: string;
|
|
5
|
-
mode?: 'process' | 'socket' | 'pool' | '
|
|
5
|
+
mode?: 'process' | 'socket' | 'pool' | 'gotenberg';
|
|
6
6
|
socketUrl?: string;
|
|
7
7
|
timeout?: number;
|
|
8
8
|
/**
|
|
9
|
-
* When `mode: '
|
|
10
|
-
* shared singleton.
|
|
11
|
-
*
|
|
9
|
+
* When `mode: 'gotenberg'`, reuse this client instead of the module-level
|
|
10
|
+
* shared singleton. Create one `GotenbergClient` at app boot and pass it
|
|
11
|
+
* here to benefit from HTTP keep-alive.
|
|
12
12
|
*/
|
|
13
|
-
|
|
13
|
+
gotenbergClient?: GotenbergClient;
|
|
14
14
|
/**
|
|
15
|
-
* When `mode: '
|
|
16
|
-
* are used to lazily construct the shared singleton on the first
|
|
15
|
+
* When `mode: 'gotenberg'` and no `gotenbergClient` is supplied, these
|
|
16
|
+
* options are used to lazily construct the shared singleton on the first
|
|
17
|
+
* call (e.g. `{ url: 'http://gotenberg:3000' }`).
|
|
17
18
|
*/
|
|
18
|
-
|
|
19
|
+
gotenbergOptions?: GotenbergOptions;
|
|
20
|
+
/**
|
|
21
|
+
* Extra per-conversion options forwarded to Gotenberg (pdfa, landscape,
|
|
22
|
+
* nativePageRanges, metadata, filename, ...).
|
|
23
|
+
*/
|
|
24
|
+
gotenbergConvertOptions?: GotenbergConvertOptions;
|
|
19
25
|
}
|
|
20
26
|
export interface PoolOptions {
|
|
21
27
|
workers?: number;
|
|
@@ -26,11 +32,13 @@ export interface PoolOptions {
|
|
|
26
32
|
}
|
|
27
33
|
export declare function wordToPDF(wordBuffer: Buffer | Uint8Array, optionsOrFormat?: string | ConvertOptions): Promise<Buffer>;
|
|
28
34
|
/**
|
|
29
|
-
*
|
|
30
|
-
* `wordToPDF(..., { mode: '
|
|
31
|
-
*
|
|
35
|
+
* Clears the module-level Gotenberg client created lazily by
|
|
36
|
+
* `wordToPDF(..., { mode: 'gotenberg' })`. The client holds no OS
|
|
37
|
+
* resources (only Node's HTTP keep-alive sockets, which undici cleans up
|
|
38
|
+
* automatically), so this is only needed if you want the next call to
|
|
39
|
+
* pick up fresh `gotenbergOptions`.
|
|
32
40
|
*/
|
|
33
|
-
export declare function
|
|
41
|
+
export declare function resetSharedGotenbergClient(): void;
|
|
34
42
|
export declare class LibreOfficePool {
|
|
35
43
|
private workers;
|
|
36
44
|
private queue;
|
package/dist/wordToPDF.js
CHANGED
|
@@ -4,7 +4,7 @@ import { writeFileSync, readFileSync, unlinkSync, mkdirSync, existsSync } from '
|
|
|
4
4
|
import { join } from 'path';
|
|
5
5
|
import { tmpdir } from 'os';
|
|
6
6
|
import { randomBytes } from 'crypto';
|
|
7
|
-
import {
|
|
7
|
+
import { GotenbergClient } from './gotenberg.js';
|
|
8
8
|
// ═══════════════════════════════════════════════
|
|
9
9
|
// SIMPLE CONVERT (backward compatible)
|
|
10
10
|
// ═══════════════════════════════════════════════
|
|
@@ -15,8 +15,8 @@ export async function wordToPDF(wordBuffer, optionsOrFormat = 'pdf') {
|
|
|
15
15
|
const format = opts.format ?? 'pdf';
|
|
16
16
|
const mode = opts.mode ?? 'process';
|
|
17
17
|
const timeout = opts.timeout ?? 30000;
|
|
18
|
-
if (mode === '
|
|
19
|
-
return
|
|
18
|
+
if (mode === 'gotenberg') {
|
|
19
|
+
return convertViaGotenberg(wordBuffer, opts);
|
|
20
20
|
}
|
|
21
21
|
if (mode === 'socket' && opts.socketUrl) {
|
|
22
22
|
return convertViaSocket(wordBuffer, format, opts.socketUrl, timeout);
|
|
@@ -24,39 +24,34 @@ export async function wordToPDF(wordBuffer, optionsOrFormat = 'pdf') {
|
|
|
24
24
|
return convertViaProcess(wordBuffer, format, timeout);
|
|
25
25
|
}
|
|
26
26
|
// ═══════════════════════════════════════════════
|
|
27
|
-
//
|
|
27
|
+
// GOTENBERG MODE (HTTP sidecar service, recommended)
|
|
28
28
|
// ═══════════════════════════════════════════════
|
|
29
|
-
let
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
return sharedUnoServerStarting;
|
|
36
|
-
sharedUnoServerStarting = (async () => {
|
|
37
|
-
const server = new UnoServer(options);
|
|
38
|
-
await server.start();
|
|
39
|
-
sharedUnoServer = server;
|
|
40
|
-
sharedUnoServerStarting = null;
|
|
41
|
-
return server;
|
|
42
|
-
})();
|
|
43
|
-
return sharedUnoServerStarting;
|
|
29
|
+
// Module-level singleton, created on first use by getSharedGotenbergClient().
let sharedGotenbergClient = null;
/**
 * Returns the module-level Gotenberg client, constructing it from `options`
 * on the first call. Once a client exists, `options` passed on later calls
 * are ignored until the singleton is reset.
 */
function getSharedGotenbergClient(options) {
    if (sharedGotenbergClient === null) {
        sharedGotenbergClient = new GotenbergClient(options);
    }
    return sharedGotenbergClient;
}
|
|
45
36
|
/**
|
|
46
|
-
*
|
|
47
|
-
* `wordToPDF(..., { mode: '
|
|
48
|
-
*
|
|
37
|
+
* Clears the module-level Gotenberg client created lazily by
|
|
38
|
+
* `wordToPDF(..., { mode: 'gotenberg' })`. The client holds no OS
|
|
39
|
+
* resources (only Node's HTTP keep-alive sockets, which undici cleans up
|
|
40
|
+
* automatically), so this is only needed if you want the next call to
|
|
41
|
+
* pick up fresh `gotenbergOptions`.
|
|
49
42
|
*/
|
|
50
|
-
export
|
|
51
|
-
|
|
52
|
-
sharedUnoServer = null;
|
|
53
|
-
sharedUnoServerStarting = null;
|
|
54
|
-
if (server)
|
|
55
|
-
await server.stop();
|
|
43
|
+
export function resetSharedGotenbergClient() {
    // Forget the lazily created singleton; the next lookup builds a new
    // client from whatever options it is given at that point.
    sharedGotenbergClient = null;
}
|
|
57
|
-
async function
|
|
58
|
-
const
|
|
59
|
-
|
|
46
|
+
/**
 * Gotenberg-mode conversion: picks the caller-supplied client (or the shared
 * singleton) and forwards the per-conversion options to `client.convert`.
 * The top-level `opts.timeout` is used only when the Gotenberg convert
 * options do not carry a timeout of their own.
 */
async function convertViaGotenberg(wordBuffer, opts) {
    const gotenberg = opts.gotenbergClient ?? getSharedGotenbergClient(opts.gotenbergOptions);
    // Work on a copy so the caller's options object is never mutated.
    const forwarded = { ...(opts.gotenbergConvertOptions ?? {}) };
    const needsTimeout = forwarded.timeout === undefined && opts.timeout !== undefined;
    if (needsTimeout) {
        forwarded.timeout = opts.timeout;
    }
    return gotenberg.convert(wordBuffer, forwarded);
}
|
|
61
56
|
// ═══════════════════════════════════════════════
|
|
62
57
|
// PROCESS MODE (original, 3-5s)
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@docverse-pdf/server",
|
|
3
|
-
"version": "1.1.
|
|
4
|
-
"description": "DocVerse Server SDK — server-side PDF processing, XFDF merge, Word to PDF (
|
|
3
|
+
"version": "1.1.2",
|
|
4
|
+
"description": "DocVerse Server SDK — server-side PDF processing, XFDF merge, Word to PDF (Gotenberg + LibreOffice), image extraction",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"types": "dist/index.d.ts",
|
package/dist/unoServer.d.ts
DELETED
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
/** Settings for a single `UnoServer` daemon wrapper. All fields are optional. */
export interface UnoServerOptions {
    /** RPC port the unoserver daemon itself listens on (default: 2003). */
    port?: number;
    /**
     * Port of the LibreOffice UNO socket behind the daemon. Every daemon needs
     * its own value; left undefined, unoserver falls back to 2002, which only
     * works while a single daemon is running.
     */
    unoPort?: number;
    /** Host/interface the daemon binds to (default: 127.0.0.1). */
    host?: string;
    /** Location of the `unoserver` executable (default: auto-detected). */
    unoserverPath?: string;
    /** Location of the `unoconvert` client executable (default: auto-detected). */
    unoconvertPath?: string;
    /** Location of the LibreOffice `soffice` binary, handed to unoserver via `--executable`. */
    libreOfficePath?: string;
    /**
     * UNO user-profile directory reserved for this daemon, handed to unoserver
     * via `--user-installation`. **Give an absolute filesystem path, never a
     * `file://` URI** — unoserver converts it itself with `Path(...).as_uri()`.
     */
    userInstallation?: string;
    /** Longest wait (ms) for the daemon to become ready (default: 15000). */
    startTimeout?: number;
    /** Longest wait (ms) for one conversion (default: 30000). */
    convertTimeout?: number;
}
|
|
29
|
-
/**
 * Settings for `UnoServerPool`. Per-daemon ports and profile directories are
 * derived by the pool, so those three `UnoServerOptions` fields are omitted.
 */
export interface UnoServerPoolOptions extends Omit<UnoServerOptions, 'port' | 'unoPort' | 'userInstallation'> {
    /** How many unoserver daemons run in parallel (default: 2). */
    workers?: number;
    /** RPC port of the first daemon; worker `i` uses `basePort + i` (default: 2003). */
    basePort?: number;
    /**
     * LibreOffice UNO-socket port of the first daemon; worker `i` uses
     * `unoBasePort + i` (default: 2100). The range must not overlap the one
     * starting at `basePort`.
     */
    unoBasePort?: number;
    /** Parent directory for the per-worker user-profile dirs (default: os.tmpdir()/docverse_unoserver). */
    tempDir?: string;
}
|
|
42
|
-
/** Counters reported by `UnoServerPool.getStats()`. */
export interface UnoServerStats {
    workers: number;
    ready: number;
    busy: number;
    queued: number;
}
|
|
48
|
-
/**
 * Manages one `unoserver` daemon process together with the `unoconvert`
 * client that talks to it.
 *
 * The host needs the `unoserver` Python package:
 *   pip install unoserver
 *
 * The running daemon keeps a LibreOffice instance warm, so a conversion
 * typically takes 200–500 ms — roughly 10× faster than a cold `soffice
 * --convert-to` run.
 */
export declare class UnoServer {
    private proc;
    private readonly opts;
    private ready;
    /** @param options Daemon settings; every field has a default. */
    constructor(options?: UnoServerOptions);
    /** RPC port the daemon is configured to listen on. */
    get port(): number;
    /** Host/interface the daemon is configured to bind to. */
    get host(): string;
    /** Whether `start()` has completed and the daemon has not exited since. */
    isReady(): boolean;
    /** Spawns the daemon and resolves once it is ready. */
    start(): Promise<void>;
    /**
     * Converts a document through the running daemon.
     *
     * @param input Raw document bytes.
     * @param format Target format.
     * @returns The converted bytes.
     */
    convert(input: Buffer | Uint8Array, format?: string): Promise<Buffer>;
    /** Stops the daemon process. */
    stop(): Promise<void>;
}
|
|
70
|
-
/**
 * Several warm `unoserver` daemons, one port each, for concurrent
 * conversions. A job goes to the first idle worker; when none is idle it
 * waits in a FIFO queue.
 */
export declare class UnoServerPool {
    private workers;
    private queue;
    private readonly opts;
    private running;
    /** @param options Pool settings; every field has a default. */
    constructor(options?: UnoServerPoolOptions);
    /** Starts the pool's daemons. */
    start(): Promise<void>;
    /**
     * Converts a document on an idle worker, queueing when all are busy.
     *
     * @param input Raw document bytes.
     * @param format Target format.
     * @returns The converted bytes.
     */
    convert(input: Buffer | Uint8Array, format?: string): Promise<Buffer>;
    private dispatch;
    /** Stops the pool's daemons. */
    stop(): Promise<void>;
    /** Current worker/queue counters. */
    getStats(): UnoServerStats;
}
|
package/dist/unoServer.js
DELETED
|
@@ -1,378 +0,0 @@
|
|
|
1
|
-
import { spawn, execSync } from 'child_process';
|
|
2
|
-
import { createConnection } from 'net';
|
|
3
|
-
import { resolve as resolvePath } from 'path';
|
|
4
|
-
// ═══════════════════════════════════════════════
|
|
5
|
-
// HELPERS
|
|
6
|
-
// ═══════════════════════════════════════════════
|
|
7
|
-
/**
 * Resolves a command through the shell's `command -v`.
 *
 * @param cmd Bare command name or path to look up.
 * @returns The location printed by the shell, or `null` when the lookup
 *   fails or prints nothing.
 */
function which(cmd) {
    try {
        const out = execSync(`command -v ${cmd}`, { stdio: ['ignore', 'pipe', 'ignore'] })
            .toString()
            .trim();
        return out || null;
    }
    catch {
        // A non-zero exit (command not found) lands here; treat it as "absent".
        return null;
    }
}
/**
 * Returns the first candidate that `which` can resolve, else `fallback`.
 * Absolute paths go through the same check as bare names, so a path that
 * does not exist on this machine is skipped instead of being returned blindly.
 */
function firstResolvable(candidates, fallback) {
    for (const candidate of candidates) {
        const resolved = which(candidate);
        if (resolved)
            return resolved;
    }
    return fallback;
}
/**
 * Locates the `unoserver` executable: PATH first, then common install
 * locations. Falls back to the bare name `unoserver` when nothing is found.
 */
function findUnoserver() {
    return firstResolvable([
        'unoserver',
        '/usr/local/bin/unoserver',
        '/usr/bin/unoserver',
        '/opt/homebrew/bin/unoserver',
    ], 'unoserver');
}
/**
 * Locates the `unoconvert` executable: PATH first, then common install
 * locations. Falls back to the bare name `unoconvert` when nothing is found.
 */
function findUnoconvert() {
    return firstResolvable([
        'unoconvert',
        '/usr/local/bin/unoconvert',
        '/usr/bin/unoconvert',
        '/opt/homebrew/bin/unoconvert',
    ], 'unoconvert');
}
|
|
46
|
-
/**
 * Locates a working LibreOffice binary by running `<candidate> --version`
 * on each known install path, in order.
 *
 * @returns The first candidate whose version check succeeds, or `undefined`
 *   when none does.
 */
function findSoffice() {
    const knownLocations = [
        '/usr/bin/soffice',
        '/usr/bin/libreoffice',
        '/usr/local/bin/soffice',
        '/Applications/LibreOffice.app/Contents/MacOS/soffice',
    ];
    const runs = (binary) => {
        try {
            execSync(`${binary} --version`, { stdio: 'ignore' });
            return true;
        }
        catch {
            // Missing or broken binary: move on to the next location.
            return false;
        }
    };
    return knownLocations.find(runs);
}
|
|
62
|
-
/**
 * Tests whether a TCP connection to `host:port` can be opened.
 *
 * @param host Host to connect to.
 * @param port Port to connect to.
 * @param timeoutMs Time allowed for the attempt, in milliseconds.
 * @returns `true` once the socket connects; `false` on a socket error or
 *   when the attempt outlives `timeoutMs`.
 */
async function probePort(host, port, timeoutMs = 500) {
    return new Promise((done) => {
        const socket = createConnection({ host, port });
        const onTimeout = () => {
            socket.destroy();
            done(false);
        };
        const watchdog = setTimeout(onTimeout, timeoutMs);
        const onConnect = () => {
            clearTimeout(watchdog);
            socket.end();
            done(true);
        };
        const onError = () => {
            clearTimeout(watchdog);
            done(false);
        };
        socket.once('connect', onConnect);
        socket.once('error', onError);
    });
}
|
|
80
|
-
/**
 * Polls `host:port` until a connection succeeds or `deadline` passes.
 *
 * @param host Host to probe.
 * @param port Port to probe.
 * @param deadline Absolute time (`Date.now()` scale) after which no new
 *   probe is started.
 * @throws Error when the deadline passes without a successful probe.
 */
async function waitForPort(host, port, deadline) {
    for (;;) {
        if (Date.now() >= deadline)
            break;
        const reachable = await probePort(host, port);
        if (reachable)
            return;
        // Pause briefly between attempts.
        await new Promise((wake) => setTimeout(wake, 250));
    }
    throw new Error(`unoserver did not become ready on ${host}:${port} within the timeout`);
}
|
|
88
|
-
// ═══════════════════════════════════════════════
|
|
89
|
-
// SINGLE UNOSERVER DAEMON
|
|
90
|
-
// ═══════════════════════════════════════════════
|
|
91
|
-
/**
 * Wraps a single `unoserver` daemon process and a matching `unoconvert` client.
 *
 * Requires the `unoserver` Python package on the host:
 *   pip install unoserver
 *
 * A single running daemon holds a warm LibreOffice instance, so conversions
 * typically complete in 200–500 ms — roughly 10× faster than cold `soffice
 * --convert-to` invocations.
 */
export class UnoServer {
    proc = null;
    opts;
    ready = false;
    /**
     * @param options Daemon settings. Executable paths that are not supplied
     *   are auto-detected; `userInstallation` is resolved to an absolute path.
     */
    constructor(options = {}) {
        this.opts = {
            port: options.port ?? 2003,
            unoPort: options.unoPort,
            host: options.host ?? '127.0.0.1',
            unoserverPath: options.unoserverPath ?? findUnoserver(),
            unoconvertPath: options.unoconvertPath ?? findUnoconvert(),
            libreOfficePath: options.libreOfficePath ?? findSoffice(),
            userInstallation: options.userInstallation
                ? resolvePath(options.userInstallation)
                : undefined,
            startTimeout: options.startTimeout ?? 15000,
            convertTimeout: options.convertTimeout ?? 30000,
        };
    }
    /** RPC port the daemon is configured to listen on. */
    get port() {
        return this.opts.port;
    }
    /** Host/interface the daemon is configured to bind to. */
    get host() {
        return this.opts.host;
    }
    /** Whether `start()` has completed and the daemon has not exited since. */
    isReady() {
        return this.ready;
    }
    /**
     * Spawns the daemon and resolves once its port accepts connections.
     * A no-op when the daemon is already ready.
     *
     * @throws Error describing the spawn failure, the early exit (with exit
     *   code, signal and stderr), or the startup timeout (with stderr).
     */
    async start() {
        if (this.ready)
            return;
        const args = ['--interface', this.opts.host, '--port', String(this.opts.port)];
        if (this.opts.unoPort !== undefined) {
            args.push('--uno-port', String(this.opts.unoPort));
        }
        if (this.opts.libreOfficePath) {
            args.push('--executable', this.opts.libreOfficePath);
        }
        if (this.opts.userInstallation) {
            args.push('--user-installation', this.opts.userInstallation);
        }
        const proc = spawn(this.opts.unoserverPath, args, {
            stdio: ['ignore', 'pipe', 'pipe'],
            detached: false,
        });
        this.proc = proc;
        // True until startup has either succeeded or failed. stderr is only
        // retained during this phase so a long-lived daemon cannot grow the
        // buffer without bound; afterwards the stream is merely drained.
        let starting = true;
        const stderrChunks = [];
        proc.stderr?.on('data', (c) => {
            if (starting)
                stderrChunks.push(c);
        });
        // Also drain stdout (unoserver is quiet here but we must consume it to
        // avoid filling the pipe buffer on some platforms).
        proc.stdout?.on('data', () => { });
        const collectStderr = () => Buffer.concat(stderrChunks).toString('utf8').trim();
        // Set when the process itself reported the failure (exit or spawn
        // error) while startup was still in progress.
        let startupFailure = null;
        const earlyExitPromise = new Promise((_, reject) => {
            proc.once('exit', (code, signal) => {
                this.ready = false;
                if (this.proc === proc)
                    this.proc = null;
                if (starting && startupFailure === null) {
                    startupFailure = 'exit';
                    reject(new Error(`unoserver exited during startup (code=${code}, signal=${signal})\n` +
                        `stderr:\n${collectStderr() || '(empty)'}`));
                }
            });
        });
        const spawnErrorPromise = new Promise((_, reject) => {
            proc.once('error', (err) => {
                this.ready = false;
                if (starting && startupFailure === null)
                    startupFailure = 'spawn';
                reject(new Error(`unoserver spawn error: ${err.message}`));
            });
        });
        const deadline = Date.now() + this.opts.startTimeout;
        try {
            await Promise.race([
                waitForPort(this.opts.host, this.opts.port, deadline),
                earlyExitPromise,
                spawnErrorPromise,
            ]);
        }
        catch (err) {
            // Decide which error to surface BEFORE stop() kills the process:
            // the kill triggers the 'exit' handler, which must not be mistaken
            // for an early exit.
            const alreadyDescriptive = startupFailure !== null;
            starting = false;
            await this.stop();
            if (alreadyDescriptive) {
                // Early-exit / spawn errors already carry the real reason.
                throw err;
            }
            // Timeout path — attach whatever stderr we collected for debugging.
            const stderr = collectStderr();
            const message = `unoserver did not become ready on ${this.opts.host}:${this.opts.port} ` +
                `within ${this.opts.startTimeout}ms\nstderr:\n${stderr || '(empty)'}`;
            throw new Error(message);
        }
        starting = false;
        stderrChunks.length = 0;
        this.ready = true;
    }
    /**
     * Converts a document by piping it through `unoconvert` (stdin → stdout).
     *
     * @param input Raw document bytes.
     * @param format Target format passed to `--convert-to`.
     * @returns The converted bytes.
     * @throws Error when the server is not started, the client fails to
     *   spawn, exits non-zero (stderr included), or exceeds `convertTimeout`.
     */
    async convert(input, format = 'pdf') {
        if (!this.ready) {
            throw new Error('UnoServer not started. Call .start() first.');
        }
        return new Promise((resolve, reject) => {
            const args = [
                '--host', this.opts.host,
                '--port', String(this.opts.port),
                '--convert-to', format,
                '-', // input: stdin
                '-', // output: stdout
            ];
            const client = spawn(this.opts.unoconvertPath, args, {
                stdio: ['pipe', 'pipe', 'pipe'],
            });
            const chunks = [];
            const errChunks = [];
            let settled = false;
            // Runs `action` at most once across all completion paths.
            const settle = (action) => {
                if (settled)
                    return;
                settled = true;
                clearTimeout(timer);
                action();
            };
            const timer = setTimeout(() => settle(() => {
                try {
                    client.kill('SIGKILL');
                }
                catch { }
                reject(new Error(`unoconvert timed out after ${this.opts.convertTimeout}ms`));
            }), this.opts.convertTimeout);
            client.stdout.on('data', (c) => chunks.push(c));
            client.stderr.on('data', (c) => errChunks.push(c));
            client.on('error', (err) => settle(() => reject(err)));
            client.on('close', (code) => settle(() => {
                if (code === 0) {
                    resolve(Buffer.concat(chunks));
                }
                else {
                    const stderr = Buffer.concat(errChunks).toString('utf8').trim();
                    reject(new Error(`unoconvert exited with code ${code}: ${stderr}`));
                }
            }));
            // A broken pipe on stdin (client died early) must reject, not crash.
            client.stdin.on('error', (err) => settle(() => reject(err)));
            client.stdin.end(Buffer.from(input));
        });
    }
    /**
     * Stops the daemon: SIGTERM first, SIGKILL if it has not exited within
     * two seconds. Safe to call when nothing is running.
     */
    async stop() {
        this.ready = false;
        const proc = this.proc;
        this.proc = null;
        if (!proc)
            return;
        // A process that never spawned (no pid) or has already exited will not
        // emit another 'exit' event, so there is nothing to signal or wait for.
        if (proc.pid === undefined || proc.exitCode !== null || proc.signalCode !== null)
            return;
        try {
            proc.kill('SIGTERM');
        }
        catch { }
        // Give it a moment to exit cleanly, then force.
        await new Promise((resolve) => {
            const killTimer = setTimeout(() => {
                try {
                    proc.kill('SIGKILL');
                }
                catch { }
                resolve();
            }, 2000);
            proc.once('exit', () => {
                clearTimeout(killTimer);
                resolve();
            });
        });
    }
}
|
|
281
|
-
/**
 * A pool of warm `unoserver` daemons, each listening on its own port.
 * Use this when you need concurrent conversions — jobs are dispatched
 * to the first idle worker, otherwise queued FIFO.
 *
 * Lifecycle: construct, `await start()`, call `convert()` as often as
 * needed, then `await stop()`.
 */
export class UnoServerPool {
    /** One `{ server, busy }` record per started worker. */
    workers = [];
    /** Jobs waiting for a free worker, oldest first. */
    queue = [];
    /** Options with defaults applied (see the constructor). */
    opts;
    /** True between a successful `start()` and the next `stop()`. */
    running = false;
    /**
     * @param options Pool configuration. Defaults: `workers` 2,
     *   `basePort` 2003, `unoBasePort` 2100, `tempDir`
     *   `$TMPDIR/docverse_unoserver` (falling back to `/tmp`),
     *   `startTimeout` 15000 ms, `convertTimeout` 30000 ms. `host`,
     *   `unoserverPath`, `unoconvertPath` and `libreOfficePath` are passed
     *   through to every worker unchanged.
     * @throws {RangeError} If `workers` is not a positive integer. A pool
     *   without workers could never run a job, so every `convert()` call
     *   would stay queued forever.
     */
    constructor(options = {}) {
        const workerCount = options.workers ?? 2;
        if (!Number.isInteger(workerCount) || workerCount < 1) {
            throw new RangeError(`UnoServerPool requires a positive integer number of workers, got ${String(workerCount)}`);
        }
        this.opts = {
            workers: workerCount,
            basePort: options.basePort ?? 2003,
            unoBasePort: options.unoBasePort ?? 2100,
            tempDir: options.tempDir ?? `${process.env.TMPDIR ?? '/tmp'}/docverse_unoserver`,
            startTimeout: options.startTimeout ?? 15000,
            convertTimeout: options.convertTimeout ?? 30000,
            host: options.host,
            unoserverPath: options.unoserverPath,
            unoconvertPath: options.unoconvertPath,
            libreOfficePath: options.libreOfficePath,
        };
    }
    /**
     * Creates the temp directory and starts all workers in parallel.
     * Does nothing if the pool is already running.
     *
     * Worker `i` gets port `basePort + i`, UNO port `unoBasePort + i` and
     * its own `worker_<i>` user-installation directory under `tempDir`.
     *
     * @throws Whatever the first failing worker (lowest index) rejected
     *   with. In that case every worker is stopped again before the error
     *   is rethrown, so no daemon is left running unowned.
     */
    async start() {
        if (this.running)
            return;
        const { mkdirSync } = await import('fs');
        // `recursive: true` already tolerates an existing directory, so no
        // separate existence check is needed.
        mkdirSync(this.opts.tempDir, { recursive: true });
        const servers = [];
        for (let i = 0; i < this.opts.workers; i++) {
            // unoserver expects a raw absolute path for --user-installation; passing
            // a `file://` URI trips its internal `Path(...).as_uri()` and crashes.
            const userInstallation = resolvePath(this.opts.tempDir, `worker_${i}`);
            servers.push(new UnoServer({
                port: this.opts.basePort + i,
                unoPort: this.opts.unoBasePort + i,
                host: this.opts.host,
                unoserverPath: this.opts.unoserverPath,
                unoconvertPath: this.opts.unoconvertPath,
                libreOfficePath: this.opts.libreOfficePath,
                userInstallation,
                startTimeout: this.opts.startTimeout,
                convertTimeout: this.opts.convertTimeout,
            }));
        }
        // Wait for every start attempt to finish so that, on failure, the
        // workers that did come up can be shut down instead of leaking.
        const outcomes = await Promise.allSettled(servers.map((s) => s.start()));
        const failure = outcomes.find((o) => o.status === 'rejected');
        if (failure) {
            await Promise.allSettled(servers.map((s) => s.stop()));
            throw failure.reason;
        }
        this.workers = servers.map((server) => ({ server, busy: false }));
        this.running = true;
    }
    /**
     * Converts a document on the first idle worker, or queues the job
     * until a worker becomes free.
     *
     * @param input Document bytes; copied with `Buffer.from` before use.
     * @param format Target format handed to the worker (default `'pdf'`).
     * @returns A promise that settles with the outcome of the worker's
     *   `convert()` call, or rejects if the pool is stopped while the job
     *   is still queued.
     * @throws {Error} If the pool has not been started.
     */
    async convert(input, format = 'pdf') {
        if (!this.running) {
            throw new Error('UnoServerPool not started. Call .start() first.');
        }
        const buf = Buffer.from(input);
        return new Promise((resolve, reject) => {
            const idle = this.workers.find((w) => !w.busy);
            if (idle) {
                this.dispatch(idle, buf, format, resolve, reject);
            }
            else {
                this.queue.push({ input: buf, format, resolve, reject });
            }
        });
    }
    /**
     * Runs one job on `worker` and, once it settles, hands the worker the
     * next queued job (if any).
     *
     * @param worker Pool slot to run on; marked busy for the duration.
     * @param buf Document bytes.
     * @param format Target format.
     * @param resolve Settles the caller's promise on success.
     * @param reject Settles the caller's promise on failure.
     */
    dispatch(worker, buf, format, resolve, reject) {
        worker.busy = true;
        worker.server
            .convert(buf, format)
            .then(resolve, reject)
            .finally(() => {
                worker.busy = false;
                // A job can outlive stop(): by the time it settles the pool
                // may be stopped, or restarted with a fresh set of workers.
                // Such a retired worker must not pull work from the queue,
                // because its server has already been shut down.
                if (!this.running || !this.workers.includes(worker))
                    return;
                const next = this.queue.shift();
                if (next) {
                    this.dispatch(worker, next.input, next.format, next.resolve, next.reject);
                }
            });
    }
    /**
     * Stops the pool: rejects every queued job and stops all workers.
     * Jobs already running on a worker are not rejected here; they settle
     * with whatever their worker's `convert()` produces.
     */
    async stop() {
        this.running = false;
        const pending = this.queue.splice(0);
        for (const job of pending) {
            job.reject(new Error('UnoServerPool stopped before job completed'));
        }
        // Detach the worker list before awaiting so that this call can
        // never wipe a list installed by a start() that ran in the meantime.
        const retiring = this.workers;
        this.workers = [];
        await Promise.all(retiring.map((w) => w.server.stop()));
    }
    /**
     * Snapshot of the pool state.
     *
     * @returns `workers` (pool size), `ready` (workers whose server reports
     *   `isReady()`), `busy` (workers running a job) and `queued` (jobs
     *   waiting for a worker).
     */
    getStats() {
        return {
            workers: this.workers.length,
            ready: this.workers.filter((w) => w.server.isReady()).length,
            busy: this.workers.filter((w) => w.busy).length,
            queued: this.queue.length,
        };
    }
}
|