@docverse-pdf/server 1.0.6 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,5 +1,7 @@
1
1
  export { PDFDocument } from './PDFDocument.js';
2
2
  export type { PageSize, TextBlock, SearchResult, AnnotationInfo, PageObjectInfo, FormField, Bookmark, RenderOptions, ImageInfo, TextObjectStyle, } from './PDFDocument.js';
3
3
  export { flattenXFDF, mergeXFDFIntoPDF } from './flattenXFDF.js';
4
- export { wordToPDF, LibreOfficePool } from './wordToPDF.js';
4
+ export { wordToPDF, LibreOfficePool, stopSharedUnoServer } from './wordToPDF.js';
5
5
  export type { ConvertOptions, PoolOptions } from './wordToPDF.js';
6
+ export { UnoServer, UnoServerPool } from './unoServer.js';
7
+ export type { UnoServerOptions, UnoServerPoolOptions, UnoServerStats, } from './unoServer.js';
package/dist/index.js CHANGED
@@ -1,3 +1,4 @@
1
1
  export { PDFDocument } from './PDFDocument.js';
2
2
  export { flattenXFDF, mergeXFDFIntoPDF } from './flattenXFDF.js';
3
- export { wordToPDF, LibreOfficePool } from './wordToPDF.js';
3
+ export { wordToPDF, LibreOfficePool, stopSharedUnoServer } from './wordToPDF.js';
4
+ export { UnoServer, UnoServerPool } from './unoServer.js';
@@ -0,0 +1,71 @@
1
/**
 * Options for a single {@link UnoServer} daemon wrapper.
 * Every field is optional; the `UnoServer` constructor fills in the defaults
 * listed below.
 */
export interface UnoServerOptions {
    /** TCP port the unoserver daemon listens on (default: 2003). */
    port?: number;
    /** Host/interface the daemon binds to and the client connects to (default: 127.0.0.1). */
    host?: string;
    /** Path to the `unoserver` executable (default: auto-detect). */
    unoserverPath?: string;
    /** Path to the `unoconvert` client executable (default: auto-detect). */
    unoconvertPath?: string;
    /**
     * Path to the LibreOffice `soffice` binary (passed to unoserver --executable).
     * Auto-detected from a list of well-known locations when omitted; the flag
     * is not passed at all if nothing is found.
     */
    libreOfficePath?: string;
    /**
     * Dedicated UNO user-profile dir for this daemon (passed to unoserver
     * --user-installation). The flag is only passed when this is set.
     */
    userInstallation?: string;
    /** Max time (ms) to wait for the daemon to become ready (default: 15000). */
    startTimeout?: number;
    /** Max time (ms) to wait for a single conversion (default: 30000). */
    convertTimeout?: number;
}
19
/**
 * Options for a {@link UnoServerPool}. Inherits the per-daemon options except
 * `port` and `userInstallation`, which the pool derives for each worker from
 * `basePort` and `tempDir`.
 */
export interface UnoServerPoolOptions extends Omit<UnoServerOptions, 'port' | 'userInstallation'> {
    /** Number of parallel unoserver daemons (default: 2). */
    workers?: number;
    /** First TCP port; each worker gets basePort+i (default: 2003). */
    basePort?: number;
    /**
     * Base dir for per-worker user-profile dirs; worker `i` uses
     * `<tempDir>/worker_<i>`. Default: `$TMPDIR/docverse_unoserver`, falling
     * back to `/tmp/docverse_unoserver` when `TMPDIR` is unset.
     */
    tempDir?: string;
}
27
/** Point-in-time snapshot returned by `UnoServerPool.getStats()`. */
export interface UnoServerStats {
    /** Number of workers currently held by the pool (0 before `start()` and after `stop()`). */
    workers: number;
    /** Workers whose daemon currently reports `isReady()`. */
    ready: number;
    /** Workers that are running a conversion right now. */
    busy: number;
    /** Jobs waiting in the FIFO queue for a worker to become idle. */
    queued: number;
}
33
/**
 * Wraps a single `unoserver` daemon process and a matching `unoconvert` client.
 *
 * Requires the `unoserver` Python package on the host:
 *   pip install unoserver
 *
 * A single running daemon holds a warm LibreOffice instance, so conversions
 * typically complete in 200–500 ms — roughly 10× faster than cold `soffice
 * --convert-to` invocations.
 */
export declare class UnoServer {
    /** Handle of the spawned daemon process, or `null` when none is running. */
    private proc;
    /** Options with defaults applied at construction time. */
    private readonly opts;
    /** Set once the daemon's port accepted a connection; cleared on exit/stop. */
    private ready;
    /**
     * Resolves defaults for every option. Executable paths that are not
     * supplied are auto-detected here. No process is started yet.
     */
    constructor(options?: UnoServerOptions);
    /** TCP port this daemon is configured to listen on. */
    get port(): number;
    /** Host/interface this daemon is configured to bind to. */
    get host(): string;
    /** Whether the daemon has been started and has not exited or been stopped since. */
    isReady(): boolean;
    /**
     * Spawns the daemon and resolves once its port accepts a TCP connection.
     * Does nothing if the daemon is already ready. Rejects (after stopping the
     * daemon) when the port does not open within `startTimeout`.
     */
    start(): Promise<void>;
    /**
     * Converts a document by piping `input` to `unoconvert` on stdin and
     * collecting its stdout.
     *
     * @param input  Raw bytes of the source document.
     * @param format Target format passed to `--convert-to` (default: `'pdf'`).
     * @returns The converted document bytes.
     * @throws If the daemon is not ready, the client cannot be spawned, it
     *         exits with a non-zero code, or `convertTimeout` elapses.
     */
    convert(input: Buffer | Uint8Array, format?: string): Promise<Buffer>;
    /**
     * Stops the daemon: sends SIGTERM, then SIGKILL if it has not exited
     * after 2 seconds. Resolves immediately when no daemon is running.
     */
    stop(): Promise<void>;
}
55
/**
 * A pool of warm `unoserver` daemons, each listening on its own port.
 * Use this when you need concurrent conversions — jobs are dispatched
 * to the first idle worker, otherwise queued FIFO.
 */
export declare class UnoServerPool {
    /** One entry per daemon: the `UnoServer` plus a busy flag. Empty until `start()`. */
    private workers;
    /** Jobs waiting for an idle worker, oldest first. */
    private queue;
    /** Options with pool-level defaults applied at construction time. */
    private readonly opts;
    /** `true` between a successful `start()` and the next `stop()`. */
    private running;
    /** Resolves pool defaults. No daemon is started yet. */
    constructor(options?: UnoServerPoolOptions);
    /**
     * Creates the profile base directory if needed, then starts all workers
     * in parallel (worker `i` on `basePort + i`). Does nothing if the pool is
     * already running. Rejects if any worker fails to start.
     */
    start(): Promise<void>;
    /**
     * Converts a document on the first idle worker, or queues the job until
     * one becomes idle.
     *
     * @param input  Raw bytes of the source document (copied on entry).
     * @param format Target format (default: `'pdf'`).
     * @throws If the pool is not running, if the pool is stopped while the
     *         job is still queued, or if the underlying conversion fails.
     */
    convert(input: Buffer | Uint8Array, format?: string): Promise<Buffer>;
    /** Runs one job on a worker and, when it settles, feeds the worker the next queued job. */
    private dispatch;
    /** Rejects every queued job, stops all daemons and empties the pool. */
    stop(): Promise<void>;
    /** Returns a snapshot of worker and queue counts. */
    getStats(): UnoServerStats;
}
@@ -0,0 +1,339 @@
1
+ import { spawn, execSync } from 'child_process';
2
+ import { createConnection } from 'net';
3
+ // ═══════════════════════════════════════════════
4
+ // HELPERS
5
+ // ═══════════════════════════════════════════════
6
/**
 * Resolves a command name or path through the shell's `command -v`.
 *
 * @param cmd Bare command name (looked up on PATH) or a path containing `/`.
 * @returns The resolved location printed by the shell, or `null` when the
 *          command cannot be found or the lookup itself fails.
 */
function which(cmd) {
    // Single-quote the argument so paths containing spaces or shell
    // metacharacters are passed through literally.
    const quoted = `'${String(cmd).replace(/'/g, `'\\''`)}'`;
    try {
        const out = execSync(`command -v ${quoted}`, { stdio: ['ignore', 'pipe', 'ignore'] })
            .toString()
            .trim();
        return out || null;
    }
    catch {
        // Non-zero exit status: the shell could not resolve the command.
        return null;
    }
}
/**
 * Returns the first candidate the shell can actually resolve.
 *
 * Every candidate — absolute paths included — is verified, so a hard-coded
 * location that does not exist on this host is skipped instead of being
 * returned blindly.
 *
 * @param candidates Command names/paths in priority order.
 * @param fallback   Value returned when no candidate resolves; callers pass
 *                   the bare command name so `spawn` reports a clear ENOENT.
 */
function resolveExecutable(candidates, fallback) {
    for (const candidate of candidates) {
        const resolved = which(candidate);
        if (resolved)
            return resolved;
    }
    return fallback;
}
/**
 * Locates the `unoserver` executable: PATH first, then common install
 * locations. Falls back to the bare name `'unoserver'`.
 */
function findUnoserver() {
    return resolveExecutable([
        'unoserver',
        '/usr/local/bin/unoserver',
        '/usr/bin/unoserver',
        '/opt/homebrew/bin/unoserver',
    ], 'unoserver');
}
/**
 * Locates the `unoconvert` client executable: PATH first, then common
 * install locations. Falls back to the bare name `'unoconvert'`.
 */
function findUnoconvert() {
    return resolveExecutable([
        'unoconvert',
        '/usr/local/bin/unoconvert',
        '/usr/bin/unoconvert',
        '/opt/homebrew/bin/unoconvert',
    ], 'unoconvert');
}
45
/**
 * Looks for a working LibreOffice binary among well-known install locations.
 *
 * A location counts as working when running it with `--version` succeeds.
 *
 * @returns The first working location, or `undefined` when none responds.
 */
function findSoffice() {
    const knownLocations = [
        '/usr/bin/soffice',
        '/usr/bin/libreoffice',
        '/usr/local/bin/soffice',
        '/Applications/LibreOffice.app/Contents/MacOS/soffice',
    ];
    // Reports whether the binary at `location` can be executed.
    const responds = (location) => {
        try {
            execSync(`${location} --version`, { stdio: 'ignore' });
        }
        catch {
            return false;
        }
        return true;
    };
    // Array.prototype.find yields undefined when nothing matches.
    return knownLocations.find(responds);
}
61
+ async function probePort(host, port, timeoutMs = 500) {
62
+ return new Promise((resolve) => {
63
+ const sock = createConnection({ host, port });
64
+ const timer = setTimeout(() => {
65
+ sock.destroy();
66
+ resolve(false);
67
+ }, timeoutMs);
68
+ sock.once('connect', () => {
69
+ clearTimeout(timer);
70
+ sock.end();
71
+ resolve(true);
72
+ });
73
+ sock.once('error', () => {
74
+ clearTimeout(timer);
75
+ resolve(false);
76
+ });
77
+ });
78
+ }
79
/**
 * Polls `host:port` until it accepts a TCP connection or `deadline` passes.
 *
 * @param host     Host to probe.
 * @param port     TCP port to probe.
 * @param deadline Absolute time (ms since epoch, as from `Date.now()`) after
 *                 which polling stops.
 * @throws Error when the deadline passes without a successful probe. No
 *         probe is attempted if the deadline has already passed on entry.
 */
async function waitForPort(host, port, deadline) {
    // Pause between two probes.
    const pause = () => new Promise((wake) => setTimeout(wake, 250));
    for (; Date.now() < deadline; await pause()) {
        const reachable = await probePort(host, port);
        if (reachable) {
            return;
        }
    }
    throw new Error(`unoserver did not become ready on ${host}:${port} within the timeout`);
}
87
+ // ═══════════════════════════════════════════════
88
+ // SINGLE UNOSERVER DAEMON
89
+ // ═══════════════════════════════════════════════
90
+ /**
91
+ * Wraps a single `unoserver` daemon process and a matching `unoconvert` client.
92
+ *
93
+ * Requires the `unoserver` Python package on the host:
94
+ * pip install unoserver
95
+ *
96
+ * A single running daemon holds a warm LibreOffice instance, so conversions
97
+ * typically complete in 200–500 ms — roughly 10× faster than cold `soffice
98
+ * --convert-to` invocations.
99
+ */
100
+ export class UnoServer {
101
+ proc = null;
102
+ opts;
103
+ ready = false;
104
+ constructor(options = {}) {
105
+ this.opts = {
106
+ port: options.port ?? 2003,
107
+ host: options.host ?? '127.0.0.1',
108
+ unoserverPath: options.unoserverPath ?? findUnoserver(),
109
+ unoconvertPath: options.unoconvertPath ?? findUnoconvert(),
110
+ libreOfficePath: options.libreOfficePath ?? findSoffice(),
111
+ userInstallation: options.userInstallation,
112
+ startTimeout: options.startTimeout ?? 15000,
113
+ convertTimeout: options.convertTimeout ?? 30000,
114
+ };
115
+ }
116
+ get port() {
117
+ return this.opts.port;
118
+ }
119
+ get host() {
120
+ return this.opts.host;
121
+ }
122
+ isReady() {
123
+ return this.ready;
124
+ }
125
+ async start() {
126
+ if (this.ready)
127
+ return;
128
+ const args = ['--interface', this.opts.host, '--port', String(this.opts.port)];
129
+ if (this.opts.libreOfficePath) {
130
+ args.push('--executable', this.opts.libreOfficePath);
131
+ }
132
+ if (this.opts.userInstallation) {
133
+ args.push('--user-installation', this.opts.userInstallation);
134
+ }
135
+ const proc = spawn(this.opts.unoserverPath, args, {
136
+ stdio: ['ignore', 'pipe', 'pipe'],
137
+ detached: false,
138
+ });
139
+ proc.on('error', (err) => {
140
+ this.ready = false;
141
+ // Let the caller see the launch error via waitForPort timeout.
142
+ // Emit to stderr so it's debuggable without swallowing silently.
143
+ process.stderr.write(`[unoserver] spawn error: ${err.message}\n`);
144
+ });
145
+ proc.on('exit', () => {
146
+ this.ready = false;
147
+ this.proc = null;
148
+ });
149
+ this.proc = proc;
150
+ const deadline = Date.now() + this.opts.startTimeout;
151
+ try {
152
+ await waitForPort(this.opts.host, this.opts.port, deadline);
153
+ }
154
+ catch (err) {
155
+ await this.stop();
156
+ throw err;
157
+ }
158
+ this.ready = true;
159
+ }
160
+ async convert(input, format = 'pdf') {
161
+ if (!this.ready) {
162
+ throw new Error('UnoServer not started. Call .start() first.');
163
+ }
164
+ return new Promise((resolve, reject) => {
165
+ const args = [
166
+ '--host', this.opts.host,
167
+ '--port', String(this.opts.port),
168
+ '--convert-to', format,
169
+ '-', // input: stdin
170
+ '-', // output: stdout
171
+ ];
172
+ const client = spawn(this.opts.unoconvertPath, args, {
173
+ stdio: ['pipe', 'pipe', 'pipe'],
174
+ });
175
+ const chunks = [];
176
+ const errChunks = [];
177
+ let settled = false;
178
+ const timer = setTimeout(() => {
179
+ if (settled)
180
+ return;
181
+ settled = true;
182
+ try {
183
+ client.kill('SIGKILL');
184
+ }
185
+ catch { }
186
+ reject(new Error(`unoconvert timed out after ${this.opts.convertTimeout}ms`));
187
+ }, this.opts.convertTimeout);
188
+ client.stdout.on('data', (c) => chunks.push(c));
189
+ client.stderr.on('data', (c) => errChunks.push(c));
190
+ client.on('error', (err) => {
191
+ if (settled)
192
+ return;
193
+ settled = true;
194
+ clearTimeout(timer);
195
+ reject(err);
196
+ });
197
+ client.on('close', (code) => {
198
+ if (settled)
199
+ return;
200
+ settled = true;
201
+ clearTimeout(timer);
202
+ if (code === 0) {
203
+ resolve(Buffer.concat(chunks));
204
+ }
205
+ else {
206
+ const stderr = Buffer.concat(errChunks).toString('utf8').trim();
207
+ reject(new Error(`unoconvert exited with code ${code}: ${stderr}`));
208
+ }
209
+ });
210
+ client.stdin.on('error', (err) => {
211
+ if (settled)
212
+ return;
213
+ settled = true;
214
+ clearTimeout(timer);
215
+ reject(err);
216
+ });
217
+ client.stdin.end(Buffer.from(input));
218
+ });
219
+ }
220
+ async stop() {
221
+ this.ready = false;
222
+ const proc = this.proc;
223
+ this.proc = null;
224
+ if (!proc)
225
+ return;
226
+ try {
227
+ proc.kill('SIGTERM');
228
+ }
229
+ catch { }
230
+ // Give it a moment to exit cleanly, then force.
231
+ await new Promise((resolve) => {
232
+ const killTimer = setTimeout(() => {
233
+ try {
234
+ proc.kill('SIGKILL');
235
+ }
236
+ catch { }
237
+ resolve();
238
+ }, 2000);
239
+ proc.once('exit', () => {
240
+ clearTimeout(killTimer);
241
+ resolve();
242
+ });
243
+ });
244
+ }
245
+ }
246
+ /**
247
+ * A pool of warm `unoserver` daemons, each listening on its own port.
248
+ * Use this when you need concurrent conversions — jobs are dispatched
249
+ * to the first idle worker, otherwise queued FIFO.
250
+ */
251
+ export class UnoServerPool {
252
+ workers = [];
253
+ queue = [];
254
+ opts;
255
+ running = false;
256
+ constructor(options = {}) {
257
+ this.opts = {
258
+ workers: options.workers ?? 2,
259
+ basePort: options.basePort ?? 2003,
260
+ tempDir: options.tempDir ?? `${process.env.TMPDIR ?? '/tmp'}/docverse_unoserver`,
261
+ startTimeout: options.startTimeout ?? 15000,
262
+ convertTimeout: options.convertTimeout ?? 30000,
263
+ host: options.host,
264
+ unoserverPath: options.unoserverPath,
265
+ unoconvertPath: options.unoconvertPath,
266
+ libreOfficePath: options.libreOfficePath,
267
+ };
268
+ }
269
+ async start() {
270
+ if (this.running)
271
+ return;
272
+ const { mkdirSync, existsSync } = await import('fs');
273
+ if (!existsSync(this.opts.tempDir)) {
274
+ mkdirSync(this.opts.tempDir, { recursive: true });
275
+ }
276
+ const servers = [];
277
+ for (let i = 0; i < this.opts.workers; i++) {
278
+ const userInstallation = `file://${this.opts.tempDir}/worker_${i}`;
279
+ servers.push(new UnoServer({
280
+ port: this.opts.basePort + i,
281
+ host: this.opts.host,
282
+ unoserverPath: this.opts.unoserverPath,
283
+ unoconvertPath: this.opts.unoconvertPath,
284
+ libreOfficePath: this.opts.libreOfficePath,
285
+ userInstallation,
286
+ startTimeout: this.opts.startTimeout,
287
+ convertTimeout: this.opts.convertTimeout,
288
+ }));
289
+ }
290
+ await Promise.all(servers.map((s) => s.start()));
291
+ this.workers = servers.map((server) => ({ server, busy: false }));
292
+ this.running = true;
293
+ }
294
+ async convert(input, format = 'pdf') {
295
+ if (!this.running) {
296
+ throw new Error('UnoServerPool not started. Call .start() first.');
297
+ }
298
+ const buf = Buffer.from(input);
299
+ return new Promise((resolve, reject) => {
300
+ const idle = this.workers.find((w) => !w.busy);
301
+ if (idle) {
302
+ this.dispatch(idle, buf, format, resolve, reject);
303
+ }
304
+ else {
305
+ this.queue.push({ input: buf, format, resolve, reject });
306
+ }
307
+ });
308
+ }
309
+ dispatch(worker, buf, format, resolve, reject) {
310
+ worker.busy = true;
311
+ worker.server
312
+ .convert(buf, format)
313
+ .then(resolve, reject)
314
+ .finally(() => {
315
+ worker.busy = false;
316
+ const next = this.queue.shift();
317
+ if (next) {
318
+ this.dispatch(worker, next.input, next.format, next.resolve, next.reject);
319
+ }
320
+ });
321
+ }
322
+ async stop() {
323
+ this.running = false;
324
+ const pending = this.queue.splice(0);
325
+ for (const job of pending) {
326
+ job.reject(new Error('UnoServerPool stopped before job completed'));
327
+ }
328
+ await Promise.all(this.workers.map((w) => w.server.stop()));
329
+ this.workers = [];
330
+ }
331
+ getStats() {
332
+ return {
333
+ workers: this.workers.length,
334
+ ready: this.workers.filter((w) => w.server.isReady()).length,
335
+ busy: this.workers.filter((w) => w.busy).length,
336
+ queued: this.queue.length,
337
+ };
338
+ }
339
+ }
@@ -1,8 +1,21 @@
1
+ import { UnoServer, UnoServerPool } from './unoServer.js';
2
+ import type { UnoServerOptions } from './unoServer.js';
1
3
  export interface ConvertOptions {
2
4
  format?: string;
3
- mode?: 'process' | 'socket' | 'pool';
5
+ mode?: 'process' | 'socket' | 'pool' | 'unoserver';
4
6
  socketUrl?: string;
5
7
  timeout?: number;
8
+ /**
9
+ * When `mode: 'unoserver'`, reuse this instance instead of the module-level
10
+ * shared singleton. Pass a `UnoServer` for a single warm daemon, or a
11
+ * `UnoServerPool` for concurrent workers.
12
+ */
13
+ unoServer?: UnoServer | UnoServerPool;
14
+ /**
15
+ * When `mode: 'unoserver'` and no `unoServer` is supplied, these options
16
+ * are used to lazily construct the shared singleton on the first call.
17
+ */
18
+ unoServerOptions?: UnoServerOptions;
6
19
  }
7
20
  export interface PoolOptions {
8
21
  workers?: number;
@@ -12,6 +25,12 @@ export interface PoolOptions {
12
25
  convertTimeout?: number;
13
26
  }
14
27
  export declare function wordToPDF(wordBuffer: Buffer | Uint8Array, optionsOrFormat?: string | ConvertOptions): Promise<Buffer>;
28
+ /**
29
+ * Stops and clears the shared unoserver singleton created lazily by
30
+ * `wordToPDF(..., { mode: 'unoserver' })`. Safe to call even if nothing
31
+ * was started. Useful in graceful-shutdown hooks.
32
+ */
33
+ export declare function stopSharedUnoServer(): Promise<void>;
15
34
  export declare class LibreOfficePool {
16
35
  private workers;
17
36
  private queue;
package/dist/wordToPDF.js CHANGED
@@ -4,6 +4,7 @@ import { writeFileSync, readFileSync, unlinkSync, mkdirSync, existsSync } from '
4
4
  import { join } from 'path';
5
5
  import { tmpdir } from 'os';
6
6
  import { randomBytes } from 'crypto';
7
+ import { UnoServer } from './unoServer.js';
7
8
  // ═══════════════════════════════════════════════
8
9
  // SIMPLE CONVERT (backward compatible)
9
10
  // ═══════════════════════════════════════════════
@@ -14,12 +15,50 @@ export async function wordToPDF(wordBuffer, optionsOrFormat = 'pdf') {
14
15
  const format = opts.format ?? 'pdf';
15
16
  const mode = opts.mode ?? 'process';
16
17
  const timeout = opts.timeout ?? 30000;
18
+ if (mode === 'unoserver') {
19
+ return convertViaUnoServer(wordBuffer, format, opts);
20
+ }
17
21
  if (mode === 'socket' && opts.socketUrl) {
18
22
  return convertViaSocket(wordBuffer, format, opts.socketUrl, timeout);
19
23
  }
20
24
  return convertViaProcess(wordBuffer, format, timeout);
21
25
  }
22
26
  // ═══════════════════════════════════════════════
27
+ // UNOSERVER MODE (warm LibreOffice, 200–500ms)
28
+ // ═══════════════════════════════════════════════
29
/** The module-level shared daemon, once it has started successfully. */
let sharedUnoServer = null;
/** Promise of the shared daemon while its startup is still in flight. */
let sharedUnoServerStarting = null;
/**
 * Returns the shared `UnoServer`, starting it on first use.
 *
 * Concurrent callers share one in-flight startup. A failed startup is not
 * cached: the next call tries again.
 *
 * @param options Used only when a new daemon has to be constructed; ignored
 *                while a ready (or starting) shared daemon exists.
 * @throws Whatever `UnoServer.start()` rejects with.
 */
async function getSharedUnoServer(options) {
    if (sharedUnoServer && sharedUnoServer.isReady())
        return sharedUnoServer;
    if (sharedUnoServerStarting)
        return sharedUnoServerStarting;
    const starting = (async () => {
        const server = new UnoServer(options);
        await server.start();
        sharedUnoServer = server;
        return server;
    })();
    sharedUnoServerStarting = starting;
    // Clear the in-flight marker on success AND on failure; otherwise a
    // rejected promise would stay cached and every later call would fail
    // with the same stale error. The identity check leaves a newer startup
    // (begun after a stop) untouched.
    const clearMarker = () => {
        if (sharedUnoServerStarting === starting)
            sharedUnoServerStarting = null;
    };
    starting.then(clearMarker, clearMarker);
    return starting;
}
/**
 * Stops and clears the shared unoserver singleton created lazily by
 * `wordToPDF(..., { mode: 'unoserver' })`. Safe to call even if nothing
 * was started. Useful in graceful-shutdown hooks.
 *
 * If a startup is still in flight, this waits for it to finish and then
 * stops that daemon too, so a shutdown during startup leaves nothing running.
 */
export async function stopSharedUnoServer() {
    const current = sharedUnoServer;
    const starting = sharedUnoServerStarting;
    sharedUnoServer = null;
    sharedUnoServerStarting = null;
    const toStop = new Set();
    if (current)
        toStop.add(current);
    if (starting) {
        try {
            const started = await starting;
            // The startup registers itself as the shared instance when it
            // completes; undo that before stopping it.
            if (sharedUnoServer === started)
                sharedUnoServer = null;
            toStop.add(started);
        }
        catch {
            // Startup failed, so there is no started instance to stop here.
        }
    }
    await Promise.all([...toStop].map((server) => server.stop()));
}
57
+ async function convertViaUnoServer(wordBuffer, format, opts) {
58
+ const instance = opts.unoServer ?? (await getSharedUnoServer(opts.unoServerOptions));
59
+ return instance.convert(wordBuffer, format);
60
+ }
61
+ // ═══════════════════════════════════════════════
23
62
  // PROCESS MODE (original, 3-5s)
24
63
  // ═══════════════════════════════════════════════
25
64
  async function convertViaProcess(wordBuffer, format, timeout) {
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@docverse-pdf/server",
3
- "version": "1.0.6",
4
- "description": "DocVerse Server SDK — server-side PDF processing, XFDF merge, Word to PDF, image extraction",
3
+ "version": "1.1.0",
4
+ "description": "DocVerse Server SDK — server-side PDF processing, XFDF merge, Word to PDF (unoserver + LibreOffice), image extraction",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
7
7
  "types": "dist/index.d.ts",