@docverse-pdf/server 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  export { PDFDocument } from './PDFDocument.js';
2
2
  export type { PageSize, TextBlock, SearchResult, AnnotationInfo, PageObjectInfo, FormField, Bookmark, RenderOptions, ImageInfo, TextObjectStyle, } from './PDFDocument.js';
3
3
  export { flattenXFDF, mergeXFDFIntoPDF } from './flattenXFDF.js';
4
- export { wordToPDF } from './wordToPDF.js';
4
+ export { wordToPDF, LibreOfficePool } from './wordToPDF.js';
5
+ export type { ConvertOptions, PoolOptions } from './wordToPDF.js';
package/dist/index.js CHANGED
@@ -1,3 +1,3 @@
1
1
  export { PDFDocument } from './PDFDocument.js';
2
2
  export { flattenXFDF, mergeXFDFIntoPDF } from './flattenXFDF.js';
3
- export { wordToPDF } from './wordToPDF.js';
3
+ export { wordToPDF, LibreOfficePool } from './wordToPDF.js';
package/dist/wordToPDF.d.ts CHANGED
@@ -1,20 +1,34 @@
1
- /**
2
- * Convert a Word document (.docx/.doc) to PDF.
3
- *
4
- * Requires LibreOffice installed on the system.
5
- * Install: `apt-get install libreoffice-core` (Ubuntu) or `brew install libreoffice` (macOS)
6
- *
7
- * @param wordBuffer - The Word document as a Buffer
8
- * @param format - Output format (default: 'pdf')
9
- * @returns The PDF as a Buffer
10
- *
11
- * @example
12
- * ```ts
13
- * import { wordToPDF } from '@docverse/server';
14
- *
15
- * const docx = fs.readFileSync('document.docx');
16
- * const pdf = await wordToPDF(docx);
17
- * fs.writeFileSync('document.pdf', pdf);
18
- * ```
19
- */
20
- export declare function wordToPDF(wordBuffer: Buffer | Uint8Array, format?: string): Promise<Buffer>;
1
/**
 * Options accepted by `wordToPDF` when an object is passed instead of a
 * plain format string.
 */
export interface ConvertOptions {
    /** Target format handed to LibreOffice. `wordToPDF` uses `'pdf'` when omitted. */
    format?: string;
    /**
     * Requested conversion backend. `wordToPDF` uses `'process'` when omitted.
     * `'socket'` only takes effect when `socketUrl` is also provided; every
     * other combination is routed to the process backend.
     */
    mode?: 'process' | 'socket' | 'pool';
    /**
     * Socket backend target: either a complete `socket,...` connection string,
     * or a bare port number (as a string) that is combined with host 127.0.0.1.
     */
    socketUrl?: string;
    /** Timeout in milliseconds forwarded to the selected backend. 30000 when omitted. */
    timeout?: number;
}
7
/** Construction options for `LibreOfficePool`. Every field is optional. */
export interface PoolOptions {
    /** Number of LibreOffice worker processes to spawn. Defaults to 2. */
    workers?: number;
    /**
     * Path or command name of the LibreOffice binary. When omitted the pool
     * probes a list of common install locations and falls back to `soffice`.
     */
    libreOfficePath?: string;
    /**
     * Directory used for worker profiles and conversion scratch files.
     * Defaults to `docverse_pool` inside the OS temp directory.
     */
    tempDir?: string;
    /** Milliseconds to wait for workers to start listening. Defaults to 15000. */
    startTimeout?: number;
    /** Milliseconds allowed for a single conversion. Defaults to 30000. */
    convertTimeout?: number;
}
14
/**
 * Convert a Word document (.docx/.doc) to PDF (or another LibreOffice target format).
 *
 * Requires LibreOffice installed on the system.
 * Install: `apt-get install libreoffice-core` (Ubuntu) or `brew install libreoffice` (macOS)
 *
 * @param wordBuffer - The Word document as a Buffer or Uint8Array
 * @param optionsOrFormat - Either the output format as a string (default: 'pdf'),
 *   or a `ConvertOptions` object selecting format, backend and timeout
 * @returns The converted document as a Buffer
 *
 * @example
 * ```ts
 * import fs from 'fs';
 * import { wordToPDF } from '@docverse-pdf/server';
 *
 * const docx = fs.readFileSync('document.docx');
 * const pdf = await wordToPDF(docx);
 * fs.writeFileSync('document.pdf', pdf);
 * ```
 */
export declare function wordToPDF(wordBuffer: Buffer | Uint8Array, optionsOrFormat?: string | ConvertOptions): Promise<Buffer>;
15
/**
 * A fixed-size pool of headless LibreOffice worker processes used to run
 * document conversions with bounded concurrency.
 *
 * Call `start()` before `convert()`, and `stop()` when the pool is no longer needed.
 */
export declare class LibreOfficePool {
    private workers;
    private queue;
    private opts;
    private running;
    private tempDir;
    /** Resolve options to their defaults; no process is spawned here unless the binary must be auto-detected. */
    constructor(options?: PoolOptions);
    private findLibreOffice;
    /** Spawn the worker processes and wait (up to `startTimeout`) for them to listen. No-op if already running. */
    start(): Promise<void>;
    private waitForWorkers;
    /**
     * Convert a document. Requests are queued when no ready worker is free.
     *
     * @param wordBuffer - The input document
     * @param format - Output format (default: 'pdf')
     * @throws Error if the pool has not been started
     */
    convert(wordBuffer: Buffer | Uint8Array, format?: string): Promise<Buffer>;
    private executeConvert;
    /** Mark the pool as stopped, send SIGTERM to every worker and empty the queue. */
    stop(): Promise<void>;
    /** Snapshot of pool state: total workers, busy workers, queued requests and ready workers. */
    getStats(): {
        workers: number;
        busy: number;
        queued: number;
        ready: number;
    };
}
package/dist/wordToPDF.js CHANGED
@@ -1,27 +1,260 @@
1
1
  import { promisify } from 'util';
2
- /**
3
- * Convert a Word document (.docx/.doc) to PDF.
4
- *
5
- * Requires LibreOffice installed on the system.
6
- * Install: `apt-get install libreoffice-core` (Ubuntu) or `brew install libreoffice` (macOS)
7
- *
8
- * @param wordBuffer - The Word document as a Buffer
9
- * @param format - Output format (default: 'pdf')
10
- * @returns The PDF as a Buffer
11
- *
12
- * @example
13
- * ```ts
14
- * import { wordToPDF } from '@docverse/server';
15
- *
16
- * const docx = fs.readFileSync('document.docx');
17
- * const pdf = await wordToPDF(docx);
18
- * fs.writeFileSync('document.pdf', pdf);
19
- * ```
20
- */
21
- export async function wordToPDF(wordBuffer, format = 'pdf') {
2
+ import { spawn, execSync } from 'child_process';
3
+ import { writeFileSync, readFileSync, unlinkSync, mkdirSync, existsSync } from 'fs';
4
+ import { join } from 'path';
5
+ import { tmpdir } from 'os';
6
+ import { randomBytes } from 'crypto';
7
+ // ═══════════════════════════════════════════════
8
+ // SIMPLE CONVERT (backward compatible)
9
+ // ═══════════════════════════════════════════════
10
/**
 * Convert a Word document, dispatching to the socket or process backend.
 *
 * The second argument is either a bare format string (the original call
 * shape) or an options object with `format`, `mode`, `socketUrl`, `timeout`.
 * The socket backend is used only when `mode` is 'socket' AND a `socketUrl`
 * is supplied; everything else goes through the process backend.
 *
 * @param wordBuffer - Input document bytes
 * @param optionsOrFormat - Format string or options object (default: 'pdf')
 * @returns The converted document as a Buffer
 */
export async function wordToPDF(wordBuffer, optionsOrFormat = 'pdf') {
    let settings;
    if (typeof optionsOrFormat === 'string') {
        settings = { format: optionsOrFormat };
    }
    else {
        settings = optionsOrFormat;
    }
    const outputFormat = settings.format ?? 'pdf';
    const backend = settings.mode ?? 'process';
    const limitMs = settings.timeout ?? 30000;
    const socketRequested = backend === 'socket' && settings.socketUrl;
    if (!socketRequested) {
        return convertViaProcess(wordBuffer, outputFormat, limitMs);
    }
    return convertViaSocket(wordBuffer, outputFormat, settings.socketUrl, limitMs);
}
22
+ // ═══════════════════════════════════════════════
23
+ // PROCESS MODE (original, 3-5s)
24
+ // ═══════════════════════════════════════════════
25
/**
 * Convert through the `libreoffice-convert` package.
 *
 * @param wordBuffer - Input document bytes
 * @param format - Target format passed to `convert`
 * @param timeout - Maximum time to wait in milliseconds; a non-positive or
 *   non-finite value disables the limit
 * @returns The converted document as a Buffer
 * @throws Error('Process convert timeout') when the limit elapses first
 */
async function convertViaProcess(wordBuffer, format, timeout) {
    const libre = await import('libreoffice-convert');
    // When a CommonJS package is loaded via import(), its exports may only be
    // reachable through `default`, so accept either shape.
    const convertFn = libre.convert ?? libre.default?.convert;
    if (typeof convertFn !== 'function') {
        throw new Error('libreoffice-convert does not expose a convert() function');
    }
    const convert = promisify(convertFn);
    const inputBuffer = Buffer.from(wordBuffer);
    const conversion = convert(inputBuffer, format, undefined);
    if (!Number.isFinite(timeout) || timeout <= 0) {
        return await conversion;
    }
    // Enforce the caller's timeout. This only stops waiting; the conversion
    // itself is not cancelled from here.
    let timer;
    const deadline = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error('Process convert timeout')), timeout);
    });
    try {
        return await Promise.race([conversion, deadline]);
    }
    finally {
        clearTimeout(timer);
    }
}
32
+ // ═══════════════════════════════════════════════
33
+ // SOCKET MODE (0.2-0.5s, needs running LO instance)
34
+ // ═══════════════════════════════════════════════
35
/**
 * Convert by invoking `soffice` with an `--accept` connection string.
 *
 * The input is written to a uniquely named file in the OS temp directory,
 * `soffice --convert-to` is run against it, and the produced file is read
 * back. Both temp files are removed afterwards, whatever the outcome.
 *
 * @param wordBuffer - Input document bytes
 * @param format - `--convert-to` value: an extension, optionally followed by `:FilterName`
 * @param socketUrl - Full `socket,...` connection string, or a bare port on 127.0.0.1
 * @param timeout - Maximum conversion time in milliseconds
 * @returns The converted document as a Buffer
 * @throws Error on timeout, non-zero exit code, spawn failure, or missing output file
 */
async function convertViaSocket(wordBuffer, format, socketUrl, timeout) {
    const tmp = tmpdir();
    const id = randomBytes(8).toString('hex');
    const inputPath = join(tmp, `docverse_in_${id}.docx`);
    // The output keeps the input's base name and takes the target extension,
    // which is the part of `format` before any ":FilterName" suffix.
    const extension = String(format).split(':')[0];
    const outputPath = join(tmp, `docverse_in_${id}.${extension}`);
    try {
        writeFileSync(inputPath, Buffer.from(wordBuffer));
        // Accept either a ready-made connection string or just a port number.
        const target = String(socketUrl);
        const connStr = target.includes('socket,')
            ? target
            : `socket,host=127.0.0.1,port=${target};urp;`;
        await new Promise((resolve, reject) => {
            // No shell: arguments are passed verbatim, so `format`, `socketUrl`
            // and the ';' inside the connection string cannot be interpreted as
            // shell syntax (and therefore need no quoting).
            const proc = spawn('soffice', [
                `--accept=${connStr}`,
                '--norestore',
                '--headless',
                '--convert-to', format,
                '--outdir', tmp,
                inputPath,
            ], { stdio: 'ignore', timeout });
            const timer = setTimeout(() => {
                // Do not leave the child running after we give up on it.
                try {
                    proc.kill();
                }
                catch { }
                reject(new Error('Socket convert timeout'));
            }, timeout);
            proc.on('close', (code) => {
                clearTimeout(timer);
                if (code === 0)
                    resolve();
                else
                    reject(new Error(`LibreOffice exited with code ${code}`));
            });
            proc.on('error', (err) => {
                clearTimeout(timer);
                reject(err);
            });
        });
        if (!existsSync(outputPath)) {
            throw new Error('Conversion failed: output file not created');
        }
        return readFileSync(outputPath);
    }
    finally {
        // Best-effort cleanup: either file may legitimately not exist.
        try {
            unlinkSync(inputPath);
        }
        catch { }
        try {
            unlinkSync(outputPath);
        }
        catch { }
    }
}
84
+ // ═══════════════════════════════════════════════
85
+ // LIBREOFFICE POOL (fastest: warm instances)
86
+ // ═══════════════════════════════════════════════
87
/**
 * Fixed-size pool of headless LibreOffice processes with a FIFO request queue.
 *
 * Each worker entry tracks a long-lived listener process plus `busy`/`ready`
 * flags. A conversion itself runs in a freshly spawned LibreOffice process;
 * the worker entry bounds how many conversions run at the same time.
 */
export class LibreOfficePool {
    workers = [];
    queue = [];
    opts;
    running = false;
    tempDir;
    /**
     * @param options - Optional overrides: `workers` (2), `libreOfficePath`
     *   (auto-detected), `tempDir` (`<os tmp>/docverse_pool`),
     *   `startTimeout` (15000 ms), `convertTimeout` (30000 ms)
     */
    constructor(options = {}) {
        this.opts = {
            workers: options.workers ?? 2,
            // Detection shells out, so it only runs when no path was supplied.
            libreOfficePath: options.libreOfficePath ?? this.findLibreOffice(),
            tempDir: options.tempDir ?? join(tmpdir(), 'docverse_pool'),
            startTimeout: options.startTimeout ?? 15000,
            convertTimeout: options.convertTimeout ?? 30000,
        };
        this.tempDir = this.opts.tempDir;
    }
    /**
     * Return the first candidate for which `<candidate> --version` succeeds,
     * or `'soffice'` when none does.
     */
    findLibreOffice() {
        const paths = [
            '/usr/bin/soffice',
            '/usr/bin/libreoffice',
            '/usr/local/bin/soffice',
            '/Applications/LibreOffice.app/Contents/MacOS/soffice',
            'soffice',
        ];
        for (const p of paths) {
            try {
                execSync(`${p} --version`, { stdio: 'ignore' });
                return p;
            }
            catch { }
        }
        return 'soffice';
    }
    /**
     * Spawn the workers and wait for them to accept connections.
     *
     * @throws Error when no worker became ready within `startTimeout`; in that
     *   case every spawned process is terminated and the pool stays stopped.
     */
    async start() {
        if (this.running)
            return;
        mkdirSync(this.tempDir, { recursive: true });
        const { pathToFileURL } = await import('url');
        const basePort = 2002 + Math.floor(Math.random() * 1000);
        for (let i = 0; i < this.opts.workers; i++) {
            const port = basePort + i;
            const userDir = join(this.tempDir, `worker_${i}`);
            mkdirSync(userDir, { recursive: true });
            const proc = spawn(this.opts.libreOfficePath, [
                '--headless',
                '--norestore',
                '--nologo',
                '--nodefault',
                `--accept=socket,host=127.0.0.1,port=${port};urp;`,
                // Build a proper file URL (absolute, percent-encoded) for the profile dir.
                `-env:UserInstallation=${pathToFileURL(userDir).href}`,
            ], {
                stdio: 'ignore',
                detached: false,
            });
            const worker = { process: proc, port, busy: false, ready: false, failed: false };
            // Without an 'error' listener a failed spawn (e.g. missing binary)
            // would surface as an uncaught exception and take the process down.
            proc.on('error', () => { worker.failed = true; });
            // An early exit means this worker will never listen; stop probing it.
            proc.on('exit', () => { worker.failed = true; });
            this.workers.push(worker);
        }
        // Wait for workers to start
        await this.waitForWorkers();
        if (!this.workers.some(w => w.ready)) {
            // With no ready worker every convert() call would queue forever.
            for (const worker of this.workers) {
                try {
                    worker.process.kill('SIGTERM');
                }
                catch { }
            }
            this.workers = [];
            throw new Error('LibreOffice pool failed to start: no worker became ready');
        }
        this.running = true;
    }
    /**
     * Poll each worker's port until it accepts a TCP connection, the worker
     * fails, or the shared `startTimeout` deadline passes.
     */
    async waitForWorkers() {
        const net = await import('net');
        const deadline = Date.now() + this.opts.startTimeout;
        // Resolve when the port accepts a connection; reject on error or after 500 ms.
        const probe = (port) => new Promise((resolve, reject) => {
            const sock = net.createConnection(port, '127.0.0.1');
            const timer = setTimeout(() => { sock.destroy(); reject(new Error('probe timeout')); }, 500);
            sock.on('connect', () => { clearTimeout(timer); sock.destroy(); resolve(); });
            sock.on('error', (err) => { clearTimeout(timer); sock.destroy(); reject(err); });
        });
        for (const worker of this.workers) {
            while (!worker.failed && Date.now() < deadline) {
                try {
                    await probe(worker.port);
                    worker.ready = true;
                    break;
                }
                catch {
                    await new Promise(r => setTimeout(r, 300));
                }
            }
        }
    }
    /**
     * Convert a document, immediately if a ready worker is free, otherwise
     * after queueing.
     *
     * @param wordBuffer - Input document bytes
     * @param format - `--convert-to` value (default: 'pdf')
     * @returns The converted document as a Buffer
     * @throws Error if the pool has not been started
     */
    async convert(wordBuffer, format = 'pdf') {
        if (!this.running)
            throw new Error('Pool not started. Call pool.start() first.');
        const buf = Buffer.from(wordBuffer);
        return new Promise((resolve, reject) => {
            const worker = this.workers.find(w => !w.busy && w.ready);
            if (worker) {
                void this.executeConvert(worker, buf, format, resolve, reject);
            }
            else {
                this.queue.push({ buffer: buf, format, resolve, reject });
            }
        });
    }
    /**
     * Run one conversion on behalf of `worker`, settle the caller's promise,
     * clean up the scratch files, then pick up the next queued request.
     */
    async executeConvert(worker, buffer, format, resolve, reject) {
        worker.busy = true;
        const id = randomBytes(8).toString('hex');
        const inputPath = join(this.tempDir, `in_${id}.docx`);
        // The output extension is the part of `format` before any ":FilterName" suffix.
        const extension = String(format).split(':')[0];
        const outputPath = join(this.tempDir, `in_${id}.${extension}`);
        try {
            writeFileSync(inputPath, buffer);
            await new Promise((res, rej) => {
                const proc = spawn(this.opts.libreOfficePath, [
                    '--headless',
                    '--norestore',
                    '--nologo',
                    '--convert-to', format,
                    '--outdir', this.tempDir,
                    inputPath,
                ], { stdio: 'ignore', timeout: this.opts.convertTimeout });
                const timer = setTimeout(() => {
                    // Do not leave the child running after we give up on it.
                    try {
                        proc.kill();
                    }
                    catch { }
                    rej(new Error('Convert timeout'));
                }, this.opts.convertTimeout);
                proc.on('close', (code) => {
                    clearTimeout(timer);
                    if (code === 0)
                        res();
                    else
                        rej(new Error(`LibreOffice exit code ${code}`));
                });
                proc.on('error', (err) => {
                    clearTimeout(timer);
                    rej(err);
                });
            });
            if (!existsSync(outputPath)) {
                throw new Error('Output file not created');
            }
            resolve(readFileSync(outputPath));
        }
        catch (err) {
            reject(err);
        }
        finally {
            worker.busy = false;
            // Best-effort cleanup: either file may legitimately not exist.
            try {
                unlinkSync(inputPath);
            }
            catch { }
            try {
                unlinkSync(outputPath);
            }
            catch { }
            // Process queue
            if (this.queue.length > 0) {
                const next = this.queue.shift();
                void this.executeConvert(worker, next.buffer, next.format, next.resolve, next.reject);
            }
        }
    }
    /**
     * Stop the pool: terminate every worker with SIGTERM and reject all
     * requests still waiting in the queue so their callers do not hang.
     */
    async stop() {
        this.running = false;
        const pending = this.queue;
        this.queue = [];
        for (const worker of this.workers) {
            try {
                worker.process.kill('SIGTERM');
            }
            catch { }
        }
        this.workers = [];
        for (const job of pending) {
            job.reject(new Error('Pool stopped'));
        }
    }
    /** Snapshot of pool state: worker count, busy workers, queued requests, ready workers. */
    getStats() {
        return {
            workers: this.workers.length,
            busy: this.workers.filter(w => w.busy).length,
            queued: this.queue.length,
            ready: this.workers.filter(w => w.ready).length,
        };
    }
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@docverse-pdf/server",
3
- "version": "1.0.0",
3
+ "version": "1.0.1",
4
4
  "description": "DocVerse Server SDK — server-side PDF processing, XFDF merge, Word to PDF, image extraction",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",