node-poppler 8.0.3 → 8.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  "use strict";
2
2
 
3
3
  const { execFile, spawn, spawnSync } = require("node:child_process");
4
- const { normalize, resolve: pathResolve } = require("node:path");
4
+ const { basename, normalize, resolve: pathResolve } = require("node:path");
5
5
  const { platform } = require("node:process");
6
6
  const { promisify } = require("node:util");
7
7
  const camelCase = require("camelcase");
@@ -9,6 +9,10 @@ const { lt } = require("semver");
9
9
 
10
10
  const execFileAsync = promisify(execFile);
11
11
 
12
+ /**
13
+ * @type {Readonly<Record<string, string>>}
14
+ * @ignore
15
+ */
12
16
  const ERROR_MSGS = {
13
17
  0: "No Error",
14
18
  1: "Error opening a PDF file",
@@ -36,6 +40,510 @@ const PDF_INFO_PATH_REG = /(.+)pdfinfo/u;
36
40
  * @typedef {Record<string, OptionDetails>} PopplerAcceptedOptions
37
41
  */
38
42
 
43
+ /**
44
+ * @typedef PdfAttachOptions
45
+ * @property {boolean} [printVersionInfo] Print copyright and version info.
46
+ * @property {boolean} [replace] Replace embedded file with same name (if it exists).
47
+ */
48
+
49
+ /**
50
+ * @typedef PdfDetachOptions
51
+ * @property {boolean} [listEmbedded] List all of the embedded files in the PDF file.
52
+ * File names are converted to the text encoding specified by `options.outputEncoding`.
53
+ * @property {string} [outputEncoding] Sets the encoding to use for text output.
54
+ * This defaults to `UTF-8`.
55
+ * @property {string} [ownerPassword] Owner password (for encrypted files).
56
+ * @property {string} [outputPath] Set the file name used when saving an embedded file with
57
+ * the save option enabled, or the directory if `options.saveAllFiles` is used.
58
+ * @property {boolean} [printVersionInfo] Print copyright and version info.
59
+ * @property {boolean} [saveAllFiles] Save all of the embedded files. This uses the file
60
+ * names associated with the embedded files (as printed by `options.listEmbedded`).
61
+ * By default, the files are saved in the current directory; this can be changed
62
+ * with `options.outputPath`.
63
+ * @property {string} [saveFile] Save the specified embedded file.
64
+ * By default, this uses the file name associated with the embedded file (as printed by
65
+ * `options.listEmbedded`); the file name can be changed with `options.outputPath`.
66
+ * @property {number} [saveSpecificFile] Save the specified embedded file.
67
+ * By default, this uses the file name associated with the embedded file (as printed by
68
+ * `options.listEmbedded`); the file name can be changed with `options.outputPath`.
69
+ * @property {string} [userPassword] User password (for encrypted files).
70
+ */
71
+
72
+ /**
73
+ * @typedef PdfFontsOptions
74
+ * @property {number} [firstPageToExamine] Specifies the first page to examine.
75
+ * @property {number} [lastPageToExamine] Specifies the last page to examine.
76
+ * @property {boolean} [listSubstitutes] List the substitute fonts that poppler
77
+ * will use for non-embedded fonts.
78
+ * @property {string} [ownerPassword] Owner password (for encrypted files).
79
+ * @property {boolean} [printVersionInfo] Print copyright and version info.
80
+ * @property {string} [userPassword] User password (for encrypted files).
81
+ */
82
+
83
+ /**
84
+ * @typedef PdfImagesOptions
85
+ * @property {boolean} [allFiles] Write JPEG, JPEG2000, JBIG2, and CCITT images in their native format.
86
+ * CMYK files are written as TIFF files. All other images are written as PNG files.
87
+ * @property {boolean} [ccittFile] Generate CCITT images as CCITT files.
88
+ * @property {number} [firstPageToConvert] Specifies the first page to convert.
89
+ * @property {number} [lastPageToConvert] Specifies the last page to convert.
90
+ * @property {boolean} [jbig2File] Generate JBIG2 images as JBIG2 files.
91
+ * @property {boolean} [jpeg2000File] Generate JPEG2000 images as JP2 files.
92
+ * @property {boolean} [jpegFile] Generate JPEG images as JPEG files.
93
+ * @property {boolean} [list] Instead of writing the images, list the
94
+ * images along with various information for each image.
95
+ * NOTE: Do not specify the outputPrefix with this option.
96
+ * @property {string} [ownerPassword] Owner password (for encrypted files).
97
+ * @property {boolean} [pngFile] Change the default output format to PNG.
98
+ * @property {boolean} [printVersionInfo] Print copyright and version info.
99
+ * @property {boolean} [tiffFile] Change the default output format to TIFF.
100
+ * @property {string} [userPassword] Specify the user password for the PDF file.
101
+ */
102
+
103
+ /**
104
+ * @typedef PdfInfoOptions
105
+ * @property {number} [firstPageToConvert] First page to print.
106
+ * @property {number} [lastPageToConvert] Last page to print.
107
+ * @property {boolean} [listEncodingOptions] List the available encodings.
108
+ * @property {string} [outputEncoding] Sets the encoding to use for text output.
109
+ * This defaults to `UTF-8`.
110
+ * @property {string} [ownerPassword] Owner password (for encrypted files).
111
+ * @property {boolean} [printAsJson] Print result as a JSON object.
112
+ * @property {boolean} [printBoundingBoxes] Prints the page box bounding boxes:
113
+ * MediaBox, CropBox, BleedBox, TrimBox, and ArtBox.
114
+ * @property {boolean} [printDocStruct] Prints the logical document structure
115
+ * of a Tagged-PDF file.
116
+ * @property {boolean} [printDocStructText] Print the textual content along with the
117
+ * document structure of a Tagged-PDF file. Note that extracting text this way might be slow
118
+ * for big PDF files.
119
+ * @property {boolean} [printIsoDates] Prints dates in ISO-8601 format (including the time zone).
120
+ * @property {boolean} [printJS] Prints all JavaScript in the PDF file.
121
+ * @property {boolean} [printMetadata] Prints document-level metadata. (This is the `Metadata`
122
+ * stream from the PDF file's Catalog object).
123
+ * @property {boolean} [printNamedDests] Print a list of all named destinations. If a page range
124
+ * is specified using the `options.firstPageToConvert` and `options.lastPageToConvert` options, only destinations
125
+ * in the page range are listed.
126
+ * @property {boolean} [printRawDates] Prints the raw (undecoded) date strings, directly from the PDF file.
127
+ * @property {boolean} [printUrls] Print all URLs in the PDF; only URLs referenced by PDF objects
128
+ * such as Link Annotations are listed, not URL strings in the text content.
129
+ * @property {boolean} [printVersionInfo] Print copyright and version info.
130
+ * @property {string} [userPassword] User password (for encrypted files).
131
+ */
132
+
133
+ /**
134
+ * @typedef PdfSeparateOptions
135
+ * @property {number} [firstPageToExtract] Specifies the first page to extract.
136
+ * This defaults to page 1.
137
+ * @property {number} [lastPageToExtract] Specifies the last page to extract.
138
+ * This defaults to the last page of the PDF file.
139
+ * @property {boolean} [printVersionInfo] Print copyright and version info.
140
+ */
141
+
142
+ /**
143
+ * @typedef PdfToCairoOptions
144
+ * @property {('best'|'default'|'fast'|'good'|'gray'|'none'|'subpixel')} [antialias] Set the cairo
145
+ * antialias option used for text and drawing in image files (or rasterized regions in vector output).
146
+ * @property {boolean} [cropBox] Uses the crop box rather than media box when
147
+ * generating the files (PNG/JPEG/TIFF only).
148
+ * @property {number} [cropHeight] Specifies the height of crop area in pixels
149
+ * (image output) or points (vector output).
150
+ * @property {number} [cropSize] Specifies the size of crop square in pixels
151
+ * (image output) or points (vector output).
152
+ * @property {number} [cropWidth] Specifies the width of crop area in pixels
153
+ * (image output) or points (vector output).
154
+ * @property {number} [cropXAxis] Specifies the x-coordinate of the crop area top left
155
+ * corner in pixels (image output) or points (vector output).
156
+ * @property {number} [cropYAxis] Specifies the y-coordinate of the crop area top left
157
+ * corner in pixels (image output) or points (vector output).
158
+ * @property {boolean} [duplex] Adds the %%IncludeFeature: *Duplex DuplexNoTumble DSC
159
+ * comment to the PostScript file (PS only). This tells the print manager to enable duplexing.
160
+ * @property {boolean} [epsFile] Generate an EPS file. An EPS file contains a single image,
161
+ * so if you use this option with a multi-page PDF file, you must use `options.firstPageToConvert` and
162
+ * `options.lastPageToConvert` to specify a single page.
163
+ * The page size options (originalPageSizes, paperSize, paperWidth, paperHeight) cannot be used
164
+ * with this option.
165
+ * @property {boolean} [evenPagesOnly] Generates only the even numbered pages.
166
+ * @property {boolean} [fillPage] Expand PDF pages smaller than the paper to fill the
167
+ * paper (PS,PDF,SVG only). By default, these pages are not scaled.
168
+ * @property {number} [firstPageToConvert] Specifies the first page to convert.
169
+ * @property {boolean} [grayscaleFile] Generate grayscale file (PNG, JPEG, and TIFF only).
170
+ * @property {string} [iccFile] Use the specified ICC file as the output profile
171
+ * (PNG only). The profile will be embedded in the PNG file.
172
+ * @property {boolean} [jpegFile] Generate JPEG file(s).
173
+ * @property {string} [jpegOptions] When used with `options.jpegFile`, this option can
174
+ * be used to control the JPEG compression parameters. It takes a string of the form
175
+ * `"<opt>=<val>[,<opt>=<val>]"`. Currently available options are:
176
+ * - `quality` Selects the JPEG quality value. The value must be an integer between 0 and 100.
177
+ * - `progressive` Select progressive JPEG output. The possible values are "y", "n", indicating
178
+ * progressive (yes) or non-progressive (no), respectively.
179
+ * - `optimize` Sets whether to compute optimal Huffman coding tables for the JPEG output, which
180
+ * will create smaller files but make an extra pass over the data. The value must be "y" or "n",
181
+ * with "y" performing optimization, otherwise the default Huffman tables are used.
182
+ *
183
+ * Example: `"quality=95,optimize=y"`.
184
+ * @property {number} [lastPageToConvert] Specifies the last page to convert.
185
+ * @property {boolean} [monochromeFile] Generate monochrome file (PNG and TIFF only).
186
+ * @property {boolean} [noCenter] By default, PDF pages smaller than the paper
187
+ * (after any scaling) are centered on the paper. This option causes them to be aligned to
188
+ * the lower-left corner of the paper instead (PS,PDF,SVG only).
189
+ * @property {boolean} [noCrop] By default, printing output is cropped to the CropBox
190
+ * specified in the PDF file. This option disables cropping (PS, PDF, SVG only).
191
+ * @property {boolean} [noShrink] Do not scale PDF pages which are larger than the paper
192
+ * (PS,PDF,SVG only). By default, pages larger than the paper are shrunk to fit.
193
+ * @property {boolean} [oddPagesOnly] Generates only the odd numbered pages.
194
+ * @property {boolean} [originalPageSizes] Set the paper size of each page to match
195
+ * the size specified in the PDF file.
196
+ * @property {string} [ownerPassword] Specify the owner password for the PDF file.
197
+ * Providing this will bypass all security restrictions.
198
+ * @property {number} [paperHeight] Set the paper height, in points (PS, PDF, SVG only).
199
+ * @property {('A3'|'A4'|'legal'|'letter'|'match')} [paperSize] Set the paper size to one of `A3`, `A4`,
200
+ * `legal`, or `letter` (PS,PDF,SVG only). This can also be set to `match`, which will set the paper size
201
+ * of each page to match the size specified in the PDF file. If none of the paperSize,
202
+ * paperWidth, or paperHeight options are specified the default is to match the paper size.
203
+ * @property {number} [paperWidth] Set the paper width, in points (PS,PDF,SVG only).
204
+ * @property {boolean} [pdfFile] Generate PDF file.
205
+ * @property {boolean} [pngFile] Generate PNG file(s).
206
+ * @property {boolean} [printVersionInfo] Print copyright and version information.
207
+ * @property {boolean} [printDocStruct] If the input file contains structural information
208
+ * about the document's content, write this information to the output file (PDF only).
209
+ * @property {boolean} [psFile] Generate PS file.
210
+ * @property {boolean} [psLevel2] Generate Level 2 PostScript (PS only).
211
+ * @property {boolean} [psLevel3] Generate Level 3 PostScript (PS only). This enables all
212
+ * Level 2 features plus shading patterns and masked images. This is the default setting.
213
+ * @property {boolean} [quiet] Do not print any messages or errors.
214
+ * @property {number} [resolutionXAxis] Specifies the X resolution, in pixels per inch of
215
+ * image files (or rasterized regions in vector output). The default is 150 PPI.
216
+ * @property {number} [resolutionXYAxis] Specifies the X and Y resolution, in pixels per
217
+ * inch of image files (or rasterized regions in vector output). The default is 150 PPI.
218
+ * @property {number} [resolutionYAxis] Specifies the Y resolution, in pixels per inch of
219
+ * image files (or rasterized regions in vector output). The default is 150 PPI.
220
+ * @property {number} [scalePageTo] Scales the long side of each page (width for landscape
221
+ * pages, height for portrait pages) to fit in scale-to pixels. The size of the short side will
222
+ * be determined by the aspect ratio of the page (PNG/JPEG/TIFF only).
223
+ * @property {number} [scalePageToXAxis] Scales each page horizontally to fit in scale-to-x
224
+ * pixels. If scale-to-y is set to -1, the vertical size will be determined by the aspect ratio of
225
+ * the page (PNG/JPEG/TIFF only).
226
+ * @property {number} [scalePageToYAxis] Scales each page vertically to fit in scale-to-y
227
+ * pixels. If scale-to-x is set to -1, the horizontal size will be determined by the aspect ratio of
228
+ * the page (PNG/JPEG/TIFF only).
229
+ * @property {boolean} [singleFile] Writes only the first page and does not add digits.
230
+ * Can only be used with `options.jpegFile`, `options.pngFile`, and `options.tiffFile`.
231
+ * @property {boolean} [svgFile] Generate SVG (Scalable Vector Graphics) file.
232
+ * @property {('deflate'|'jpeg'|'lzw'|'none'|'packbits')} [tiffCompression] Set TIFF compression.
233
+ * @property {boolean} [tiffFile] Generate TIFF file(s).
234
+ * @property {boolean} [transparentPageColor] Use a transparent page color
235
+ * instead of white (PNG and TIFF only).
236
+ * @property {string} [userPassword] Specify the user password for the PDF file.
237
+ */
238
+
239
+ /**
240
+ * @typedef PdfToHtmlOptions
241
+ * @property {boolean} [complexOutput] Generate complex output.
242
+ * @property {boolean} [dataUrls] Use data URLs instead of external images in HTML.
243
+ * @property {boolean} [exchangePdfLinks] Exchange .pdf links with .html.
244
+ * @property {boolean} [extractHidden] Force hidden text extraction.
245
+ * @property {number} [firstPageToConvert] First page to print.
246
+ * @property {boolean} [fontFullName] Outputs the font name without any substitutions.
247
+ * @property {boolean} [ignoreImages] Ignore images.
248
+ * @property {('JPG'|'PNG')} [imageFormat] Image file format for Splash output (JPG or PNG).
249
+ * If complexOutput is selected, but imageFormat is not specified, PNG will be assumed.
250
+ * @property {number} [lastPageToConvert] Last page to print.
251
+ * @property {boolean} [noDrm] Override document DRM settings.
252
+ * @property {boolean} [noFrames] Generate no frames. Not supported in complex output mode.
253
+ * @property {boolean} [noMergeParagraph] Do not merge paragraphs.
254
+ * @property {boolean} [noRoundedCoordinates] Do not round coordinates
255
+ * (with XML output only).
256
+ * @property {string} [outputEncoding] Sets the encoding to use for text output.
257
+ * This defaults to `UTF-8`.
258
+ * @property {string} [ownerPassword] Owner password (for encrypted files).
259
+ * @property {boolean} [printVersionInfo] Print copyright and version info.
260
+ * @property {boolean} [quiet] Do not print any messages or errors.
261
+ * @property {boolean} [singlePage] Generate single HTML that includes all pages.
262
+ * @property {boolean} [stdout] Use standard output.
263
+ * @property {string} [userPassword] User password (for encrypted files).
264
+ * @property {number} [wordBreakThreshold] Adjust the word break threshold percent.
265
+ * Default is 10. Word break occurs when distance between two adjacent characters is greater
266
+ * than this percent of character height.
267
+ * @property {boolean} [xmlOutput] Output for XML post-processing.
268
+ * @property {number} [zoom] Zoom the PDF document (default 1.5).
269
+ */
270
+
271
+ /**
272
+ * @typedef PdfToPpmOptions
273
+ * @property {('no'|'yes')} [antialiasFonts] Enable or disable font anti-aliasing.
274
+ * This defaults to `yes`.
275
+ * @property {('no'|'yes')} [antialiasVectors] Enable or disable vector anti-aliasing.
276
+ * This defaults to `yes`.
277
+ * @property {boolean} [cropBox] Uses the crop box rather than media box when
278
+ * generating the files (PNG/JPEG/TIFF only).
279
+ * @property {number} [cropHeight] Specifies the height of crop area in pixels
280
+ * (image output) or points (vector output).
281
+ * @property {number} [cropSize] Specifies the size of crop square in pixels
282
+ * (image output) or points (vector output).
283
+ * @property {number} [cropWidth] Specifies the width of crop area in pixels
284
+ * (image output) or points (vector output).
285
+ * @property {number} [cropXAxis] Specifies the x-coordinate of the crop area top left
286
+ * corner in pixels (image output) or points (vector output).
287
+ * @property {number} [cropYAxis] Specifies the y-coordinate of the crop area top left
288
+ * corner in pixels (image output) or points (vector output).
289
+ * @property {string} [defaultCmykProfile] If Poppler is compiled with colour management support, this option
290
+ * sets the DefaultCMYK color space to the ICC profile stored in the display profile file passed.
291
+ * @property {string} [defaultGrayProfile] If Poppler is compiled with colour management support, this option
292
+ * sets the DefaultGray color space to the ICC profile stored in the display profile file passed.
293
+ * @property {string} [defaultRgbProfile] If Poppler is compiled with colour management support, this option
294
+ * sets the DefaultRGB color space to the ICC profile stored in the display profile file passed.
295
+ * @property {string} [displayProfile] If Poppler is compiled with colour management support, this option
296
+ * sets the display profile to the ICC profile stored in the display profile file passed.
297
+ * @property {boolean} [evenPagesOnly] Generates only the even numbered pages.
298
+ * @property {number} [firstPageToConvert] Specifies the first page to convert.
299
+ * @property {('no'|'yes')} [freetype] Enable or disable FreeType (a TrueType / Type 1 font rasterizer).
300
+ * This defaults to `yes`.
301
+ * @property {boolean} [forcePageNumber] Force page number even if there is only one page.
302
+ * @property {boolean} [grayscaleFile] Generate grayscale PGM file (instead of a color PPM file).
303
+ * @property {boolean} [hideAnnotations] Hide annotations.
304
+ * @property {boolean} [jpegFile] Generate JPEG file instead of a PPM file.
305
+ * @property {number} [lastPageToConvert] Specifies the last page to convert.
306
+ * @property {boolean} [monochromeFile] Generate monochrome PBM file (instead of a color PPM file).
307
+ * @property {boolean} [oddPagesOnly] Generates only the odd numbered pages.
308
+ * @property {string} [ownerPassword] Specify the owner password for the PDF file.
309
+ * Providing this will bypass all security restrictions.
310
+ * @property {boolean} [pngFile] Generate PNG file instead of a PPM file.
311
+ * @property {boolean} [printProgress] Print progress info as each page is generated.
312
+ * Three space-separated fields are printed to STDERR: the number of the current page, the number
313
+ * of the last page that will be generated, and the path to the file written to.
314
+ * @property {boolean} [printVersionInfo] Print copyright and version information.
315
+ * @property {boolean} [quiet] Do not print any messages or errors.
316
+ * @property {number} [resolutionXAxis] Specifies the X resolution, in pixels per inch of
317
+ * image files (or rasterized regions in vector output). The default is 150 PPI.
318
+ * @property {number} [resolutionXYAxis] Specifies the X and Y resolution, in pixels per
319
+ * inch of image files (or rasterized regions in vector output). The default is 150 PPI.
320
+ * @property {number} [resolutionYAxis] Specifies the Y resolution, in pixels per inch of
321
+ * image files (or rasterized regions in vector output). The default is 150 PPI.
322
+ * @property {number} [scalePageTo] Scales the long side of each page (width for landscape
323
+ * pages, height for portrait pages) to fit in scale-to pixels. The size of the short side will
324
+ * be determined by the aspect ratio of the page.
325
+ * @property {number} [scalePageToXAxis] Scales each page horizontally to fit in scale-to-x
326
+ * pixels. If scale-to-y is set to -1, the vertical size will be determined by the aspect ratio of
327
+ * the page.
328
+ * @property {number} [scalePageToYAxis] Scales each page vertically to fit in scale-to-y
329
+ * pixels. If scale-to-x is set to -1, the horizontal size will be determined by the aspect ratio of
330
+ * the page.
331
+ * @property {string} [separator] Specify single character separator between name and page number.
332
+ * @property {boolean} [singleFile] Writes only the first page and does not add digits.
333
+ * @property {('none'|'shape'|'solid')} [thinLineMode] Specifies the thin line mode. This defaults to `none`.
334
+ * @property {('deflate'|'jpeg'|'lzw'|'none'|'packbits')} [tiffCompression] Set TIFF compression.
335
+ * @property {boolean} [tiffFile] Generate TIFF file instead of a PPM file.
336
+ * @property {string} [userPassword] Specify the user password for the PDF file.
337
+ */
338
+
339
+ /**
340
+ * @typedef PdfToPsOptions
341
+ * @property {('no'|'yes')} [antialias] Enable anti-aliasing on rasterization, accepts `no` or `yes`.
342
+ * @property {boolean} [binary] Write binary data in Level 1 PostScript. By default,
343
+ * pdftops writes hex-encoded data in Level 1 PostScript. Binary data is non-standard in Level 1
344
+ * PostScript but reduces the file size and can be useful when Level 1 PostScript is required
345
+ * only for its restricted use of PostScript operators.
346
+ * @property {string} [defaultCmykProfile] If Poppler is compiled with colour management support, this option
347
+ * sets the DefaultCMYK color space to the ICC profile stored in the display profile file passed.
348
+ * @property {string} [defaultGrayProfile] If Poppler is compiled with colour management support, this option
349
+ * sets the DefaultGray color space to the ICC profile stored in the display profile file passed.
350
+ * @property {string} [defaultRgbProfile] If Poppler is compiled with colour management support, this option
351
+ * sets the DefaultRGB color space to the ICC profile stored in the display profile file passed.
352
+ * @property {boolean} [duplex] Set the Duplex pagedevice entry in the PostScript file.
353
+ * This tells duplex-capable printers to enable duplexing.
354
+ * @property {boolean} [epsFile] Generate an EPS file. An EPS file contains a single image,
355
+ * so if you use this option with a multi-page PDF file, you must use `options.firstPageToConvert` and
356
+ * `options.lastPageToConvert` to specify a single page.
357
+ * The page size options (originalPageSizes, paperSize, paperWidth, paperHeight) cannot be used
358
+ * with this option.
359
+ * @property {boolean} [fillPage] Expand PDF pages smaller than the paper to fill the
360
+ * paper. By default, these pages are not scaled.
361
+ * @property {number} [firstPageToConvert] Specifies the first page to convert.
362
+ * @property {number} [form] Generate PostScript form which can be imported by software
363
+ * that understands forms.
364
+ * A form contains a single page, so if you use this option with a multi-page PDF file,
365
+ * you must use `options.firstPageToConvert` and `options.lastPageToConvert` to specify a single page.
366
+ * The `options.level1` option cannot be used with `options.form`.
367
+ * No more than one of the mode options (`options.epsFile`, `options.form`) may be given.
368
+ * @property {number} [lastPageToConvert] Specifies the last page to convert.
369
+ * @property {boolean} [level1] Generate Level 1 PostScript. The resulting PostScript
370
+ * files will be significantly larger (if they contain images), but will print on Level 1 printers.
371
+ * This also converts all images to black and white.
372
+ * @property {boolean} [level1Sep] Generate Level 1 separable PostScript.
373
+ * All colors are converted to CMYK. Images are written with separate stream data for the four components.
374
+ * @property {boolean} [level2] Generate Level 2 PostScript.
375
+ * Level 2 supports color images and image compression. This is the default setting.
376
+ * @property {boolean} [level2Sep] Generate Level 2 separable PostScript. All colors are
377
+ * converted to CMYK. The PostScript separation convention operators are used to handle custom (spot) colors.
378
+ * @property {boolean} [level3] Generate Level 3 PostScript.
379
+ * This enables all Level 2 features plus CID font embedding.
380
+ * @property {boolean} [level3Sep] Generate Level 3 separable PostScript.
381
+ * The separation handling is the same as for `options.level2Sep`.
382
+ * @property {boolean} [noCenter] By default, PDF pages smaller than the paper
383
+ * (after any scaling) are centered on the paper. This option causes them to be aligned to
384
+ * the lower-left corner of the paper instead.
385
+ * @property {boolean} [noCrop] By default, printing output is cropped to the CropBox
386
+ * specified in the PDF file. This option disables cropping.
387
+ * @property {boolean} [noEmbedCIDFonts] By default, any CID PostScript fonts which are
388
+ * embedded in the PDF file are copied into the PostScript file. This option disables that embedding.
389
+ * No attempt is made to substitute for non-embedded CID PostScript fonts.
390
+ * @property {boolean} [noEmbedCIDTrueTypeFonts] By default, any CID TrueType fonts which are
391
+ * embedded in the PDF file are copied into the PostScript file. This option disables that embedding.
392
+ * No attempt is made to substitute for non-embedded CID TrueType fonts.
393
+ * @property {boolean} [noEmbedTrueTypeFonts] By default, any TrueType fonts which are embedded
394
+ * in the PDF file are copied into the PostScript file. This option causes pdfToPs to substitute base fonts instead.
395
+ * Embedded fonts make PostScript files larger, but may be necessary for readable output.
396
+ * Also, some PostScript interpreters do not have TrueType rasterizers.
397
+ * @property {boolean} [noEmbedType1Fonts] By default, any Type 1 fonts which are embedded in the PDF file
398
+ * are copied into the PostScript file. This option causes pdfToPs to substitute base fonts instead.
399
+ * Embedded fonts make PostScript files larger, but may be necessary for readable output.
400
+ * @property {boolean} [noShrink] Do not scale PDF pages which are larger than the paper.
401
+ * By default, pages larger than the paper are shrunk to fit.
402
+ * @property {boolean} [opi] Generate OPI comments for all images and forms which have OPI information.
403
+ * @property {boolean} [optimizecolorspace] By default, bitmap images in the PDF pass through to the
404
+ * output PostScript in their original color space, which produces predictable results.
405
+ * This option converts RGB and CMYK images into Gray images if every pixel of the image has equal components.
406
+ * This can fix problems when doing color separations of PDFs that contain embedded black and
407
+ * white images encoded as RGB.
408
+ * @property {boolean} [originalPageSizes] Set the paper size of each page to match
409
+ * the size specified in the PDF file.
410
+ * @property {boolean} [overprint] Enable overprinting.
411
+ * @property {string} [ownerPassword] Owner password (for encrypted files).
412
+ * @property {number} [paperHeight] Set the paper height, in points.
413
+ * @property {('A3'|'A4'|'legal'|'letter'|'match')} [paperSize] Set the paper size to one of `A3`, `A4`,
414
+ * `legal`, or `letter`. This can also be set to `match`, which will set the paper size
415
+ * of each page to match the size specified in the PDF file. If none of the paperSize,
416
+ * paperWidth, or paperHeight options are specified the default is to match the paper size.
417
+ * @property {number} [paperWidth] Set the paper width, in points.
418
+ * @property {boolean} [passfonts] By default, references to non-embedded 8-bit fonts
419
+ * in the PDF file are substituted with the closest `Helvetica`, `Times-Roman`, or `Courier` font.
420
+ * This option passes references to non-embedded fonts through to the PostScript file.
421
+ * @property {boolean} [preload] Preload images and forms.
422
+ * @property {boolean} [printVersionInfo] Print copyright and version information.
423
+ * @property {('CMYK8'|'MONO8'|'RGB8')} [processColorFormat] Sets the process color format as it is used
424
+ * during rasterization and transparency reduction.
425
+ *
426
+ * The default depends on the other settings: For `options.level1` the default is MONO8; for `options.level1Sep`,
427
+ * `options.level2Sep`, `options.level3Sep`, or `options.overprint` the default is CMYK8; in all other
428
+ * cases RGB8 is the default.
429
+ * If `options.processColorProfile` is set then `options.processColorFormat` is inferred from the specified ICC profile.
430
+ * @property {string} [processColorProfile] Sets the ICC profile that is assumed during
431
+ * rasterization and transparency reduction.
432
+ * @property {boolean} [quiet] Do not print any messages or errors.
433
+ * @property {('always'|'never'|'whenneeded')} [rasterize] By default, pdfToPs rasterizes pages as needed,
434
+ * for example, if they contain transparencies. To force rasterization, set `rasterize` to `always`.
435
+ * Use this to eliminate fonts.
436
+ * To prevent rasterization, set `rasterize` to `never`.
437
+ * This may produce files that display incorrectly.
438
+ * @property {number} [resolutionXYAxis] Specifies the X and Y resolution, in pixels per
439
+ * inch of image files (or rasterized regions in vector output). The default is 300 PPI.
440
+ * @property {string} [userPassword] User password (for encrypted files).
441
+ */
442
+
443
+ /**
444
+ * @typedef PdfToTextOptions
445
+ * @property {boolean} [boundingBoxXhtml] Generate an XHTML file containing bounding
446
+ * box information for each word in the file.
447
+ * @property {boolean} [boundingBoxXhtmlLayout] Generate an XHTML file containing
448
+ * bounding box information for each block, line, and word in the file.
449
+ * @property {boolean} [cropBox] Use the crop box rather than the media box with
450
+ * `options.boundingBoxXhtml` and `options.boundingBoxXhtmlLayout`.
451
+ * @property {number} [cropHeight] Specifies the height of crop area in pixels
452
+ * (image output) or points (vector output).
453
+ * @property {number} [cropWidth] Specifies the width of crop area in pixels
454
+ * (image output) or points (vector output).
455
+ * @property {number} [cropXAxis] Specifies the x-coordinate of the crop area top left
456
+ * corner in pixels (image output) or points (vector output).
457
+ * @property {number} [cropYAxis] Specifies the y-coordinate of the crop area top left
458
+ * corner in pixels (image output) or points (vector output).
459
+ * @property {('dos'|'mac'|'unix')} [eolConvention] Sets the end-of-line convention to use for
460
+ * text output: dos; mac; unix.
461
+ * @property {number} [firstPageToConvert] Specifies the first page to convert.
462
+ * @property {number} [fixedWidthLayout] Assume fixed-pitch (or tabular) text, with the
463
+ * specified character width (in points). This forces physical layout mode.
464
+ * @property {boolean} [generateHtmlMetaFile] Generate simple HTML file, including the
465
+ * meta information. This simply wraps the text in `<pre>` and `</pre>` and prepends the meta headers.
466
+ * @property {boolean} [generateTsvFile] Generate a TSV file containing the bounding box
467
+ * information for each block, line, and word in the file.
468
+ * @property {number} [lastPageToConvert] Specifies the last page to convert.
469
+ * @property {boolean} [listEncodingOptions] List the available encodings.
470
+ * @property {boolean} [maintainLayout] Maintain (as best as possible) the original physical
471
+ * layout of the text. The default is to undo physical layout (columns, hyphenation, etc.) and
472
+ * output the text in reading order.
473
+ * @property {boolean} [noDiagonalText] Discard diagonal text.
474
+ * @property {boolean} [noPageBreaks] Do not insert page breaks (form feed characters)
475
+ * between pages.
476
+ * @property {string} [outputEncoding] Sets the encoding to use for text output.
477
+ * This defaults to `UTF-8`.
478
+ * @property {string} [ownerPassword] Owner password (for encrypted files).
479
+ * @property {boolean} [printVersionInfo] Print copyright and version information.
480
+ * @property {boolean} [quiet] Do not print any messages or errors.
481
+ * @property {boolean} [rawLayout] Keep the text in content stream order. This is a
482
+ * hack which often undoes column formatting, etc. Use of raw mode is no longer recommended.
483
+ * @property {string} [userPassword] User password (for encrypted files).
484
+ */
485
+
486
+ /**
487
+ * @typedef PdfUniteOptions
488
+ * @property {boolean} [printVersionInfo] Print copyright and version information.
489
+ */
490
+
491
+ /**
492
+ * @author Frazer Smith
493
+ * @description Executes a Poppler binary with the provided arguments and file input.
494
+ * @ignore
495
+ * @param {string} binary - Path to the binary to execute.
496
+ * @param {string[]} args - Array of CLI arguments to pass to the binary.
497
+ * @param {Buffer|string} [file] - File input (Buffer or path).
498
+ * @param {object} [options] - Object containing execution options.
499
+ * @param {boolean} [options.binaryOutput] - Set binary encoding for stdout.
500
+ * @param {boolean} [options.preserveWhitespace] - If true, preserves leading and trailing whitespace in the output.
501
+ * @returns {Promise<string>} A promise that resolves with stdout, or rejects with an Error.
502
+ */
503
+ function executeBinary(binary, args, file, options = {}) {
504
+ return new Promise((resolve, reject) => {
505
+ const child = spawn(binary, args);
506
+
507
+ if (options.binaryOutput) {
508
+ child.stdout.setEncoding("binary");
509
+ }
510
+
511
+ if (Buffer.isBuffer(file)) {
512
+ child.stdin.write(file);
513
+ child.stdin.end();
514
+ }
515
+
516
+ let stdOut = "";
517
+ let stdErr = "";
518
+
519
+ child.stdout.on("data", (data) => {
520
+ stdOut += data;
521
+ });
522
+
523
+ child.stderr.on("data", (data) => {
524
+ stdErr += data;
525
+ });
526
+
527
+ child.on("close", (code) => {
528
+ /* istanbul ignore else */
529
+ if (stdOut !== "") {
530
+ resolve(options.preserveWhitespace ? stdOut : stdOut.trim());
531
+ } else if (code === 0) {
532
+ resolve(ERROR_MSGS[code]);
533
+ } else if (stdErr !== "") {
534
+ reject(new Error(stdErr.trim()));
535
+ } else {
536
+ reject(
537
+ new Error(
538
+ ERROR_MSGS[code ?? -1] ||
539
+ `${basename(binary)} ${args.join(" ")} exited with code ${code}`
540
+ )
541
+ );
542
+ }
543
+ });
544
+ });
545
+ }
546
+
39
547
  /**
40
548
  * @author Frazer Smith
41
549
  * @description Checks each option provided is valid, of the correct type, and can be used by specified
@@ -45,7 +553,7 @@ const PDF_INFO_PATH_REG = /(.+)pdfinfo/u;
45
553
  * @param {Record<string, any>} options - Object containing options to pass to binary.
46
554
  * @param {string} [version] - Version of binary.
47
555
  * @returns {string[]} Array of CLI arguments.
48
- * @throws If invalid arguments provided.
556
+ * @throws {Error} If invalid arguments provided.
49
557
  */
50
558
  function parseOptions(acceptedOptions, options, version) {
51
559
  /** @type {string[]} */
@@ -54,47 +562,47 @@ function parseOptions(acceptedOptions, options, version) {
54
562
  const invalidArgs = [];
55
563
 
56
564
  // Imperative loops are faster than functional loops, see https://romgrk.com/posts/optimizing-javascript
57
- const entries = Object.entries(options);
58
- const entriesLength = entries.length;
59
- for (let i = 0; i < entriesLength; i += 1) {
60
- // Destructuring adds overhead, so use index access
61
- const key = entries[i][0];
62
- if (Object.hasOwn(acceptedOptions, key)) {
63
- const option = entries[i][1];
64
- const acceptedOption = acceptedOptions[key];
65
-
66
- if (acceptedOption.type === typeof option) {
67
- // Skip boolean options if false
68
- if (acceptedOption.type !== "boolean" || option) {
69
- // Arg will be empty for some non-standard options
70
- if (acceptedOption.arg !== "") {
71
- args.push(acceptedOption.arg);
72
- }
565
+ const keys = Object.keys(options);
566
+ const keysLength = keys.length;
567
+ for (let i = 0; i < keysLength; i += 1) {
568
+ const key = keys[i];
569
+ if (!Object.hasOwn(acceptedOptions, key)) {
570
+ invalidArgs.push(`Invalid option provided '${key}'`);
571
+ continue;
572
+ }
73
573
 
74
- if (typeof option !== "boolean") {
75
- args.push(option);
76
- }
574
+ // @ts-ignore: keys are from options, TS cannot infer this
575
+ const option = options[key];
576
+ const acceptedOption = acceptedOptions[key];
577
+
578
+ if (acceptedOption.type === typeof option) {
579
+ // Skip boolean options if false
580
+ if (acceptedOption.type !== "boolean" || option) {
581
+ // Arg will be empty for some non-standard options
582
+ if (acceptedOption.arg !== "") {
583
+ args.push(acceptedOption.arg);
77
584
  }
78
- } else {
79
- invalidArgs.push(
80
- `Invalid value type provided for option '${key}', expected ${
81
- acceptedOption.type
82
- } but received ${typeof option}`
83
- );
84
- }
85
585
 
86
- if (
87
- acceptedOption.minVersion &&
88
- version &&
89
- // @ts-ignore: type checking is done above
90
- lt(version, acceptedOption.minVersion, { loose: true })
91
- ) {
92
- invalidArgs.push(
93
- `Invalid option provided for the current version of the binary used. '${key}' was introduced in v${acceptedOption.minVersion}, but received v${version}`
94
- );
586
+ if (typeof option !== "boolean") {
587
+ args.push(option);
588
+ }
95
589
  }
96
590
  } else {
97
- invalidArgs.push(`Invalid option provided '${key}'`);
591
+ invalidArgs.push(
592
+ `Invalid value type provided for option '${key}', expected ${
593
+ acceptedOption.type
594
+ } but received ${typeof option}`
595
+ );
596
+ }
597
+
598
+ if (
599
+ acceptedOption.minVersion &&
600
+ version &&
601
+ lt(version, acceptedOption.minVersion, { loose: true })
602
+ ) {
603
+ invalidArgs.push(
604
+ `Invalid option provided for the current version of the binary used. '${key}' was introduced in v${acceptedOption.minVersion}, but received v${version}`
605
+ );
98
606
  }
99
607
  }
100
608
  if (invalidArgs.length === 0) {
@@ -106,12 +614,29 @@ function parseOptions(acceptedOptions, options, version) {
106
614
  class Poppler {
107
615
  #popplerPath;
108
616
 
617
+ #pdfAttachBin;
618
+ #pdfDetachBin;
619
+ #pdfFontsBin;
620
+ #pdfImagesBin;
621
+ #pdfInfoBin;
622
+ #pdfSeparateBin;
623
+ #pdfToCairoBin;
624
+ #pdfToHtmlBin;
625
+ #pdfToPpmBin;
626
+ #pdfToPsBin;
627
+ #pdfToTextBin;
628
+ #pdfUniteBin;
629
+
630
+ #binVersions = new Map();
631
+ #acceptedOptions = new Map();
632
+
109
633
  /**
110
634
  * @param {string} [binPath] - Path of poppler-utils binaries.
111
635
  * If not provided, the constructor will attempt to find the Poppler `pdfinfo` binary
112
636
  * in the PATH environment variable and use that as the path for all binaries.
113
637
  * For `win32` the binaries are bundled with the package and will be used
114
638
  * if a local installation is not found.
639
+ * @throws {Error} If the Poppler binaries cannot be found.
115
640
  */
116
641
  constructor(binPath) {
117
642
  this.#popplerPath = "";
@@ -148,6 +673,19 @@ class Poppler {
148
673
  );
149
674
  }
150
675
  this.#popplerPath = normalize(this.#popplerPath);
676
+
677
+ this.#pdfAttachBin = pathResolve(this.#popplerPath, "pdfattach");
678
+ this.#pdfDetachBin = pathResolve(this.#popplerPath, "pdfdetach");
679
+ this.#pdfFontsBin = pathResolve(this.#popplerPath, "pdffonts");
680
+ this.#pdfImagesBin = pathResolve(this.#popplerPath, "pdfimages");
681
+ this.#pdfInfoBin = pathResolve(this.#popplerPath, "pdfinfo");
682
+ this.#pdfSeparateBin = pathResolve(this.#popplerPath, "pdfseparate");
683
+ this.#pdfToCairoBin = pathResolve(this.#popplerPath, "pdftocairo");
684
+ this.#pdfToHtmlBin = pathResolve(this.#popplerPath, "pdftohtml");
685
+ this.#pdfToPpmBin = pathResolve(this.#popplerPath, "pdftoppm");
686
+ this.#pdfToPsBin = pathResolve(this.#popplerPath, "pdftops");
687
+ this.#pdfToTextBin = pathResolve(this.#popplerPath, "pdftotext");
688
+ this.#pdfUniteBin = pathResolve(this.#popplerPath, "pdfunite");
151
689
  }
152
690
 
153
691
  /**
@@ -158,31 +696,460 @@ class Poppler {
158
696
  return this.#popplerPath;
159
697
  }
160
698
 
699
+ /**
700
+ * @author Frazer Smith
701
+ * @description Returns the version of the specified Poppler binary.
702
+ * @param {string} binary - The Poppler binary to get the version of.
703
+ * @returns {Promise<string>} A promise that resolves with the version of the binary, or rejects with an `Error` object.
704
+ */
705
+ async #getVersion(binary) {
706
+ if (!this.#binVersions.has(binary)) {
707
+ const { stderr } = await execFileAsync(binary, ["-v"]);
708
+ // @ts-ignore: parseOptions checks if falsy
709
+ const version = POPPLER_VERSION_REG.exec(stderr)[1];
710
+ this.#binVersions.set(binary, version);
711
+ }
712
+ return this.#binVersions.get(binary);
713
+ }
714
+
715
+ /**
716
+ * @author Frazer Smith
717
+ * @description Returns the accepted options for the specified Poppler binary function.
718
+ * @param {string} functionName - The name of the Poppler binary function.
719
+ * @returns {PopplerAcceptedOptions} An object containing the accepted options of the specified function.
720
+ */
721
+ #getAcceptedOptions(functionName) {
722
+ if (!this.#acceptedOptions.has(functionName)) {
723
+ switch (functionName) {
724
+ case "pdfAttach":
725
+ this.#acceptedOptions.set("pdfAttach", {
726
+ printVersionInfo: { arg: "-v", type: "boolean" },
727
+ replace: { arg: "-replace", type: "boolean" },
728
+ });
729
+ break;
730
+ case "pdfDetach":
731
+ this.#acceptedOptions.set("pdfDetach", {
732
+ listEmbedded: { arg: "-list", type: "boolean" },
733
+ outputEncoding: { arg: "-enc", type: "string" },
734
+ outputPath: { arg: "-o", type: "string" },
735
+ ownerPassword: { arg: "-opw", type: "string" },
736
+ printVersionInfo: { arg: "-v", type: "boolean" },
737
+ saveAllFiles: { arg: "-saveall", type: "boolean" },
738
+ saveFile: {
739
+ arg: "-savefile",
740
+ type: "string",
741
+ minVersion: "0.86.0",
742
+ },
743
+ saveSpecificFile: { arg: "-save", type: "number" },
744
+ userPassword: { arg: "-upw", type: "string" },
745
+ });
746
+ break;
747
+ case "pdfFonts":
748
+ this.#acceptedOptions.set("pdfFonts", {
749
+ firstPageToExamine: { arg: "-f", type: "number" },
750
+ lastPageToExamine: { arg: "-l", type: "number" },
751
+ listSubstitutes: { arg: "-subst", type: "boolean" },
752
+ ownerPassword: { arg: "-opw", type: "string" },
753
+ printVersionInfo: { arg: "-v", type: "boolean" },
754
+ userPassword: { arg: "-upw", type: "string" },
755
+ });
756
+ break;
757
+ case "pdfImages":
758
+ this.#acceptedOptions.set("pdfImages", {
759
+ allFiles: { arg: "-all", type: "boolean" },
760
+ ccittFile: { arg: "-ccitt", type: "boolean" },
761
+ firstPageToConvert: { arg: "-f", type: "number" },
762
+ lastPageToConvert: { arg: "-l", type: "number" },
763
+ jbig2File: { arg: "-jbig2", type: "boolean" },
764
+ jpeg2000File: { arg: "-jp2", type: "boolean" },
765
+ jpegFile: { arg: "-j", type: "boolean" },
766
+ list: { arg: "-list", type: "boolean" },
767
+ ownerPassword: { arg: "-opw", type: "string" },
768
+ pngFile: { arg: "-png", type: "boolean" },
769
+ printVersionInfo: { arg: "-v", type: "boolean" },
770
+ tiffFile: { arg: "-tiff", type: "boolean" },
771
+ userPassword: { arg: "-upw", type: "string" },
772
+ });
773
+ break;
774
+ case "pdfInfo":
775
+ this.#acceptedOptions.set("pdfInfo", {
776
+ firstPageToConvert: { arg: "-f", type: "number" },
777
+ lastPageToConvert: { arg: "-l", type: "number" },
778
+ listEncodingOptions: {
779
+ arg: "-listenc",
780
+ type: "boolean",
781
+ },
782
+ outputEncoding: { arg: "-enc", type: "string" },
783
+ ownerPassword: { arg: "-opw", type: "string" },
784
+ printAsJson: { arg: "", type: "boolean" },
785
+ printBoundingBoxes: { arg: "-box", type: "boolean" },
786
+ printDocStruct: { arg: "-struct", type: "boolean" },
787
+ printDocStructText: {
788
+ arg: "-struct-text",
789
+ type: "boolean",
790
+ },
791
+ printIsoDates: { arg: "-isodates", type: "boolean" },
792
+ printJS: { arg: "-js", type: "boolean" },
793
+ printMetadata: { arg: "-meta", type: "boolean" },
794
+ printNamedDests: { arg: "-dests", type: "boolean" },
795
+ printRawDates: { arg: "-rawdates", type: "boolean" },
796
+ printUrls: {
797
+ arg: "-url",
798
+ type: "boolean",
799
+ minVersion: "21.11.0",
800
+ },
801
+ printVersionInfo: { arg: "-v", type: "boolean" },
802
+ userPassword: { arg: "-upw", type: "string" },
803
+ });
804
+ break;
805
+ case "pdfSeparate":
806
+ this.#acceptedOptions.set("pdfSeparate", {
807
+ firstPageToExtract: { arg: "-f", type: "number" },
808
+ lastPageToExtract: { arg: "-l", type: "number" },
809
+ printVersionInfo: { arg: "-v", type: "boolean" },
810
+ });
811
+ break;
812
+ case "pdfToCairo":
813
+ this.#acceptedOptions.set("pdfToCairo", {
814
+ antialias: { arg: "-antialias", type: "string" },
815
+ cropBox: { arg: "-cropbox", type: "boolean" },
816
+ cropHeight: { arg: "-H", type: "number" },
817
+ cropSize: { arg: "-sz", type: "number" },
818
+ cropWidth: { arg: "-W", type: "number" },
819
+ cropXAxis: { arg: "-x", type: "number" },
820
+ cropYAxis: { arg: "-y", type: "number" },
821
+ duplex: { arg: "-duplex", type: "boolean" },
822
+ epsFile: { arg: "-eps", type: "boolean" },
823
+ evenPagesOnly: { arg: "-e", type: "boolean" },
824
+ fillPage: { arg: "-expand", type: "boolean" },
825
+ firstPageToConvert: { arg: "-f", type: "number" },
826
+ grayscaleFile: { arg: "-gray", type: "boolean" },
827
+ iccFile: { arg: "-icc", type: "string" },
828
+ jpegFile: { arg: "-jpeg", type: "boolean" },
829
+ jpegOptions: { arg: "-jpegopt", type: "string" },
830
+ lastPageToConvert: { arg: "-l", type: "number" },
831
+ monochromeFile: { arg: "-mono", type: "boolean" },
832
+ noCenter: { arg: "-nocenter", type: "boolean" },
833
+ noCrop: { arg: "-nocrop", type: "boolean" },
834
+ noShrink: { arg: "-noshrink", type: "boolean" },
835
+ oddPagesOnly: { arg: "-o", type: "boolean" },
836
+ originalPageSizes: {
837
+ arg: "-origpagesizes",
838
+ type: "boolean",
839
+ },
840
+ ownerPassword: { arg: "-opw", type: "string" },
841
+ paperHeight: { arg: "-paperh", type: "number" },
842
+ paperSize: { arg: "-paper", type: "string" },
843
+ paperWidth: { arg: "-paperw", type: "number" },
844
+ pdfFile: { arg: "-pdf", type: "boolean" },
845
+ pngFile: { arg: "-png", type: "boolean" },
846
+ printDocStruct: {
847
+ arg: "-struct",
848
+ type: "boolean",
849
+ minVersion: "23.11.0",
850
+ },
851
+ printVersionInfo: { arg: "-v", type: "boolean" },
852
+ psFile: { arg: "-ps", type: "boolean" },
853
+ psLevel2: { arg: "-level2", type: "boolean" },
854
+ psLevel3: { arg: "-level3", type: "boolean" },
855
+ quiet: { arg: "-q", type: "boolean" },
856
+ resolutionXAxis: { arg: "-rx", type: "number" },
857
+ resolutionXYAxis: { arg: "-r", type: "number" },
858
+ resolutionYAxis: { arg: "-ry", type: "number" },
859
+ scalePageTo: { arg: "-scale-to", type: "number" },
860
+ scalePageToXAxis: {
861
+ arg: "-scale-to-x",
862
+ type: "number",
863
+ },
864
+ scalePageToYAxis: {
865
+ arg: "-scale-to-y",
866
+ type: "number",
867
+ },
868
+ singleFile: { arg: "-singlefile", type: "boolean" },
869
+ svgFile: { arg: "-svg", type: "boolean" },
870
+ tiffCompression: {
871
+ arg: "-tiffcompression",
872
+ type: "string",
873
+ },
874
+ tiffFile: { arg: "-tiff", type: "boolean" },
875
+ transparentPageColor: {
876
+ arg: "-transp",
877
+ type: "boolean",
878
+ },
879
+ userPassword: { arg: "-upw", type: "string" },
880
+ });
881
+ break;
882
+ case "pdfToHtml":
883
+ this.#acceptedOptions.set("pdfToHtml", {
884
+ complexOutput: { arg: "-c", type: "boolean" },
885
+ dataUrls: {
886
+ arg: "-dataurls",
887
+ type: "boolean",
888
+ minVersion: "0.75.0",
889
+ },
890
+ exchangePdfLinks: { arg: "-p", type: "boolean" },
891
+ extractHidden: { arg: "-hidden", type: "boolean" },
892
+ firstPageToConvert: { arg: "-f", type: "number" },
893
+ fontFullName: { arg: "-fontfullname", type: "boolean" },
894
+ ignoreImages: { arg: "-i", type: "boolean" },
895
+ imageFormat: { arg: "-fmt", type: "string" },
896
+ lastPageToConvert: { arg: "-l", type: "number" },
897
+ noDrm: { arg: "-nodrm", type: "boolean" },
898
+ noFrames: { arg: "-noframes", type: "boolean" },
899
+ noMergeParagraph: { arg: "-nomerge", type: "boolean" },
900
+ noRoundedCoordinates: {
901
+ arg: "-noroundcoord",
902
+ type: "boolean",
903
+ },
904
+ outputEncoding: { arg: "-enc", type: "string" },
905
+ ownerPassword: { arg: "-opw", type: "string" },
906
+ printVersionInfo: { arg: "-v", type: "boolean" },
907
+ quiet: { arg: "-q", type: "boolean" },
908
+ singlePage: { arg: "-s", type: "boolean" },
909
+ stdout: { arg: "-stdout", type: "boolean" },
910
+ userPassword: { arg: "-upw", type: "string" },
911
+ wordBreakThreshold: { arg: "-wbt", type: "number" },
912
+ xmlOutput: { arg: "-xml", type: "boolean" },
913
+ zoom: { arg: "-zoom", type: "number" },
914
+ });
915
+ break;
916
+ case "pdfToPpm":
917
+ this.#acceptedOptions.set("pdfToPpm", {
918
+ antialiasFonts: { arg: "-aa", type: "string" },
919
+ antialiasVectors: { arg: "-aaVector", type: "string" },
920
+ cropBox: { arg: "-cropbox", type: "boolean" },
921
+ cropHeight: { arg: "-H", type: "number" },
922
+ cropSize: { arg: "-sz", type: "number" },
923
+ cropWidth: { arg: "-W", type: "number" },
924
+ cropXAxis: { arg: "-x", type: "number" },
925
+ cropYAxis: { arg: "-y", type: "number" },
926
+ defaultCmykProfile: {
927
+ arg: "-defaultcmykprofile",
928
+ type: "string",
929
+ minVersion: "21.01.0",
930
+ },
931
+ defaultGrayProfile: {
932
+ arg: "-defaultgrayprofile",
933
+ type: "string",
934
+ minVersion: "21.01.0",
935
+ },
936
+ defaultRgbProfile: {
937
+ arg: "-defaultrgbprofile",
938
+ type: "string",
939
+ minVersion: "21.01.0",
940
+ },
941
+ displayProfile: {
942
+ arg: "-displayprofile",
943
+ type: "string",
944
+ minVersion: "0.90.0",
945
+ },
946
+ evenPagesOnly: { arg: "-e", type: "boolean" },
947
+ firstPageToConvert: { arg: "-f", type: "number" },
948
+ forcePageNumber: {
949
+ arg: "-forcenum",
950
+ type: "boolean",
951
+ minVersion: "0.75.0",
952
+ },
953
+ freetype: { arg: "-freetype", type: "string" },
954
+ grayscaleFile: { arg: "-gray", type: "boolean" },
955
+ hideAnnotations: {
956
+ arg: "-hide-annotations",
957
+ type: "boolean",
958
+ minVersion: "0.84.0",
959
+ },
960
+ jpegFile: { arg: "-jpeg", type: "boolean" },
961
+ lastPageToConvert: { arg: "-l", type: "number" },
962
+ monochromeFile: { arg: "-mono", type: "boolean" },
963
+ oddPagesOnly: { arg: "-o", type: "boolean" },
964
+ ownerPassword: { arg: "-opw", type: "string" },
965
+ pngFile: { arg: "-png", type: "boolean" },
966
+ printProgress: {
967
+ arg: "-progress",
968
+ type: "boolean",
969
+ minVersion: "21.03.0",
970
+ },
971
+ printVersionInfo: { arg: "-v", type: "boolean" },
972
+ quiet: { arg: "-q", type: "boolean" },
973
+ resolutionXAxis: { arg: "-rx", type: "number" },
974
+ resolutionXYAxis: { arg: "-r", type: "number" },
975
+ resolutionYAxis: { arg: "-ry", type: "number" },
976
+ scalePageTo: { arg: "-scale-to", type: "number" },
977
+ scalePageToXAxis: {
978
+ arg: "-scale-to-x",
979
+ type: "number",
980
+ },
981
+ scalePageToYAxis: {
982
+ arg: "-scale-to-y",
983
+ type: "number",
984
+ },
985
+ separator: {
986
+ arg: "-sep",
987
+ type: "string",
988
+ minVersion: "0.75.0",
989
+ },
990
+ singleFile: { arg: "-singlefile", type: "boolean" },
991
+ thinLineMode: { arg: "-thinlinemode", type: "string" },
992
+ tiffCompression: {
993
+ arg: "-tiffcompression",
994
+ type: "string",
995
+ },
996
+ tiffFile: { arg: "-tiff", type: "boolean" },
997
+ userPassword: { arg: "-upw", type: "string" },
998
+ });
999
+ break;
1000
+ case "pdfToPs":
1001
+ this.#acceptedOptions.set("pdfToPs", {
1002
+ antialias: { arg: "-aaRaster", type: "string" },
1003
+ binary: { arg: "-binary", type: "boolean" },
1004
+ defaultCmykProfile: {
1005
+ arg: "-defaultcmykprofile",
1006
+ type: "string",
1007
+ minVersion: "21.01.0",
1008
+ },
1009
+ defaultGrayProfile: {
1010
+ arg: "-defaultgrayprofile",
1011
+ type: "string",
1012
+ minVersion: "21.01.0",
1013
+ },
1014
+ defaultRgbProfile: {
1015
+ arg: "-defaultrgbprofile",
1016
+ type: "string",
1017
+ minVersion: "21.01.0",
1018
+ },
1019
+ duplex: { arg: "-duplex", type: "boolean" },
1020
+ epsFile: { arg: "-eps", type: "boolean" },
1021
+ fillPage: { arg: "-expand", type: "boolean" },
1022
+ firstPageToConvert: { arg: "-f", type: "number" },
1023
+ form: { arg: "-form", type: "boolean" },
1024
+ lastPageToConvert: { arg: "-l", type: "number" },
1025
+ level1: { arg: "-level1", type: "boolean" },
1026
+ level1Sep: { arg: "-level1sep", type: "boolean" },
1027
+ level2: { arg: "-level2", type: "boolean" },
1028
+ level2Sep: { arg: "-level2sep", type: "boolean" },
1029
+ level3: { arg: "-level3", type: "boolean" },
1030
+ level3Sep: { arg: "-level3sep", type: "boolean" },
1031
+ noCenter: { arg: "-nocenter", type: "boolean" },
1032
+ noCrop: { arg: "-nocrop", type: "boolean" },
1033
+ noEmbedCIDFonts: {
1034
+ arg: "-noembcidps",
1035
+ type: "boolean",
1036
+ },
1037
+ noEmbedCIDTrueTypeFonts: {
1038
+ arg: "-noembcidtt",
1039
+ type: "boolean",
1040
+ },
1041
+ noEmbedTrueTypeFonts: {
1042
+ arg: "-noembtt",
1043
+ type: "boolean",
1044
+ },
1045
+ noEmbedType1Fonts: { arg: "-noembt1", type: "boolean" },
1046
+ noShrink: { arg: "-noshrink", type: "boolean" },
1047
+ opi: { arg: "-opi", type: "boolean" },
1048
+ optimizecolorspace: {
1049
+ arg: "-optimizecolorspace",
1050
+ type: "boolean",
1051
+ },
1052
+ originalPageSizes: {
1053
+ arg: "-origpagesizes",
1054
+ type: "boolean",
1055
+ },
1056
+ overprint: { arg: "-overprint", type: "boolean" },
1057
+ ownerPassword: { arg: "-opw", type: "string" },
1058
+ paperHeight: { arg: "-paperh", type: "number" },
1059
+ paperSize: { arg: "-paper", type: "string" },
1060
+ paperWidth: { arg: "-paperw", type: "number" },
1061
+ passfonts: { arg: "-passfonts", type: "boolean" },
1062
+ preload: { arg: "-preload", type: "boolean" },
1063
+ printVersionInfo: { arg: "-v", type: "boolean" },
1064
+ processColorFormat: {
1065
+ arg: "-processcolorformat",
1066
+ type: "string",
1067
+ },
1068
+ processColorProfile: {
1069
+ arg: "-processcolorprofile",
1070
+ type: "string",
1071
+ },
1072
+ quiet: { arg: "-q", type: "boolean" },
1073
+ rasterize: {
1074
+ arg: "-rasterize",
1075
+ type: "string",
1076
+ minVersion: "0.90.0",
1077
+ },
1078
+ resolutionXYAxis: { arg: "-r", type: "number" },
1079
+ userPassword: { arg: "-upw", type: "string" },
1080
+ });
1081
+ break;
1082
+ case "pdfToText":
1083
+ this.#acceptedOptions.set("pdfToText", {
1084
+ boundingBoxXhtml: { arg: "-bbox", type: "boolean" },
1085
+ boundingBoxXhtmlLayout: {
1086
+ arg: "-bbox-layout",
1087
+ type: "boolean",
1088
+ },
1089
+ cropBox: {
1090
+ arg: "-cropbox",
1091
+ type: "boolean",
1092
+ minVersion: "21.03.0",
1093
+ },
1094
+ cropHeight: { arg: "-H", type: "number" },
1095
+ cropWidth: { arg: "-W", type: "number" },
1096
+ cropXAxis: { arg: "-x", type: "number" },
1097
+ cropYAxis: { arg: "-y", type: "number" },
1098
+ eolConvention: { arg: "-eol", type: "string" },
1099
+ firstPageToConvert: { arg: "-f", type: "number" },
1100
+ fixedWidthLayout: { arg: "-fixed", type: "number" },
1101
+ generateHtmlMetaFile: {
1102
+ arg: "-htmlmeta",
1103
+ type: "boolean",
1104
+ },
1105
+ generateTsvFile: { arg: "-tsv", type: "boolean" },
1106
+ lastPageToConvert: { arg: "-l", type: "number" },
1107
+ listEncodingOptions: {
1108
+ arg: "-listenc",
1109
+ type: "boolean",
1110
+ },
1111
+ maintainLayout: { arg: "-layout", type: "boolean" },
1112
+ noDiagonalText: {
1113
+ arg: "-nodiag",
1114
+ type: "boolean",
1115
+ minVersion: "0.80.0",
1116
+ },
1117
+ noPageBreaks: { arg: "-nopgbrk", type: "boolean" },
1118
+ outputEncoding: { arg: "-enc", type: "string" },
1119
+ ownerPassword: { arg: "-opw", type: "string" },
1120
+ printVersionInfo: { arg: "-v", type: "boolean" },
1121
+ quiet: { arg: "-q", type: "boolean" },
1122
+ rawLayout: { arg: "-raw", type: "boolean" },
1123
+ resolution: { arg: "-r", type: "number" },
1124
+ userPassword: { arg: "-upw", type: "string" },
1125
+ });
1126
+ break;
1127
+ case "pdfUnite":
1128
+ this.#acceptedOptions.set("pdfUnite", {
1129
+ printVersionInfo: { arg: "-v", type: "boolean" },
1130
+ });
1131
+ break;
1132
+ }
1133
+ }
1134
+
1135
+ return this.#acceptedOptions.get(functionName);
1136
+ }
1137
+
161
1138
  /**
162
1139
  * @author Frazer Smith
163
1140
  * @description Embeds files (attachments) into a PDF file.
164
1141
  * @param {string} file - Filepath of the PDF file to read.
165
1142
  * @param {string} fileToAttach - Filepath of the attachment to be embedded into the PDF file.
166
1143
  * @param {string} outputFile - Filepath of the file to output the results to.
167
- * @param {object} [options] - Object containing options to pass to binary.
168
- * @param {boolean} [options.printVersionInfo] - Print copyright and version info.
169
- * @param {boolean} [options.replace] - Replace embedded file with same name (if it exists).
1144
+ * @param {PdfAttachOptions} [options] - Options to pass to pdfattach binary.
170
1145
  * @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
171
1146
  */
172
1147
  async pdfAttach(file, fileToAttach, outputFile, options = {}) {
173
- /** @type {PopplerAcceptedOptions} */
174
- const acceptedOptions = {
175
- printVersionInfo: { arg: "-v", type: "boolean" },
176
- replace: { arg: "-replace", type: "boolean" },
177
- };
178
-
1148
+ const acceptedOptions = this.#getAcceptedOptions("pdfAttach");
179
1149
  const args = parseOptions(acceptedOptions, options);
180
1150
  args.push(file, fileToAttach, outputFile);
181
1151
 
182
- const { stdout } = await execFileAsync(
183
- pathResolve(this.#popplerPath, "pdfattach"),
184
- args
185
- );
1152
+ const { stdout } = await execFileAsync(this.#pdfAttachBin, args);
186
1153
  return stdout;
187
1154
  }
188
1155
 
@@ -190,53 +1157,15 @@ class Poppler {
190
1157
  * @author Frazer Smith
191
1158
  * @description Lists or extracts embedded files (attachments) from a PDF file.
192
1159
  * @param {string} file - Filepath of the PDF file to read.
193
- * @param {object} [options] - Object containing options to pass to binary.
194
- * @param {boolean} [options.listEmbedded] - List all of the embedded files in the PDF file.
195
- * File names are converted to the text encoding specified by `options.outputEncoding`.
196
- * @param {string} [options.outputEncoding] - Sets the encoding to use for text output.
197
- * This defaults to `UTF-8`.
198
- * @param {string} [options.ownerPassword] - Owner password (for encrypted files).
199
- * @param {string} [options.outputPath] - Set the file name used when saving an embedded file with
200
- * the save option enabled, or the directory if `options.saveall` is used.
201
- * @param {boolean} [options.printVersionInfo] - Print copyright and version info.
202
- * @param {boolean} [options.saveAllFiles] - Save all of the embedded files. This uses the file
203
- * names associated with the embedded files (as printed by `options.listEmbedded`).
204
- * By default, the files are saved in the current directory; this can be changed
205
- * with `options.outputPath`.
206
- * @param {string} [options.saveFile] - Save the specified embedded file.
207
- * By default, this uses the file name associated with the embedded file (as printed by
208
- * `options.listEmbedded`); the file name can be changed with `options.outputPath`.
209
- * @param {number} [options.saveSpecificFile] - Save the specified embedded file.
210
- * By default, this uses the file name associated with the embedded file (as printed by
211
- * `options.listEmbedded`); the file name can be changed with `options.outputPath`.
212
- * @param {string} [options.userPassword] - User password (for encrypted files).
1160
+ * @param {PdfDetachOptions} [options] - Options to pass to pdfdetach binary.
213
1161
  * @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
214
1162
  */
215
1163
  async pdfDetach(file, options = {}) {
216
- /** @type {PopplerAcceptedOptions} */
217
- const acceptedOptions = {
218
- listEmbedded: { arg: "-list", type: "boolean" },
219
- outputEncoding: { arg: "-enc", type: "string" },
220
- outputPath: { arg: "-o", type: "string" },
221
- ownerPassword: { arg: "-opw", type: "string" },
222
- printVersionInfo: { arg: "-v", type: "boolean" },
223
- saveAllFiles: { arg: "-saveall", type: "boolean" },
224
- saveFile: {
225
- arg: "-savefile",
226
- type: "string",
227
- minVersion: "0.86.0",
228
- },
229
- saveSpecificFile: { arg: "-save", type: "number" },
230
- userPassword: { arg: "-upw", type: "string" },
231
- };
232
-
1164
+ const acceptedOptions = this.#getAcceptedOptions("pdfDetach");
233
1165
  const args = parseOptions(acceptedOptions, options);
234
1166
  args.push(file);
235
1167
 
236
- const { stdout } = await execFileAsync(
237
- pathResolve(this.#popplerPath, "pdfdetach"),
238
- args
239
- );
1168
+ const { stdout } = await execFileAsync(this.#pdfDetachBin, args);
240
1169
  return stdout;
241
1170
  }
242
1171
 
@@ -244,82 +1173,16 @@ class Poppler {
244
1173
  * @author Frazer Smith
245
1174
  * @description Lists the fonts used in a PDF file along with various information for each font.
246
1175
  * @param {(Buffer|string)} file - PDF file as Buffer, or filepath of the PDF file to read.
247
- * @param {object} [options] - Object containing options to pass to binary.
248
- * @param {number} [options.firstPageToExamine] - Specifies the first page to examine.
249
- * @param {number} [options.lastPageToExamine] - Specifies the last page to examine.
250
- * @param {boolean} [options.listSubstitutes] - List the substitute fonts that poppler
251
- * will use for non-embedded fonts.
252
- * @param {string} [options.ownerPassword] - Owner password (for encrypted files).
253
- * @param {boolean} [options.printVersionInfo] - Print copyright and version info.
254
- * @param {string} [options.userPassword] - User password (for encrypted files).
1176
+ * @param {PdfFontsOptions} [options] - Options to pass to pdffonts binary.
255
1177
  * @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
256
1178
  */
257
1179
  async pdfFonts(file, options = {}) {
258
- /** @type {PopplerAcceptedOptions} */
259
- const acceptedOptions = {
260
- firstPageToExamine: { arg: "-f", type: "number" },
261
- lastPageToExamine: { arg: "-l", type: "number" },
262
- listSubstitutes: { arg: "-subst", type: "boolean" },
263
- ownerPassword: { arg: "-opw", type: "string" },
264
- printVersionInfo: { arg: "-v", type: "boolean" },
265
- userPassword: { arg: "-upw", type: "string" },
266
- };
267
-
268
- const { stderr } = await execFileAsync(
269
- pathResolve(this.#popplerPath, "pdffonts"),
270
- ["-v"]
271
- );
272
-
273
- // @ts-ignore: parseOptions checks if falsy
274
- const versionInfo = POPPLER_VERSION_REG.exec(stderr)[1];
275
-
1180
+ const acceptedOptions = this.#getAcceptedOptions("pdfFonts");
1181
+ const versionInfo = await this.#getVersion(this.#pdfFontsBin);
276
1182
  const args = parseOptions(acceptedOptions, options, versionInfo);
1183
+ args.push(Buffer.isBuffer(file) ? "-" : file);
277
1184
 
278
- return new Promise((resolve, reject) => {
279
- args.push(Buffer.isBuffer(file) ? "-" : file);
280
-
281
- const child = spawn(
282
- pathResolve(this.#popplerPath, "pdffonts"),
283
- args
284
- );
285
-
286
- if (Buffer.isBuffer(file)) {
287
- child.stdin.write(file);
288
- child.stdin.end();
289
- }
290
-
291
- let stdOut = "";
292
- let stdErr = "";
293
-
294
- child.stdout.on("data", (data) => {
295
- stdOut += data;
296
- });
297
-
298
- child.stderr.on("data", (data) => {
299
- stdErr += data;
300
- });
301
-
302
- child.on("close", (code) => {
303
- /* istanbul ignore else */
304
- if (stdOut !== "") {
305
- resolve(stdOut.trim());
306
- } else if (code === 0) {
307
- resolve(ERROR_MSGS[code]);
308
- } else if (stdErr !== "") {
309
- reject(new Error(stdErr.trim()));
310
- } else {
311
- reject(
312
- new Error(
313
- // @ts-ignore: Second operand used if code is not in ERROR_MSGS
314
- ERROR_MSGS[code] ||
315
- `pdffonts ${args.join(
316
- " "
317
- )} exited with code ${code}`
318
- )
319
- );
320
- }
321
- });
322
- });
1185
+ return executeBinary(this.#pdfFontsBin, args, file);
323
1186
  }
324
1187
 
325
1188
  /**
@@ -327,186 +1190,49 @@ class Poppler {
327
1190
  * @description Saves images from a PDF file as PPM, PBM, PNG, TIFF, JPEG, JPEG2000, or JBIG2 files.
328
1191
  * @param {(Buffer|string)} file - PDF file as Buffer, or filepath of the PDF file to read.
329
1192
  * @param {string} [outputPrefix] - Filename prefix of output files.
330
- * @param {object} [options] - Object containing options to pass to binary.
331
- * @param {boolean} [options.allFiles] - Write JPEG, JPEG2000, JBIG2, and CCITT images in their native format.
332
- * CMYK files are written as TIFF files. All other images are written as PNG files.
333
- * @param {boolean} [options.ccittFile] - Generate CCITT images as CCITT files.
334
- * @param {number} [options.firstPageToConvert] - Specifies the first page to convert.
335
- * @param {number} [options.lastPageToConvert] - Specifies the last page to convert.
336
- * @param {boolean} [options.jbig2File] - Generate JBIG2 images as JBIG2 files.
337
- * @param {boolean} [options.jpeg2000File] - Generate JPEG2000 images at JP2 files.
338
- * @param {boolean} [options.jpegFile] - Generate JPEG images as JPEG files.
339
- * @param {boolean} [options.list] - Instead of writing the images, list the
340
- * images along with various information for each image.
341
- * NOTE: Do not specify the outputPrefix with this option.
342
- * @param {string} [options.ownerPassword] - Owner password (for encrypted files).
343
- * @param {boolean} [options.pngFile] - Change the default output format to PNG.
344
- * @param {boolean} [options.printVersionInfo] - Print copyright and version info.
345
- * @param {boolean} [options.tiffFile] - Change the default output format to TIFF.
346
- * @param {string} [options.userPassword] - Specify the user password for the PDF file.
1193
+ * @param {PdfImagesOptions} [options] - Options to pass to pdfimages binary.
347
1194
  * @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
348
1195
  */
349
1196
  async pdfImages(file, outputPrefix, options = {}) {
350
- /** @type {PopplerAcceptedOptions} */
351
- const acceptedOptions = {
352
- allFiles: { arg: "-all", type: "boolean" },
353
- ccittFile: { arg: "-ccitt", type: "boolean" },
354
- firstPageToConvert: { arg: "-f", type: "number" },
355
- lastPageToConvert: { arg: "-l", type: "number" },
356
- jbig2File: { arg: "-jbig2", type: "boolean" },
357
- jpeg2000File: { arg: "-jp2", type: "boolean" },
358
- jpegFile: { arg: "-j", type: "boolean" },
359
- list: { arg: "-list", type: "boolean" },
360
- ownerPassword: { arg: "-opw", type: "string" },
361
- pngFile: { arg: "-png", type: "boolean" },
362
- printVersionInfo: { arg: "-v", type: "boolean" },
363
- tiffFile: { arg: "-tiff", type: "boolean" },
364
- userPassword: { arg: "-upw", type: "string" },
365
- };
366
-
367
- const { stderr } = await execFileAsync(
368
- pathResolve(this.#popplerPath, "pdfimages"),
369
- ["-v"]
370
- );
371
-
372
- // @ts-ignore: parseOptions checks if falsy
373
- const versionInfo = POPPLER_VERSION_REG.exec(stderr)[1];
374
-
1197
+ const acceptedOptions = this.#getAcceptedOptions("pdfImages");
1198
+ const versionInfo = await this.#getVersion(this.#pdfImagesBin);
375
1199
  const args = parseOptions(acceptedOptions, options, versionInfo);
376
1200
 
377
- return new Promise((resolve, reject) => {
378
- args.push(Buffer.isBuffer(file) ? "-" : file);
379
-
380
- if (outputPrefix) {
381
- args.push(outputPrefix);
382
- }
383
-
384
- const child = spawn(
385
- pathResolve(this.#popplerPath, "pdfimages"),
386
- args
387
- );
388
-
389
- if (Buffer.isBuffer(file)) {
390
- child.stdin.write(file);
391
- child.stdin.end();
392
- }
393
-
394
- let stdOut = "";
395
- let stdErr = "";
396
-
397
- child.stdout.on("data", (data) => {
398
- stdOut += data;
399
- });
1201
+ args.push(Buffer.isBuffer(file) ? "-" : file);
400
1202
 
401
- child.stderr.on("data", (data) => {
402
- stdErr += data;
403
- });
1203
+ if (outputPrefix) {
1204
+ args.push(outputPrefix);
1205
+ }
404
1206
 
405
- child.on("close", (code) => {
406
- /* istanbul ignore else */
407
- if (stdOut !== "") {
408
- resolve(stdOut.trim());
409
- } else if (code === 0) {
410
- resolve(ERROR_MSGS[code]);
411
- } else if (stdErr !== "") {
412
- reject(new Error(stdErr.trim()));
413
- } else {
414
- reject(
415
- new Error(
416
- // @ts-ignore: Second operand used if code is not in ERROR_MSGS
417
- ERROR_MSGS[code] ||
418
- `pdfimages ${args.join(
419
- " "
420
- )} exited with code ${code}`
421
- )
422
- );
423
- }
424
- });
425
- });
1207
+ return executeBinary(this.#pdfImagesBin, args, file);
426
1208
  }
427
1209
 
428
1210
  /**
429
1211
  * @author Frazer Smith
430
1212
  * @description Prints the contents of the `Info` dictionary from a PDF file.
431
1213
  * @param {(Buffer|string)} file - PDF file as Buffer, or filepath of the PDF file to read.
432
- * @param {object} [options] - Object containing options to pass to binary.
433
- * @param {number} [options.firstPageToConvert] - First page to print.
434
- * @param {number} [options.lastPageToConvert] - Last page to print.
435
- * @param {boolean} [options.listEncodingOptions] - List the available encodings.
436
- * @param {string} [options.outputEncoding] - Sets the encoding to use for text output.
437
- * This defaults to `UTF-8`.
438
- * @param {string} [options.ownerPassword] - Owner password (for encrypted files).
439
- * @param {boolean} [options.printAsJson] - Print result as a JSON object.
440
- * @param {boolean} [options.printBoundingBoxes] - Prints the page box bounding boxes:
441
- * MediaBox, CropBox, BleedBox, TrimBox, and ArtBox.
442
- * @param {boolean} [options.printDocStruct] - Prints the logical document structure
443
- * of a Tagged-PDF file.
444
- * @param {boolean} [options.printDocStructText] - Print the textual content along with the
445
- * document structure of a Tagged-PDF file. Note that extracting text this way might be slow
446
- * for big PDF files.
447
- * @param {boolean} [options.printIsoDates] - Prints dates in ISO-8601 format (including the time zone).
448
- * @param {boolean} [options.printJS] - Prints all JavaScript in the PDF file.
449
- * @param {boolean} [options.printMetadata] - Prints document-level metadata. (This is the `Metadata`
450
- * stream from the PDF file's Catalog object).
451
- * @param {boolean} [options.printNamedDests] - Print a list of all named destinations. If a page range
452
- * is specified using the `options.firstPageToConvert` and `options.lastPageToConvert` options, only destinations
453
- * in the page range are listed.
454
- * @param {boolean} [options.printRawDates] - Prints the raw (undecoded) date strings, directly from the PDF file.
455
- * @param {boolean} [options.printUrls] - Print all URLs in the PDF; only URLs referenced by PDF objects
456
- * such as Link Annotations are listed, not URL strings in the text content.
457
- * @param {boolean} [options.printVersionInfo] - Print copyright and version info.
458
- * @param {string} [options.userPassword] - User password (for encrypted files).
1214
+ * @param {PdfInfoOptions} [options] - Options to pass to pdfinfo binary.
459
1215
  * @returns {Promise<object|string>} A promise that resolves with a stdout string or JSON object if
460
1216
  * `options.printAsJson` is `true`, or rejects with an `Error` object.
461
1217
  */
462
1218
  async pdfInfo(file, options = {}) {
463
- /** @type {PopplerAcceptedOptions} */
464
- const acceptedOptions = {
465
- firstPageToConvert: { arg: "-f", type: "number" },
466
- lastPageToConvert: { arg: "-l", type: "number" },
467
- listEncodingOptions: { arg: "-listenc", type: "boolean" },
468
- outputEncoding: { arg: "-enc", type: "string" },
469
- ownerPassword: { arg: "-opw", type: "string" },
470
- printAsJson: { arg: "", type: "boolean" },
471
- printBoundingBoxes: { arg: "-box", type: "boolean" },
472
- printDocStruct: { arg: "-struct", type: "boolean" },
473
- printDocStructText: { arg: "-struct-text", type: "boolean" },
474
- printIsoDates: { arg: "-isodates", type: "boolean" },
475
- printJS: { arg: "-js", type: "boolean" },
476
- printMetadata: { arg: "-meta", type: "boolean" },
477
- printNamedDests: { arg: "-dests", type: "boolean" },
478
- printRawDates: { arg: "-rawdates", type: "boolean" },
479
- printUrls: { arg: "-url", type: "boolean", minVersion: "21.11.0" },
480
- printVersionInfo: { arg: "-v", type: "boolean" },
481
- userPassword: { arg: "-upw", type: "string" },
482
- };
483
-
484
- const { stderr } = await execFileAsync(
485
- pathResolve(this.#popplerPath, "pdfinfo"),
486
- ["-v"]
487
- );
488
-
489
- // @ts-ignore: parseOptions checks if falsy
490
- const versionInfo = POPPLER_VERSION_REG.exec(stderr)[1];
491
-
1219
+ const acceptedOptions = this.#getAcceptedOptions("pdfInfo");
1220
+ const versionInfo = await this.#getVersion(this.#pdfInfoBin);
492
1221
  const args = parseOptions(acceptedOptions, options, versionInfo);
493
1222
 
494
1223
  // Fetch file size if stdin input is a Buffer, as Poppler omits it
495
1224
  /** @type {number} */
496
1225
  let fileSize;
497
1226
 
498
- return new Promise((resolve, reject) => {
499
- if (Buffer.isBuffer(file)) {
500
- args.push("-");
501
- fileSize = file.length;
502
- } else {
503
- args.push(file);
504
- }
1227
+ if (Buffer.isBuffer(file)) {
1228
+ args.push("-");
1229
+ fileSize = file.length;
1230
+ } else {
1231
+ args.push(file);
1232
+ }
505
1233
 
506
- const child = spawn(
507
- pathResolve(this.#popplerPath, "pdfinfo"),
508
- args
509
- );
1234
+ return new Promise((resolve, reject) => {
1235
+ const child = spawn(this.#pdfInfoBin, args);
510
1236
 
511
1237
  if (Buffer.isBuffer(file)) {
512
1238
  child.stdin.write(file);
@@ -557,8 +1283,7 @@ class Poppler {
557
1283
  } else {
558
1284
  reject(
559
1285
  new Error(
560
- // @ts-ignore: Second operand used if code is not in ERROR_MSGS
561
- ERROR_MSGS[code] ||
1286
+ ERROR_MSGS[code ?? -1] ||
562
1287
  `pdfinfo ${args.join(
563
1288
  " "
564
1289
  )} exited with code ${code}`
@@ -578,37 +1303,16 @@ class Poppler {
578
1303
  * @param {string} outputPattern - Should contain %d (or any variant respecting printf format),
579
1304
  * since %d is replaced by the page number.
580
1305
  * As an example, `sample-%d.pdf` will produce `sample-1.pdf` for a single page document.
581
- * @param {object} [options] - Object containing options to pass to binary.
582
- * @param {number} [options.firstPageToExtract] - Specifies the first page to extract.
583
- * This defaults to page 1.
584
- * @param {number} [options.lastPageToExtract] - Specifies the last page to extract.
585
- * This defaults to the last page of the PDF file.
586
- * @param {boolean} [options.printVersionInfo] - Print copyright and version info.
1306
+ * @param {PdfSeparateOptions} [options] - Options to pass to pdfseparate binary.
587
1307
  * @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
588
1308
  */
589
1309
  async pdfSeparate(file, outputPattern, options = {}) {
590
- /** @type {PopplerAcceptedOptions} */
591
- const acceptedOptions = {
592
- firstPageToExtract: { arg: "-f", type: "number" },
593
- lastPageToExtract: { arg: "-l", type: "number" },
594
- printVersionInfo: { arg: "-v", type: "boolean" },
595
- };
596
-
597
- const { stderr } = await execFileAsync(
598
- pathResolve(this.#popplerPath, "pdfseparate"),
599
- ["-v"]
600
- );
601
-
602
- // @ts-ignore: parseOptions checks if falsy
603
- const versionInfo = POPPLER_VERSION_REG.exec(stderr)[1];
604
-
1310
+ const acceptedOptions = this.#getAcceptedOptions("pdfSeparate");
1311
+ const versionInfo = await this.#getVersion(this.#pdfSeparateBin);
605
1312
  const args = parseOptions(acceptedOptions, options, versionInfo);
606
1313
  args.push(file, outputPattern);
607
1314
 
608
- const { stdout } = await execFileAsync(
609
- pathResolve(this.#popplerPath, "pdfseparate"),
610
- args
611
- );
1315
+ const { stdout } = await execFileAsync(this.#pdfSeparateBin, args);
612
1316
  return stdout;
613
1317
  }
614
1318
 
@@ -623,227 +1327,20 @@ class Poppler {
623
1327
  * Encoding is set to `binary` if used with `options.singleFile` or `options.pdfFile`.
624
1328
  *
625
1329
  * If not set then the output filename will be derived from the PDF file name.
626
- * @param {object} [options] - Object containing options to pass to binary.
627
- * @param {('best'|'default'|'fast'|'good'|'gray'|'none'|'subpixel')} [options.antialias] - Set the cairo
628
- * antialias option used for text and drawing in image files (or rasterized regions in vector output).
629
- * @param {boolean} [options.cropBox] - Uses the crop box rather than media box when
630
- * generating the files (PNG/JPEG/TIFF only).
631
- * @param {number} [options.cropHeight] - Specifies the height of crop area in pixels
632
- * (image output) or points (vector output).
633
- * @param {number} [options.cropSize] - Specifies the size of crop square in pixels
634
- * (image output) or points (vector output).
635
- * @param {number} [options.cropWidth] - Specifies the width of crop area in pixels
636
- * (image output) or points (vector output).
637
- * @param {number} [options.cropXAxis] - Specifies the x-coordinate of the crop area top left
638
- * corner in pixels (image output) or points (vector output).
639
- * @param {number} [options.cropYAxis] - Specifies the y-coordinate of the crop area top left
640
- * corner in pixels (image output) or points (vector output).
641
- * @param {boolean} [options.duplex] - Adds the %%IncludeFeature: *Duplex DuplexNoTumble DSC
642
- * comment to the PostScript file (PS only). This tells the print manager to enable duplexing.
643
- * @param {boolean} [options.epsFile] - Generate an EPS file. An EPS file contains a single image,
644
- * so if you use this option with a multi-page PDF file, you must use `options.firstPageToConvert` and
645
- * `options.lastPageToConvert` to specify a single page.
646
- * The page size options (originalPageSizes, paperSize, paperWidth, paperHeight) can not be used
647
- * with this option.
648
- * @param {boolean} [options.evenPagesOnly] - Generates only the even numbered pages.
649
- * @param {boolean} [options.fillPage] - Expand PDF pages smaller than the paper to fill the
650
- * paper (PS,PDF,SVG only). By default, these pages are not scaled.
651
- * @param {number} [options.firstPageToConvert] - Specifies the first page to convert.
652
- * @param {boolean} [options.grayscaleFile] - Generate grayscale file (PNG, JPEG, and TIFF only).
653
- * @param {string} [options.iccFile] - Use the specified ICC file as the output profile
654
- * (PNG only). The profile will be embedded in the PNG file.
655
- * @param {boolean} [options.jpegFile] - Generate JPEG file(s).
656
- * @param {string} [options.jpegOptions] - When used with `options.jpegFile`, this option can
657
- * be used to control the JPEG compression parameters. It takes a string of the form
658
- * `"<opt>=<val>[,<opt>=<val>]"`. Currently available options are:
659
- * - `quality` Selects the JPEG quality value. The value must be an integer between 0 and 100.
660
- * - `progressive` Select progressive JPEG output. The possible values are "y", "n", indicating
661
- * progressive (yes) or non-progressive (no), respectively.
662
- * - `optimize` Sets whether to compute optimal Huffman coding tables for the JPEG output, which
663
- * will create smaller files but make an extra pass over the data. The value must be "y" or "n",
664
- * with "y" performing optimization, otherwise the default Huffman tables are used.
665
- *
666
- * Example: `"quality=95,optimize=y"`.
667
- * @param {number} [options.lastPageToConvert] - Specifies the last page to convert.
668
- * @param {boolean} [options.monochromeFile] - Generate monochrome file (PNG and TIFF only).
669
- * @param {boolean} [options.noCenter] - By default, PDF pages smaller than the paper
670
- * (after any scaling) are centered on the paper. This option causes them to be aligned to
671
- * the lower-left corner of the paper instead (PS,PDF,SVG only).
672
- * @param {boolean} [options.noCrop] - By default, printing output is cropped to the CropBox
673
- * specified in the PDF file. This option disables cropping (PS, PDF, SVG only).
674
- * @param {boolean} [options.noShrink] - Do not scale PDF pages which are larger than the paper
675
- * (PS,PDF,SVG only). By default, pages larger than the paper are shrunk to fit.
676
- * @param {boolean} [options.oddPagesOnly] - Generates only the odd numbered pages.
677
- * @param {boolean} [options.originalPageSizes] - Set the paper size of each page to match
678
- * the size specified in the PDF file.
679
- * @param {string} [options.ownerPassword] - Specify the owner password for the PDF file.
680
- * Providing this will bypass all security restrictions.
681
- * @param {number} [options.paperHeight] - Set the paper height, in points (PS, PDF, SVG only).
682
- * @param {('A3'|'A4'|'legal'|'letter'|'match')} [options.paperSize] - Set the paper size to one of `A3`, `A4`,
683
- * `legal`, or `letter` (PS,PDF,SVG only). This can also be set to `match`, which will set the paper size
684
- * of each page to match the size specified in the PDF file. If none of the paperSize,
685
- * paperWidth, or paperHeight options are specified the default is to match the paper size.
686
- * @param {number} [options.paperWidth] - Set the paper width, in points (PS,PDF,SVG only).
687
- * @param {boolean} [options.pdfFile] - Generate PDF file.
688
- * @param {boolean} [options.pngFile] - Generate PNG file(s).
689
- * @param {boolean} [options.printVersionInfo] - Print copyright and version information.
690
- * @param {boolean} [options.printDocStruct] - If the input file contains structural information
691
- * about the document's content, write this information to the output file (PDF only).
692
- * @param {boolean} [options.psFile] - Generate PS file.
693
- * @param {boolean} [options.psLevel2] - Generate Level 2 PostScript (PS only).
694
- * @param {boolean} [options.psLevel3] - Generate Level 3 PostScript (PS only). This enables all
695
- * Level 2 features plus shading patterns and masked images. This is the default setting.
696
- * @param {boolean} [options.quiet] - Do not print any messages or errors.
697
- * @param {number} [options.resolutionXAxis] - Specifies the X resolution, in pixels per inch of
698
- * image files (or rasterized regions in vector output). The default is 150 PPI.
699
- * @param {number} [options.resolutionXYAxis] - Specifies the X and Y resolution, in pixels per
700
- * inch of image files (or rasterized regions in vector output). The default is 150 PPI.
701
- * @param {number} [options.resolutionYAxis] - Specifies the Y resolution, in pixels per inch of
702
- * image files (or rasterized regions in vector output). The default is 150 PPI.
703
- * @param {number} [options.scalePageTo] - Scales the long side of each page (width for landscape
704
- * pages, height for portrait pages) to fit in scale-to pixels. The size of the short side will
705
- * be determined by the aspect ratio of the page (PNG/JPEG/TIFF only).
706
- * @param {number} [options.scalePageToXAxis] - Scales each page horizontally to fit in scale-to-x
707
- * pixels. If scale-to-y is set to -1, the vertical size will determined by the aspect ratio of
708
- * the page (PNG/JPEG/TIFF only).
709
- * @param {number} [options.scalePageToYAxis] - Scales each page vertically to fit in scale-to-y
710
- * pixels. If scale-to-x is set to -1, the horizontal size will determined by the aspect ratio of
711
- * the page (PNG/JPEG/TIFF only).
712
- * @param {boolean} [options.singleFile] - Writes only the first page and does not add digits.
713
- * Can only be used with `options.jpegFile`, `options.pngFile`, and `options.tiffFile`.
714
- * @param {boolean} [options.svgFile] - Generate SVG (Scalable Vector Graphics) file.
715
- * @param {('deflate'|'jpeg'|'lzw'|'none'|'packbits')} [options.tiffCompression] - Set TIFF compression.
716
- * @param {boolean} [options.tiffFile] - Generate TIFF file(s).
717
- * @param {boolean} [options.transparentPageColor] - Use a transparent page color
718
- * instead of white (PNG and TIFF only).
719
- * @param {string} [options.userPassword] - Specify the user password for the PDF file.
1330
+ * @param {PdfToCairoOptions} [options] - Options to pass to pdftocairo binary.
720
1331
  * @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
721
1332
  */
722
1333
  async pdfToCairo(file, outputFile, options = {}) {
723
- /** @type {PopplerAcceptedOptions} */
724
- const acceptedOptions = {
725
- antialias: { arg: "-antialias", type: "string" },
726
- cropBox: { arg: "-cropbox", type: "boolean" },
727
- cropHeight: { arg: "-H", type: "number" },
728
- cropSize: { arg: "-sz", type: "number" },
729
- cropWidth: { arg: "-W", type: "number" },
730
- cropXAxis: { arg: "-x", type: "number" },
731
- cropYAxis: { arg: "-y", type: "number" },
732
- duplex: { arg: "-duplex", type: "boolean" },
733
- epsFile: { arg: "-eps", type: "boolean" },
734
- evenPagesOnly: { arg: "-e", type: "boolean" },
735
- fillPage: { arg: "-expand", type: "boolean" },
736
- firstPageToConvert: { arg: "-f", type: "number" },
737
- grayscaleFile: { arg: "-gray", type: "boolean" },
738
- iccFile: { arg: "-icc", type: "string" },
739
- jpegFile: { arg: "-jpeg", type: "boolean" },
740
- jpegOptions: { arg: "-jpegopt", type: "string" },
741
- lastPageToConvert: { arg: "-l", type: "number" },
742
- monochromeFile: { arg: "-mono", type: "boolean" },
743
- noCenter: { arg: "-nocenter", type: "boolean" },
744
- noCrop: { arg: "-nocrop", type: "boolean" },
745
- noShrink: { arg: "-noshrink", type: "boolean" },
746
- oddPagesOnly: { arg: "-o", type: "boolean" },
747
- originalPageSizes: { arg: "-origpagesizes", type: "boolean" },
748
- ownerPassword: { arg: "-opw", type: "string" },
749
- paperHeight: { arg: "-paperh", type: "number" },
750
- paperSize: { arg: "-paper", type: "string" },
751
- paperWidth: { arg: "-paperw", type: "number" },
752
- pdfFile: { arg: "-pdf", type: "boolean" },
753
- pngFile: { arg: "-png", type: "boolean" },
754
- printDocStruct: {
755
- arg: "-struct",
756
- type: "boolean",
757
- minVersion: "23.11.0",
758
- },
759
- printVersionInfo: { arg: "-v", type: "boolean" },
760
- psFile: { arg: "-ps", type: "boolean" },
761
- psLevel2: { arg: "-level2", type: "boolean" },
762
- psLevel3: { arg: "-level3", type: "boolean" },
763
- quiet: { arg: "-q", type: "boolean" },
764
- resolutionXAxis: { arg: "-rx", type: "number" },
765
- resolutionXYAxis: { arg: "-r", type: "number" },
766
- resolutionYAxis: { arg: "-ry", type: "number" },
767
- scalePageTo: { arg: "-scale-to", type: "number" },
768
- scalePageToXAxis: { arg: "-scale-to-x", type: "number" },
769
- scalePageToYAxis: { arg: "-scale-to-y", type: "number" },
770
- singleFile: { arg: "-singlefile", type: "boolean" },
771
- svgFile: { arg: "-svg", type: "boolean" },
772
- tiffCompression: { arg: "-tiffcompression", type: "string" },
773
- tiffFile: { arg: "-tiff", type: "boolean" },
774
- transparentPageColor: { arg: "-transp", type: "boolean" },
775
- userPassword: { arg: "-upw", type: "string" },
776
- };
777
-
778
- try {
779
- const { stderr } = await execFileAsync(
780
- pathResolve(this.#popplerPath, "pdftocairo"),
781
- ["-v"]
782
- );
783
-
784
- // @ts-ignore: parseOptions checks if falsy
785
- const versionInfo = POPPLER_VERSION_REG.exec(stderr)[1];
786
-
787
- const args = parseOptions(acceptedOptions, options, versionInfo);
788
-
789
- return new Promise((resolve, reject) => {
790
- args.push(
791
- Buffer.isBuffer(file) ? "-" : file,
792
- outputFile || "-"
793
- );
794
-
795
- const child = spawn(
796
- pathResolve(this.#popplerPath, "pdftocairo"),
797
- args
798
- );
799
-
800
- if (
801
- outputFile === undefined &&
802
- args.some((arg) => ["-singlefile", "-pdf"].includes(arg))
803
- ) {
804
- child.stdout.setEncoding("binary");
805
- }
806
-
807
- if (Buffer.isBuffer(file)) {
808
- child.stdin.write(file);
809
- child.stdin.end();
810
- }
811
-
812
- let stdOut = "";
813
- let stdErr = "";
814
-
815
- child.stdout.on("data", (data) => {
816
- stdOut += data;
817
- });
1334
+ const acceptedOptions = this.#getAcceptedOptions("pdfToCairo");
1335
+ const versionInfo = await this.#getVersion(this.#pdfToCairoBin);
1336
+ const args = parseOptions(acceptedOptions, options, versionInfo);
1337
+ args.push(Buffer.isBuffer(file) ? "-" : file, outputFile || "-");
818
1338
 
819
- child.stderr.on("data", (data) => {
820
- stdErr += data;
821
- });
1339
+ const binaryOutput =
1340
+ outputFile === undefined &&
1341
+ args.some((arg) => ["-singlefile", "-pdf"].includes(arg));
822
1342
 
823
- child.on("close", (code) => {
824
- /* istanbul ignore else */
825
- if (stdOut !== "") {
826
- resolve(stdOut.trim());
827
- } else if (code === 0) {
828
- resolve(ERROR_MSGS[code]);
829
- } else if (stdErr !== "") {
830
- reject(new Error(stdErr.trim()));
831
- } else {
832
- reject(
833
- new Error(
834
- // @ts-ignore: Second operand used if code is not in ERROR_MSGS
835
- ERROR_MSGS[code] ||
836
- `pdftocairo ${args.join(
837
- " "
838
- )} exited with code ${code}`
839
- )
840
- );
841
- }
842
- });
843
- });
844
- } catch (err) {
845
- return Promise.reject(err);
846
- }
1343
+ return executeBinary(this.#pdfToCairoBin, args, file, { binaryOutput });
847
1344
  }
848
1345
 
849
1346
  /**
@@ -855,90 +1352,21 @@ class Poppler {
855
1352
  * and create a new file, with `-html` appended to the end of the filename.
856
1353
  *
857
1354
  * Required if `file` is a Buffer.
858
- * @param {object} [options] - Object containing options to pass to binary.
859
- * @param {boolean} [options.complexOutput] - Generate complex output.
860
- * @param {boolean} [options.dataUrls] - Use data URLs instead of external images in HTML.
861
- * @param {boolean} [options.exchangePdfLinks] - Exchange .pdf links with .html.
862
- * @param {boolean} [options.extractHidden] - Force hidden text extraction.
863
- * @param {number} [options.firstPageToConvert] - First page to print.
864
- * @param {boolean} [options.fontFullName] - Outputs the font name without any substitutions.
865
- * @param {boolean} [options.ignoreImages] - Ignore images.
866
- * @param {('JPG'|'PNG')} [options.imageFormat] - Image file format for Splash output (JPG or PNG).
867
- * If complexOutput is selected, but imageFormat is not specified, PNG will be assumed.
868
- * @param {number} [options.lastPageToConvert] - Last page to print.
869
- * @param {boolean} [options.noDrm] - Override document DRM settings.
870
- * @param {boolean} [options.noFrames] - Generate no frames. Not supported in complex output mode.
871
- * @param {boolean} [options.noMergeParagraph] - Do not merge paragraphs.
872
- * @param {boolean} [options.noRoundedCoordinates] - Do not round coordinates
873
- * (with XML output only).
874
- * @param {string} [options.outputEncoding] - Sets the encoding to use for text output.
875
- * This defaults to `UTF-8`.
876
- * @param {string} [options.ownerPassword] - Owner password (for encrypted files).
877
- * @param {boolean} [options.printVersionInfo] - Print copyright and version info.
878
- * @param {boolean} [options.quiet] - Do not print any messages or errors.
879
- * @param {boolean} [options.singlePage] - Generate single HTML that includes all pages.
880
- * @param {boolean} [options.stdout] - Use standard output.
881
- * @param {string} [options.userPassword] - User password (for encrypted files).
882
- * @param {number} [options.wordBreakThreshold] - Adjust the word break threshold percent.
883
- * Default is 10. Word break occurs when distance between two adjacent characters is greater
884
- * than this percent of character height.
885
- * @param {boolean} [options.xmlOutput] - Output for XML post-processing.
886
- * @param {number} [options.zoom] - Zoom the PDF document (default 1.5).
1355
+ * @param {PdfToHtmlOptions} [options] - Options to pass to pdftohtml binary.
887
1356
  * @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
888
1357
  */
889
1358
  async pdfToHtml(file, outputFile, options = {}) {
890
- /** @type {PopplerAcceptedOptions} */
891
- const acceptedOptions = {
892
- complexOutput: { arg: "-c", type: "boolean" },
893
- dataUrls: {
894
- arg: "-dataurls",
895
- type: "boolean",
896
- minVersion: "0.75.0",
897
- },
898
- exchangePdfLinks: { arg: "-p", type: "boolean" },
899
- extractHidden: { arg: "-hidden", type: "boolean" },
900
- firstPageToConvert: { arg: "-f", type: "number" },
901
- fontFullName: { arg: "-fontfullname", type: "boolean" },
902
- ignoreImages: { arg: "-i", type: "boolean" },
903
- imageFormat: { arg: "-fmt", type: "string" },
904
- lastPageToConvert: { arg: "-l", type: "number" },
905
- noDrm: { arg: "-nodrm", type: "boolean" },
906
- noFrames: { arg: "-noframes", type: "boolean" },
907
- noMergeParagraph: { arg: "-nomerge", type: "boolean" },
908
- noRoundedCoordinates: { arg: "-noroundcoord", type: "boolean" },
909
- outputEncoding: { arg: "-enc", type: "string" },
910
- ownerPassword: { arg: "-opw", type: "string" },
911
- printVersionInfo: { arg: "-v", type: "boolean" },
912
- quiet: { arg: "-q", type: "boolean" },
913
- singlePage: { arg: "-s", type: "boolean" },
914
- stdout: { arg: "-stdout", type: "boolean" },
915
- userPassword: { arg: "-upw", type: "string" },
916
- wordBreakThreshold: { arg: "-wbt", type: "number" },
917
- xmlOutput: { arg: "-xml", type: "boolean" },
918
- zoom: { arg: "-zoom", type: "number" },
919
- };
920
-
921
- const { stderr } = await execFileAsync(
922
- pathResolve(this.#popplerPath, "pdftohtml"),
923
- ["-v"]
924
- );
925
-
926
- // @ts-ignore: parseOptions checks if falsy
927
- const versionInfo = POPPLER_VERSION_REG.exec(stderr)[1];
928
-
1359
+ const acceptedOptions = this.#getAcceptedOptions("pdfToHtml");
1360
+ const versionInfo = await this.#getVersion(this.#pdfToHtmlBin);
929
1361
  const args = parseOptions(acceptedOptions, options, versionInfo);
1362
+ args.push(Buffer.isBuffer(file) ? "-" : file);
930
1363
 
931
- return new Promise((resolve, reject) => {
932
- args.push(Buffer.isBuffer(file) ? "-" : file);
933
-
934
- if (outputFile) {
935
- args.push(outputFile);
936
- }
1364
+ if (outputFile) {
1365
+ args.push(outputFile);
1366
+ }
937
1367
 
938
- const child = spawn(
939
- pathResolve(this.#popplerPath, "pdftohtml"),
940
- args
941
- );
1368
+ return new Promise((resolve, reject) => {
1369
+ const child = spawn(this.#pdfToHtmlBin, args);
942
1370
 
943
1371
  if (Buffer.isBuffer(file)) {
944
1372
  child.stdin.write(file);
@@ -974,193 +1402,16 @@ class Poppler {
974
1402
  * in Portable Bitmap (PBM) format.
975
1403
  * @param {(Buffer|string)} file - PDF file as Buffer, or filepath of the PDF file to read.
976
1404
  * @param {string} outputPath - Filepath to output the results to.
977
- * @param {object} [options] - Object containing options to pass to binary.
978
- * @param {('no'|'yes')} [options.antialiasFonts] - Enable or disable font anti-aliasing.
979
- * This defaults to `yes`.
980
- * @param {('no'|'yes')} [options.antialiasVectors] - Enable or disable vector anti-aliasing.
981
- * This defaults to `yes`.
982
- * @param {boolean} [options.cropBox] - Uses the crop box rather than media box when
983
- * generating the files (PNG/JPEG/TIFF only).
984
- * @param {number} [options.cropHeight] - Specifies the height of crop area in pixels
985
- * (image output) or points (vector output).
986
- * @param {number} [options.cropSize] - Specifies the size of crop square in pixels
987
- * (image output) or points (vector output).
988
- * @param {number} [options.cropWidth] - Specifies the width of crop area in pixels
989
- * (image output) or points (vector output).
990
- * @param {number} [options.cropXAxis] - Specifies the x-coordinate of the crop area top left
991
- * corner in pixels (image output) or points (vector output).
992
- * @param {number} [options.cropYAxis] - Specifies the y-coordinate of the crop area top left
993
- * corner in pixels (image output) or points (vector output).
994
- * @param {string} [options.defaultCmykProfile] - If Poppler is compiled with colour management support, this option
995
- * sets the DefaultCMYK color space to the ICC profile stored in the display profile file passed.
996
- * @param {string} [options.defaultGrayProfile] - If Poppler is compiled with colour management support, this option
997
- * sets the DefaultGray color space to the ICC profile stored in the display profile file passed.
998
- * @param {string} [options.defaultRgbProfile] - If Poppler is compiled with colour management support, this option
999
- * sets the DefaultRGB color space to the ICC profile stored in the display profile file passed.
1000
- * @param {string} [options.displayProfile] - If Poppler is compiled with colour management support, this option
1001
- * sets the display profile to the ICC profile stored in the display profile file passed.
1002
- * @param {boolean} [options.evenPagesOnly] - Generates only the even numbered pages.
1003
- * @param {number} [options.firstPageToConvert] - Specifies the first page to convert.
1004
- * @param {('no'|'yes')} [options.freetype] - Enable or disable FreeType (a TrueType / Type 1 font rasterizer).
1005
- * This defaults to `yes`.
1006
- * @param {boolean} [options.forcePageNumber] - Force page number even if there is only one page.
1007
- * @param {boolean} [options.grayscaleFile] - Generate grayscale PGM file (instead of a color PPM file).
1008
- * @param {boolean} [options.hideAnnotations] - Hide annotations.
1009
- * @param {boolean} [options.jpegFile] - Generate JPEG file instead of a PPM file.
1010
- * @param {number} [options.lastPageToConvert] - Specifies the last page to convert.
1011
- * @param {boolean} [options.monochromeFile] - Generate monochrome PBM file (instead of a color PPM file).
1012
- * @param {boolean} [options.oddPagesOnly] - Generates only the odd numbered pages.
1013
- * @param {string} [options.ownerPassword] - Specify the owner password for the PDF file.
1014
- * Providing this will bypass all security restrictions.
1015
- * @param {boolean} [options.pngFile] - Generate PNG file instead of a PPM file.
1016
- * @param {boolean} [options.printProgress] - Print progress info as each page is generated.
1017
- * Three space-separated fields are printed to STDERR: the number of the current page, the number
1018
- * of the last page that will be generated, and the path to the file written to.
1019
- * @param {boolean} [options.printVersionInfo] - Print copyright and version information.
1020
- * @param {boolean} [options.quiet] - Do not print any messages or errors.
1021
- * @param {number} [options.resolutionXAxis] - Specifies the X resolution, in pixels per inch of
1022
- * image files (or rasterized regions in vector output). The default is 150 PPI.
1023
- * @param {number} [options.resolutionXYAxis] - Specifies the X and Y resolution, in pixels per
1024
- * inch of image files (or rasterized regions in vector output). The default is 150 PPI.
1025
- * @param {number} [options.resolutionYAxis] - Specifies the Y resolution, in pixels per inch of
1026
- * image files (or rasterized regions in vector output). The default is 150 PPI.
1027
- * @param {number} [options.scalePageTo] - Scales the long side of each page (width for landscape
1028
- * pages, height for portrait pages) to fit in scale-to pixels. The size of the short side will
1029
- * be determined by the aspect ratio of the page.
1030
- * @param {number} [options.scalePageToXAxis] - Scales each page horizontally to fit in scale-to-x
1031
- * pixels. If scale-to-y is set to -1, the vertical size will be determined by the aspect ratio of
1032
- * the page.
1033
- * @param {number} [options.scalePageToYAxis] - Scales each page vertically to fit in scale-to-y
1034
- * pixels. If scale-to-x is set to -1, the horizontal size will be determined by the aspect ratio of
1035
- * the page.
1036
- * @param {string} [options.separator] - Specify single character separator between name and page number.
1037
- * @param {boolean} [options.singleFile] - Writes only the first page and does not add digits.
1038
- * @param {('none'|'shape'|'solid')} [options.thinLineMode] - Specifies the thin line mode. This defaults to `none`.
1039
- * @param {('deflate'|'jpeg'|'lzw'|'none'|'packbits')} [options.tiffCompression] - Set TIFF compression.
1040
- * @param {boolean} [options.tiffFile] - Generate TIFF file instead of a PPM file.
1041
- * @param {string} [options.userPassword] - Specify the user password for the PDF file.
1405
+ * @param {PdfToPpmOptions} [options] - Options to pass to pdftoppm binary.
1042
1406
  * @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
1043
1407
  */
1044
1408
  async pdfToPpm(file, outputPath, options = {}) {
1045
- /** @type {PopplerAcceptedOptions} */
1046
- const acceptedOptions = {
1047
- antialiasFonts: { arg: "-aa", type: "string" },
1048
- antialiasVectors: { arg: "-aaVector", type: "string" },
1049
- cropBox: { arg: "-cropbox", type: "boolean" },
1050
- cropHeight: { arg: "-H", type: "number" },
1051
- cropSize: { arg: "-sz", type: "number" },
1052
- cropWidth: { arg: "-W", type: "number" },
1053
- cropXAxis: { arg: "-x", type: "number" },
1054
- cropYAxis: { arg: "-y", type: "number" },
1055
- defaultCmykProfile: {
1056
- arg: "-defaultcmykprofile",
1057
- type: "string",
1058
- minVersion: "21.01.0",
1059
- },
1060
- defaultGrayProfile: {
1061
- arg: "-defaultgrayprofile",
1062
- type: "string",
1063
- minVersion: "21.01.0",
1064
- },
1065
- defaultRgbProfile: {
1066
- arg: "-defaultrgbprofile",
1067
- type: "string",
1068
- minVersion: "21.01.0",
1069
- },
1070
- displayProfile: {
1071
- arg: "-displayprofile",
1072
- type: "string",
1073
- minVersion: "0.90.0",
1074
- },
1075
- evenPagesOnly: { arg: "-e", type: "boolean" },
1076
- firstPageToConvert: { arg: "-f", type: "number" },
1077
- forcePageNumber: {
1078
- arg: "-forcenum",
1079
- type: "boolean",
1080
- minVersion: "0.75.0",
1081
- },
1082
- freetype: { arg: "-freetype", type: "string" },
1083
- grayscaleFile: { arg: "-gray", type: "boolean" },
1084
- hideAnnotations: {
1085
- arg: "-hide-annotations",
1086
- type: "boolean",
1087
- minVersion: "0.84.0",
1088
- },
1089
- jpegFile: { arg: "-jpeg", type: "boolean" },
1090
- lastPageToConvert: { arg: "-l", type: "number" },
1091
- monochromeFile: { arg: "-mono", type: "boolean" },
1092
- oddPagesOnly: { arg: "-o", type: "boolean" },
1093
- ownerPassword: { arg: "-opw", type: "string" },
1094
- pngFile: { arg: "-png", type: "boolean" },
1095
- printProgress: {
1096
- arg: "-progress",
1097
- type: "boolean",
1098
- minVersion: "21.03.0",
1099
- },
1100
- printVersionInfo: { arg: "-v", type: "boolean" },
1101
- quiet: { arg: "-q", type: "boolean" },
1102
- resolutionXAxis: { arg: "-rx", type: "number" },
1103
- resolutionXYAxis: { arg: "-r", type: "number" },
1104
- resolutionYAxis: { arg: "-ry", type: "number" },
1105
- scalePageTo: { arg: "-scale-to", type: "number" },
1106
- scalePageToXAxis: { arg: "-scale-to-x", type: "number" },
1107
- scalePageToYAxis: { arg: "-scale-to-y", type: "number" },
1108
- separator: { arg: "-sep", type: "string", minVersion: "0.75.0" },
1109
- singleFile: { arg: "-singlefile", type: "boolean" },
1110
- thinLineMode: { arg: "-thinlinemode", type: "string" },
1111
- tiffCompression: { arg: "-tiffcompression", type: "string" },
1112
- tiffFile: { arg: "-tiff", type: "boolean" },
1113
- userPassword: { arg: "-upw", type: "string" },
1114
- };
1115
-
1116
- const { stderr } = await execFileAsync(
1117
- pathResolve(this.#popplerPath, "pdftoppm"),
1118
- ["-v"]
1119
- );
1120
-
1121
- // @ts-ignore: parseOptions checks if falsy
1122
- const versionInfo = POPPLER_VERSION_REG.exec(stderr)[1];
1123
-
1409
+ const acceptedOptions = this.#getAcceptedOptions("pdfToPpm");
1410
+ const versionInfo = await this.#getVersion(this.#pdfToPpmBin);
1124
1411
  const args = parseOptions(acceptedOptions, options, versionInfo);
1412
+ args.push(Buffer.isBuffer(file) ? "-" : file, outputPath);
1125
1413
 
1126
- return new Promise((resolve, reject) => {
1127
- args.push(Buffer.isBuffer(file) ? "-" : file, outputPath);
1128
-
1129
- const child = spawn(
1130
- pathResolve(this.#popplerPath, "pdftoppm"),
1131
- args
1132
- );
1133
-
1134
- if (Buffer.isBuffer(file)) {
1135
- child.stdin.write(file);
1136
- child.stdin.end();
1137
- }
1138
-
1139
- let stdErr = "";
1140
-
1141
- child.stderr.on("data", (data) => {
1142
- stdErr += data;
1143
- });
1144
-
1145
- child.on("close", (code) => {
1146
- /* istanbul ignore else */
1147
- if (stdErr !== "") {
1148
- reject(new Error(stdErr.trim()));
1149
- } else if (code === 0) {
1150
- resolve(ERROR_MSGS[code]);
1151
- } else {
1152
- reject(
1153
- new Error(
1154
- // @ts-ignore: Second operand used if code is not in ERROR_MSGS
1155
- ERROR_MSGS[code] ||
1156
- `pdftoppm ${args.join(
1157
- " "
1158
- )} exited with code ${code}`
1159
- )
1160
- );
1161
- }
1162
- });
1163
- });
1414
+ return executeBinary(this.#pdfToPpmBin, args, file);
1164
1415
  }
1165
1416
 
1166
1417
  /**
@@ -1169,235 +1420,16 @@ class Poppler {
1169
1420
  * @param {(Buffer|string)} file - PDF file as Buffer, or filepath of the PDF file to read.
1170
1421
  * @param {string} [outputFile] - Filepath of the file to output the results to.
1171
1422
  * If `undefined` then will write output to stdout.
1172
- * @param {object} [options] - Object containing options to pass to binary.
1173
- * @param {('no'|'yes')} [options.antialias] - Enable anti-aliasing on rasterization, accepts `no` or `yes`.
1174
- * @param {boolean} [options.binary] - Write binary data in Level 1 PostScript. By default,
1175
- * pdftops writes hex-encoded data in Level 1 PostScript. Binary data is non-standard in Level 1
1176
- * PostScript but reduces the file size and can be useful when Level 1 PostScript is required
1177
- * only for its restricted use of PostScript operators.
1178
- * @param {string} [options.defaultCmykProfile] - If Poppler is compiled with colour management support, this option
1179
- * sets the DefaultCMYK color space to the ICC profile stored in the display profile file passed.
1180
- * @param {string} [options.defaultGrayProfile] - If Poppler is compiled with colour management support, this option
1181
- * sets the DefaultGray color space to the ICC profile stored in the display profile file passed.
1182
- * @param {string} [options.defaultRgbProfile] - If Poppler is compiled with colour management support, this option
1183
- * sets the DefaultRGB color space to the ICC profile stored in the display profile file passed.
1184
- * @param {boolean} [options.duplex] - Set the Duplex pagedevice entry in the PostScript file.
1185
- * This tells duplex-capable printers to enable duplexing.
1186
- * @param {boolean} [options.epsFile] - Generate an EPS file. An EPS file contains a single image,
1187
- * so if you use this option with a multi-page PDF file, you must use `options.firstPageToConvert` and
1188
- * `options.lastPageToConvert` to specify a single page.
1189
- * The page size options (originalPageSizes, paperSize, paperWidth, paperHeight) can not be used
1190
- * with this option.
1191
- * @param {boolean} [options.fillPage] - Expand PDF pages smaller than the paper to fill the
1192
- * paper. By default, these pages are not scaled.
1193
- * @param {number} [options.firstPageToConvert] - Specifies the first page to convert.
1194
- * @param {boolean} [options.form] - Generate PostScript form which can be imported by software
1195
- * that understands forms.
1196
- * A form contains a single page, so if you use this option with a multi-page PDF file,
1197
- * you must use `options.firstPageToConvert` and `options.lastPageToConvert` to specify a single page.
1198
- * The `options.level1` option cannot be used with `options.form`.
1199
- * No more than one of the mode options (`options.epsFile`, `options.form`) may be given.
1200
- * @param {number} [options.lastPageToConvert] - Specifies the last page to convert.
1201
- * @param {boolean} [options.level1] - Generate Level 1 PostScript. The resulting PostScript
1202
- * files will be significantly larger (if they contain images), but will print on Level 1 printers.
1203
- * This also converts all images to black and white.
1204
- * @param {boolean} [options.level1Sep] - Generate Level 1 separable PostScript.
1205
- * All colors are converted to CMYK. Images are written with separate stream data for the four components.
1206
- * @param {boolean} [options.level2] - Generate Level 2 PostScript.
1207
- * Level 2 supports color images and image compression. This is the default setting.
1208
- * @param {boolean} [options.level2Sep] - Generate Level 2 separable PostScript. All colors are
1209
- * converted to CMYK. The PostScript separation convention operators are used to handle custom (spot) colors.
1210
- * @param {boolean} [options.level3] - Generate Level 3 PostScript.
1211
- * This enables all Level 2 features plus CID font embedding.
1212
- * @param {boolean} [options.level3Sep] - Generate Level 3 separable PostScript.
1213
- * The separation handling is the same as for `options.level2Sep`.
1214
- * @param {boolean} [options.noCenter] - By default, PDF pages smaller than the paper
1215
- * (after any scaling) are centered on the paper. This option causes them to be aligned to
1216
- * the lower-left corner of the paper instead.
1217
- * @param {boolean} [options.noCrop] - By default, printing output is cropped to the CropBox
1218
- * specified in the PDF file. This option disables cropping.
1219
- * @param {boolean} [options.noEmbedCIDFonts] - By default, any CID PostScript fonts which are
1220
- * embedded in the PDF file are copied into the PostScript file. This option disables that embedding.
1221
- * No attempt is made to substitute for non-embedded CID PostScript fonts.
1222
- * @param {boolean} [options.noEmbedCIDTrueTypeFonts] - By default, any CID TrueType fonts which are
1223
- * embedded in the PDF file are copied into the PostScript file. This option disables that embedding.
1224
- * No attempt is made to substitute for non-embedded CID TrueType fonts.
1225
- * @param {boolean} [options.noEmbedTrueTypeFonts] - By default, any TrueType fonts which are embedded
1226
- * in the PDF file are copied into the PostScript file. This option causes pdfToPs to substitute base fonts instead.
1227
- * Embedded fonts make PostScript files larger, but may be necessary for readable output.
1228
- * Also, some PostScript interpreters do not have TrueType rasterizers.
1229
- * @param {boolean} [options.noEmbedType1Fonts] - By default, any Type 1 fonts which are embedded in the PDF file
1230
- * are copied into the PostScript file. This option causes pdfToPs to substitute base fonts instead.
1231
- * Embedded fonts make PostScript files larger, but may be necessary for readable output.
1232
- * @param {boolean} [options.noShrink] - Do not scale PDF pages which are larger than the paper.
1233
- * By default, pages larger than the paper are shrunk to fit.
1234
- * @param {boolean} [options.opi] - Generate OPI comments for all images and forms which have OPI information.
1235
- * @param {boolean} [options.optimizecolorspace] - By default, bitmap images in the PDF pass through to the
1236
- * output PostScript in their original color space, which produces predictable results.
1237
- * This option converts RGB and CMYK images into Gray images if every pixel of the image has equal components.
1238
- * This can fix problems when doing color separations of PDFs that contain embedded black and
1239
- * white images encoded as RGB.
1240
- * @param {boolean} [options.originalPageSizes] - Set the paper size of each page to match
1241
- * the size specified in the PDF file.
1242
- * @param {boolean} [options.overprint] - Enable overprinting.
1243
- * @param {string} [options.ownerPassword] - Owner password (for encrypted files).
1244
- * @param {number} [options.paperHeight] - Set the paper height, in points.
1245
- * @param {('A3'|'A4'|'legal'|'letter'|'match')} [options.paperSize] - Set the paper size to one of `A3`, `A4`,
1246
- * `legal`, or `letter`. This can also be set to `match`, which will set the paper size
1247
- * of each page to match the size specified in the PDF file. If none of the paperSize,
1248
- * paperWidth, or paperHeight options are specified the default is to match the paper size.
1249
- * @param {number} [options.paperWidth] - Set the paper width, in points.
1250
- * @param {boolean} [options.passfonts] - By default, references to non-embedded 8-bit fonts
1251
- * in the PDF file are substituted with the closest `Helvetica`, `Times-Roman`, or `Courier` font.
1252
- * This option passes references to non-embedded fonts through to the PostScript file.
1253
- * @param {boolean} [options.preload] - Preload images and forms.
1254
- * @param {boolean} [options.printVersionInfo] - Print copyright and version information.
1255
- * @param {('CMYK8'|'MONO8'|'RGB8')} [options.processColorFormat] - Sets the process color format as it is used
1256
- * during rasterization and transparency reduction.
1257
- *
1258
- * The default depends on the other settings: For `options.level1` the default is MONO8; for `options.level1Sep`,
1259
- * `options.level2Sep`, `options.level3Sep`, or `options.overprint` the default is CMYK8; in all other
1260
- * cases RGB8 is the default.
1261
- * If `options.processColorProfile` is set then `options.processColorFormat` is inferred from the specified ICC profile.
1262
- * @param {string} [options.processColorProfile] - Sets the ICC profile that is assumed during
1263
- * rasterization and transparency reduction.
1264
- * @param {boolean} [options.quiet] - Do not print any messages or errors.
1265
- * @param {('always'|'never'|'whenneeded')} [options.rasterize] - By default, pdfToPs rasterizes pages as needed,
1266
- * for example, if they contain transparencies. To force rasterization, set `rasterize` to `always`.
1267
- * Use this to eliminate fonts.
1268
- * To prevent rasterization, set `rasterize` to `never`.
1269
- * This may produce files that display incorrectly.
1270
- * @param {number} [options.resolutionXYAxis] - Specifies the X and Y resolution, in pixels per
1271
- * inch of image files (or rasterized regions in vector output). The default is 300 PPI.
1272
- * @param {string} [options.userPassword] - User password (for encrypted files).
1423
+ * @param {PdfToPsOptions} [options] - Options to pass to pdftops binary.
1273
1424
  * @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
1274
1425
  */
1275
1426
  async pdfToPs(file, outputFile, options = {}) {
1276
- /** @type {PopplerAcceptedOptions} */
1277
- const acceptedOptions = {
1278
- antialias: { arg: "-aaRaster", type: "string" },
1279
- binary: { arg: "-binary", type: "boolean" },
1280
- defaultCmykProfile: {
1281
- arg: "-defaultcmykprofile",
1282
- type: "string",
1283
- minVersion: "21.01.0",
1284
- },
1285
- defaultGrayProfile: {
1286
- arg: "-defaultgrayprofile",
1287
- type: "string",
1288
- minVersion: "21.01.0",
1289
- },
1290
- defaultRgbProfile: {
1291
- arg: "-defaultrgbprofile",
1292
- type: "string",
1293
- minVersion: "21.01.0",
1294
- },
1295
- duplex: { arg: "-duplex", type: "boolean" },
1296
- epsFile: { arg: "-eps", type: "boolean" },
1297
- fillPage: { arg: "-expand", type: "boolean" },
1298
- firstPageToConvert: { arg: "-f", type: "number" },
1299
- form: { arg: "-form", type: "boolean" },
1300
- lastPageToConvert: { arg: "-l", type: "number" },
1301
- level1: { arg: "-level1", type: "boolean" },
1302
- level1Sep: { arg: "-level1sep", type: "boolean" },
1303
- level2: { arg: "-level2", type: "boolean" },
1304
- level2Sep: { arg: "-level2sep", type: "boolean" },
1305
- level3: { arg: "-level3", type: "boolean" },
1306
- level3Sep: { arg: "-level3sep", type: "boolean" },
1307
- noCenter: { arg: "-nocenter", type: "boolean" },
1308
- noCrop: { arg: "-nocrop", type: "boolean" },
1309
- noEmbedCIDFonts: { arg: "-noembcidps", type: "boolean" },
1310
- noEmbedCIDTrueTypeFonts: {
1311
- arg: "-noembcidtt",
1312
- type: "boolean",
1313
- },
1314
- noEmbedTrueTypeFonts: { arg: "-noembtt", type: "boolean" },
1315
- noEmbedType1Fonts: { arg: "-noembt1", type: "boolean" },
1316
- noShrink: { arg: "-noshrink", type: "boolean" },
1317
- opi: { arg: "-opi", type: "boolean" },
1318
- optimizecolorspace: {
1319
- arg: "-optimizecolorspace",
1320
- type: "boolean",
1321
- },
1322
- originalPageSizes: { arg: "-origpagesizes", type: "boolean" },
1323
- overprint: { arg: "-overprint", type: "boolean" },
1324
- ownerPassword: { arg: "-opw", type: "string" },
1325
- paperHeight: { arg: "-paperh", type: "number" },
1326
- paperSize: { arg: "-paper", type: "string" },
1327
- paperWidth: { arg: "-paperw", type: "number" },
1328
- passfonts: { arg: "-passfonts", type: "boolean" },
1329
- preload: { arg: "-preload", type: "boolean" },
1330
- printVersionInfo: { arg: "-v", type: "boolean" },
1331
- processColorFormat: { arg: "-processcolorformat", type: "string" },
1332
- processColorProfile: {
1333
- arg: "-processcolorprofile",
1334
- type: "string",
1335
- },
1336
- quiet: { arg: "-q", type: "boolean" },
1337
- rasterize: {
1338
- arg: "-rasterize",
1339
- type: "string",
1340
- minVersion: "0.90.0",
1341
- },
1342
- resolutionXYAxis: { arg: "-r", type: "number" },
1343
- userPassword: { arg: "-upw", type: "string" },
1344
- };
1345
-
1346
- const { stderr } = await execFileAsync(
1347
- pathResolve(this.#popplerPath, "pdftops"),
1348
- ["-v"]
1349
- );
1350
-
1351
- // @ts-ignore: parseOptions checks if falsy
1352
- const versionInfo = POPPLER_VERSION_REG.exec(stderr)[1];
1353
-
1427
+ const acceptedOptions = this.#getAcceptedOptions("pdfToPs");
1428
+ const versionInfo = await this.#getVersion(this.#pdfToPsBin);
1354
1429
  const args = parseOptions(acceptedOptions, options, versionInfo);
1430
+ args.push(Buffer.isBuffer(file) ? "-" : file, outputFile || "-");
1355
1431
 
1356
- return new Promise((resolve, reject) => {
1357
- args.push(Buffer.isBuffer(file) ? "-" : file, outputFile || "-");
1358
-
1359
- const child = spawn(
1360
- pathResolve(this.#popplerPath, "pdftops"),
1361
- args
1362
- );
1363
-
1364
- if (Buffer.isBuffer(file)) {
1365
- child.stdin.write(file);
1366
- child.stdin.end();
1367
- }
1368
-
1369
- let stdOut = "";
1370
- let stdErr = "";
1371
-
1372
- child.stdout.on("data", (data) => {
1373
- stdOut += data;
1374
- });
1375
-
1376
- child.stderr.on("data", (data) => {
1377
- stdErr += data;
1378
- });
1379
-
1380
- child.on("close", (code) => {
1381
- /* istanbul ignore else */
1382
- if (stdOut !== "") {
1383
- resolve(stdOut.trim());
1384
- } else if (code === 0) {
1385
- resolve(ERROR_MSGS[code]);
1386
- } else if (stdErr !== "") {
1387
- reject(new Error(stdErr.trim()));
1388
- } else {
1389
- reject(
1390
- new Error(
1391
- // @ts-ignore: Second operand used if code is not in ERROR_MSGS
1392
- ERROR_MSGS[code] ||
1393
- `pdftops ${args.join(
1394
- " "
1395
- )} exited with code ${code}`
1396
- )
1397
- );
1398
- }
1399
- });
1400
- });
1432
+ return executeBinary(this.#pdfToPsBin, args, file);
1401
1433
  }
1402
1434
 
1403
1435
  /**
@@ -1406,142 +1438,17 @@ class Poppler {
1406
1438
  * @param {(Buffer|string)} file - PDF file as Buffer, or filepath of the PDF file to read.
1407
1439
  * @param {string} [outputFile] - Filepath of the file to output the results to.
1408
1440
  * If `undefined` then will write output to stdout.
1409
- * @param {object} [options] - Object containing options to pass to binary.
1410
- * @param {boolean} [options.boundingBoxXhtml] - Generate an XHTML file containing bounding
1411
- * box information for each word in the file.
1412
- * @param {boolean} [options.boundingBoxXhtmlLayout] - Generate an XHTML file containing
1413
- * bounding box information for each block, line, and word in the file.
1414
- * @param {boolean} [options.cropBox] - Use the crop box rather than the media box with
1415
- * `options.boundingBoxXhtml` and `options.boundingBoxXhtmlLayout`.
1416
- * @param {number} [options.cropHeight] - Specifies the height of crop area in pixels
1417
- * (image output) or points (vector output).
1418
- * @param {number} [options.cropWidth] - Specifies the width of crop area in pixels
1419
- * (image output) or points (vector output).
1420
- * @param {number} [options.cropXAxis] - Specifies the x-coordinate of the crop area top left
1421
- * corner in pixels (image output) or points (vector output).
1422
- * @param {number} [options.cropYAxis] - Specifies the y-coordinate of the crop area top left
1423
- * corner in pixels (image output) or points (vector output).
1424
- * @param {('dos'|'mac'|'unix')} [options.eolConvention] - Sets the end-of-line convention to use for
1425
- * text output: dos; mac; unix.
1426
- * @param {number} [options.firstPageToConvert] - Specifies the first page to convert.
1427
- * @param {number} [options.fixedWidthLayout] - Assume fixed-pitch (or tabular) text, with the
1428
- * specified character width (in points). This forces physical layout mode.
1429
- * @param {boolean} [options.generateHtmlMetaFile] - Generate simple HTML file, including the
1430
- * meta information. This simply wraps the text in `<pre>` and `</pre>` and prepends the meta headers.
1431
- * @param {boolean} [options.generateTsvFile] - Generate a TSV file containing the bounding box
1432
- * information for each block, line, and word in the file.
1433
- * @param {number} [options.lastPageToConvert] - Specifies the last page to convert.
1434
- * @param {boolean} [options.listEncodingOptions] - List the available encodings.
1435
- * @param {boolean} [options.maintainLayout] - Maintain (as best as possible) the original physical
1436
- * layout of the text. The default is to undo physical layout (columns, hyphenation, etc.) and
1437
- * output the text in reading order.
1438
- * @param {boolean} [options.noDiagonalText] - Discard diagonal text.
1439
- * @param {boolean} [options.noPageBreaks] - Do not insert page breaks (form feed characters)
1440
- * between pages.
1441
- * @param {string} [options.outputEncoding] - Sets the encoding to use for text output.
1442
- * This defaults to `UTF-8`.
1443
- * @param {string} [options.ownerPassword] - Owner password (for encrypted files).
1444
- * @param {boolean} [options.printVersionInfo] - Print copyright and version information.
1445
- * @param {boolean} [options.quiet] - Do not print any messages or errors.
1446
- * @param {boolean} [options.rawLayout] - Keep the text in content stream order. This is a
1447
- * hack which often undoes column formatting, etc. Use of raw mode is no longer recommended.
1448
- * @param {string} [options.userPassword] - User password (for encrypted files).
1441
+ * @param {PdfToTextOptions} [options] - Options to pass to pdftotext binary.
1449
1442
  * @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
1450
1443
  */
1451
1444
  async pdfToText(file, outputFile, options = {}) {
1452
- /** @type {PopplerAcceptedOptions} */
1453
- const acceptedOptions = {
1454
- boundingBoxXhtml: { arg: "-bbox", type: "boolean" },
1455
- boundingBoxXhtmlLayout: {
1456
- arg: "-bbox-layout",
1457
- type: "boolean",
1458
- },
1459
- cropBox: {
1460
- arg: "-cropbox",
1461
- type: "boolean",
1462
- minVersion: "21.03.0",
1463
- },
1464
- cropHeight: { arg: "-H", type: "number" },
1465
- cropWidth: { arg: "-W", type: "number" },
1466
- cropXAxis: { arg: "-x", type: "number" },
1467
- cropYAxis: { arg: "-y", type: "number" },
1468
- eolConvention: { arg: "-eol", type: "string" },
1469
- firstPageToConvert: { arg: "-f", type: "number" },
1470
- fixedWidthLayout: { arg: "-fixed", type: "number" },
1471
- generateHtmlMetaFile: { arg: "-htmlmeta", type: "boolean" },
1472
- generateTsvFile: { arg: "-tsv", type: "boolean" },
1473
- lastPageToConvert: { arg: "-l", type: "number" },
1474
- listEncodingOptions: { arg: "-listenc", type: "boolean" },
1475
- maintainLayout: { arg: "-layout", type: "boolean" },
1476
- noDiagonalText: {
1477
- arg: "-nodiag",
1478
- type: "boolean",
1479
- minVersion: "0.80.0",
1480
- },
1481
- noPageBreaks: { arg: "-nopgbrk", type: "boolean" },
1482
- outputEncoding: { arg: "-enc", type: "string" },
1483
- ownerPassword: { arg: "-opw", type: "string" },
1484
- printVersionInfo: { arg: "-v", type: "boolean" },
1485
- quiet: { arg: "-q", type: "boolean" },
1486
- rawLayout: { arg: "-raw", type: "boolean" },
1487
- resolution: { arg: "-r", type: "number" },
1488
- userPassword: { arg: "-upw", type: "string" },
1489
- };
1490
-
1491
- const { stderr } = await execFileAsync(
1492
- pathResolve(this.#popplerPath, "pdftotext"),
1493
- ["-v"]
1494
- );
1495
-
1496
- // @ts-ignore: parseOptions checks if falsy
1497
- const versionInfo = POPPLER_VERSION_REG.exec(stderr)[1];
1498
-
1445
+ const acceptedOptions = this.#getAcceptedOptions("pdfToText");
1446
+ const versionInfo = await this.#getVersion(this.#pdfToTextBin);
1499
1447
  const args = parseOptions(acceptedOptions, options, versionInfo);
1448
+ args.push(Buffer.isBuffer(file) ? "-" : file, outputFile || "-");
1500
1449
 
1501
- return new Promise((resolve, reject) => {
1502
- args.push(Buffer.isBuffer(file) ? "-" : file, outputFile || "-");
1503
-
1504
- const child = spawn(
1505
- pathResolve(this.#popplerPath, "pdftotext"),
1506
- args
1507
- );
1508
-
1509
- if (Buffer.isBuffer(file)) {
1510
- child.stdin.write(file);
1511
- child.stdin.end();
1512
- }
1513
-
1514
- let stdOut = "";
1515
- let stdErr = "";
1516
-
1517
- child.stdout.on("data", (data) => {
1518
- stdOut += data;
1519
- });
1520
-
1521
- child.stderr.on("data", (data) => {
1522
- stdErr += data;
1523
- });
1524
-
1525
- child.on("close", (code) => {
1526
- /* istanbul ignore else */
1527
- if (stdOut !== "") {
1528
- resolve(options.maintainLayout ? stdOut : stdOut.trim());
1529
- } else if (code === 0) {
1530
- resolve(ERROR_MSGS[code]);
1531
- } else if (stdErr !== "") {
1532
- reject(new Error(stdErr.trim()));
1533
- } else {
1534
- reject(
1535
- new Error(
1536
- // @ts-ignore: Second operand used if code is not in ERROR_MSGS
1537
- ERROR_MSGS[code] ||
1538
- `pdftotext ${args.join(
1539
- " "
1540
- )} exited with code ${code}`
1541
- )
1542
- );
1543
- }
1544
- });
1450
+ return executeBinary(this.#pdfToTextBin, args, file, {
1451
+ preserveWhitespace: options.maintainLayout,
1545
1452
  });
1546
1453
  }
1547
1454
 
@@ -1552,31 +1459,16 @@ class Poppler {
1552
1459
  * @param {string[]} files - Filepaths of the PDF files to merge.
1553
1460
  * An entire directory of PDF files can be merged like so: `path/to/directory/*.pdf`.
1554
1461
  * @param {string} outputFile - Filepath of the file to output the resulting merged PDF to.
1555
- * @param {object} [options] - Object containing options to pass to binary.
1556
- * @param {boolean} [options.printVersionInfo] - Print copyright and version information.
1462
+ * @param {PdfUniteOptions} [options] - Options to pass to pdfunite binary.
1557
1463
  * @returns {Promise<string>} A promise that resolves with a stdout string, or rejects with an `Error` object.
1558
1464
  */
1559
1465
  async pdfUnite(files, outputFile, options = {}) {
1560
- /** @type {PopplerAcceptedOptions} */
1561
- const acceptedOptions = {
1562
- printVersionInfo: { arg: "-v", type: "boolean" },
1563
- };
1564
-
1565
- const { stderr } = await execFileAsync(
1566
- pathResolve(this.#popplerPath, "pdfunite"),
1567
- ["-v"]
1568
- );
1569
-
1570
- // @ts-ignore: parseOptions checks if falsy
1571
- const versionInfo = POPPLER_VERSION_REG.exec(stderr)[1];
1572
-
1466
+ const acceptedOptions = this.#getAcceptedOptions("pdfUnite");
1467
+ const versionInfo = await this.#getVersion(this.#pdfUniteBin);
1573
1468
  const args = parseOptions(acceptedOptions, options, versionInfo);
1574
1469
  args.push(...files, outputFile);
1575
1470
 
1576
- const { stdout } = await execFileAsync(
1577
- pathResolve(this.#popplerPath, "pdfunite"),
1578
- args
1579
- );
1471
+ const { stdout } = await execFileAsync(this.#pdfUniteBin, args);
1580
1472
  return stdout;
1581
1473
  }
1582
1474
  }