@heripo/pdf-parser 0.1.8 → 0.1.10

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -12,7 +12,7 @@ type ConversionCompleteCallback = (outputPath: string) => Promise<void> | void;
12
12
  /**
13
13
  * Extended options for PDF conversion.
14
14
  */
15
- type PDFConvertOptions = Omit<ConversionOptions, 'to_formats' | 'image_export_mode' | 'ocr_engine' | 'accelerator_options' | 'ocr_options' | 'generate_picture_images' | 'images_scale' | 'force_ocr' | 'pipeline' | 'vlm_pipeline_model_local' | 'vlm_pipeline_model_api'> & {
15
+ type PDFConvertOptions = Omit<ConversionOptions, 'to_formats' | 'image_export_mode' | 'ocr_engine' | 'accelerator_options' | 'ocr_options' | 'generate_picture_images' | 'generate_page_images' | 'images_scale' | 'force_ocr' | 'pipeline' | 'vlm_pipeline_model_local' | 'vlm_pipeline_model_api'> & {
16
16
  num_threads?: number;
17
17
  /**
18
18
  * Force pre-conversion to image-based PDF before processing.
@@ -74,6 +74,8 @@ type Options = {
74
74
  * - Install specific version: `pyenv install 3.12.0 && pyenv global 3.12.0`
75
75
  * - `jq` - JSON processor
76
76
  * - Install: `brew install jq`
77
+ * - `poppler` - PDF text extraction tools (pdftotext, pdfinfo)
78
+ * - Install: `brew install poppler`
77
79
  * - `lsof` - List open files (usually pre-installed on macOS)
78
80
  *
79
81
  * ## Initialization Process
@@ -122,6 +124,7 @@ declare class PDFParser {
122
124
  init(): Promise<void>;
123
125
  private checkOperatingSystem;
124
126
  private checkJqInstalled;
127
+ private checkPopplerInstalled;
125
128
  private checkMacOSVersion;
126
129
  private checkImageMagickInstalled;
127
130
  private checkGhostscriptInstalled;
package/dist/index.d.ts CHANGED
@@ -12,7 +12,7 @@ type ConversionCompleteCallback = (outputPath: string) => Promise<void> | void;
12
12
  /**
13
13
  * Extended options for PDF conversion.
14
14
  */
15
- type PDFConvertOptions = Omit<ConversionOptions, 'to_formats' | 'image_export_mode' | 'ocr_engine' | 'accelerator_options' | 'ocr_options' | 'generate_picture_images' | 'images_scale' | 'force_ocr' | 'pipeline' | 'vlm_pipeline_model_local' | 'vlm_pipeline_model_api'> & {
15
+ type PDFConvertOptions = Omit<ConversionOptions, 'to_formats' | 'image_export_mode' | 'ocr_engine' | 'accelerator_options' | 'ocr_options' | 'generate_picture_images' | 'generate_page_images' | 'images_scale' | 'force_ocr' | 'pipeline' | 'vlm_pipeline_model_local' | 'vlm_pipeline_model_api'> & {
16
16
  num_threads?: number;
17
17
  /**
18
18
  * Force pre-conversion to image-based PDF before processing.
@@ -74,6 +74,8 @@ type Options = {
74
74
  * - Install specific version: `pyenv install 3.12.0 && pyenv global 3.12.0`
75
75
  * - `jq` - JSON processor
76
76
  * - Install: `brew install jq`
77
+ * - `poppler` - PDF text extraction tools (pdftotext, pdfinfo)
78
+ * - Install: `brew install poppler`
77
79
  * - `lsof` - List open files (usually pre-installed on macOS)
78
80
  *
79
81
  * ## Initialization Process
@@ -122,6 +124,7 @@ declare class PDFParser {
122
124
  init(): Promise<void>;
123
125
  private checkOperatingSystem;
124
126
  private checkJqInstalled;
127
+ private checkPopplerInstalled;
125
128
  private checkMacOSVersion;
126
129
  private checkImageMagickInstalled;
127
130
  private checkGhostscriptInstalled;