npm - @heripo/pdf-parser - Versions diffs - 0.1.4 → 0.1.6 - Mend

@heripo/pdf-parser 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/dist/chunk-WWNI354M.js +121 -0
package/dist/chunk-WWNI354M.js.map +1 -0
package/dist/index.cjs +315 -48
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +19 -5
package/dist/index.d.ts +19 -5
package/dist/index.js +195 -41
package/dist/index.js.map +1 -1
package/dist/vlm-models.cjs +147 -0
package/dist/vlm-models.cjs.map +1 -0
package/dist/vlm-models.d.cts +34 -0
package/dist/vlm-models.d.ts +34 -0
package/dist/vlm-models.js +12 -0
package/dist/vlm-models.js.map +1 -0
package/package.json +15 -9

package/dist/chunk-WWNI354M.js ADDED Viewed

@@ -0,0 +1,121 @@
+// src/config/vlm-models.ts
+var VLM_MODELS = {
+  // ── DocTags models (specialized document structure output) ──────────
+  "granite-docling-258M-mlx": {
+    repo_id: "ibm-granite/granite-docling-258M-mlx",
+    inference_framework: "mlx",
+    response_format: "doctags",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Granite Docling 258M (MLX, Apple Silicon optimized, ~6s/page)"
+  },
+  "granite-docling-258M": {
+    repo_id: "ibm-granite/granite-docling-258M",
+    inference_framework: "transformers",
+    response_format: "doctags",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Granite Docling 258M (Transformers, cross-platform)"
+  },
+  "smoldocling-256M-mlx": {
+    repo_id: "docling-project/SmolDocling-256M-preview-mlx-bf16",
+    inference_framework: "mlx",
+    response_format: "doctags",
+    transformers_model_type: "automodel-vision2seq",
+    description: "SmolDocling 256M (MLX, fastest option)"
+  },
+  "smoldocling-256M": {
+    repo_id: "docling-project/SmolDocling-256M-preview",
+    inference_framework: "transformers",
+    response_format: "doctags",
+    transformers_model_type: "automodel-vision2seq",
+    description: "SmolDocling 256M (Transformers)"
+  },
+  // ── Markdown models (general-purpose vision LLMs) ──────────────────
+  "granite-vision-2B": {
+    repo_id: "ibm-granite/granite-vision-3.2-2b",
+    inference_framework: "transformers",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Granite Vision 3.2 2B (IBM, higher accuracy)"
+  },
+  "qwen25-vl-3B-mlx": {
+    repo_id: "mlx-community/Qwen2.5-VL-3B-Instruct-bf16",
+    inference_framework: "mlx",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Qwen 2.5 VL 3B (MLX, multilingual, good KCJ support)"
+  },
+  phi4: {
+    repo_id: "microsoft/Phi-4-multimodal-instruct",
+    inference_framework: "transformers",
+    response_format: "markdown",
+    transformers_model_type: "automodel",
+    description: "Phi-4 Multimodal (Microsoft, CausalLM)"
+  },
+  "pixtral-12B-mlx": {
+    repo_id: "mlx-community/pixtral-12b-bf16",
+    inference_framework: "mlx",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Pixtral 12B (MLX, Mistral, high accuracy)"
+  },
+  "pixtral-12B": {
+    repo_id: "mistral-community/pixtral-12b",
+    inference_framework: "transformers",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Pixtral 12B (Transformers, Mistral)"
+  },
+  got2: {
+    repo_id: "stepfun-ai/GOT-OCR-2.0-hf",
+    inference_framework: "transformers",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "GOT-OCR 2.0 (StepFun, OCR-specialized)"
+  },
+  "gemma3-12B-mlx": {
+    repo_id: "mlx-community/gemma-3-12b-it-bf16",
+    inference_framework: "mlx",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Gemma 3 12B (MLX, Google)"
+  },
+  "gemma3-27B-mlx": {
+    repo_id: "mlx-community/gemma-3-27b-it-bf16",
+    inference_framework: "mlx",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Gemma 3 27B (MLX, Google, highest accuracy)"
+  },
+  dolphin: {
+    repo_id: "ByteDance/Dolphin",
+    inference_framework: "transformers",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Dolphin (ByteDance, document-oriented)"
+  }
+};
+var DEFAULT_VLM_MODEL = "granite-docling-258M-mlx";
+/**
+ * Resolve a VLM model from a preset key or a custom model object.
+ *
+ * @param {string | object} model - A key of VLM_MODELS, or a custom model object.
+ * @returns {object} For a preset key: a new object holding only repo_id,
+ *   inference_framework, response_format and transformers_model_type (the
+ *   preset's description is not copied). For a non-string argument: the
+ *   argument itself, returned unchanged.
+ * @throws {Error} If `model` is a string that is not an own key of VLM_MODELS.
+ */
+function resolveVlmModel(model) {
+  if (typeof model !== "string") {
+    return model;
+  }
+  // Own-property check: a plain `VLM_MODELS[model]` lookup also matches inherited
+  // keys such as "constructor" or "toString", which would slip past a truthiness
+  // guard and produce a config whose fields are all undefined.
+  if (!Object.prototype.hasOwnProperty.call(VLM_MODELS, model)) {
+    throw new Error(
+      `Unknown VLM model preset: "${model}". Available presets: ${Object.keys(VLM_MODELS).join(", ")}`
+    );
+  }
+  const preset = VLM_MODELS[model];
+  return {
+    repo_id: preset.repo_id,
+    inference_framework: preset.inference_framework,
+    response_format: preset.response_format,
+    transformers_model_type: preset.transformers_model_type
+  };
+}
+export {
+  VLM_MODELS,
+  DEFAULT_VLM_MODEL,
+  resolveVlmModel
+};
+//# sourceMappingURL=chunk-WWNI354M.js.map

package/dist/chunk-WWNI354M.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":["../src/config/vlm-models.ts"],"sourcesContent":["import type { VlmModelLocal } from 'docling-sdk';\n\n/**\n * VLM model preset with description\n */\nexport interface VlmModelPreset {\n repo_id: string;\n inference_framework: 'mlx' | 'transformers';\n response_format: 'doctags' | 'markdown';\n transformers_model_type: 'automodel-vision2seq' | 'automodel';\n description: string;\n}\n\n/**\n * Available VLM model presets\n *\n * Based on Docling's official VLM model specs:\n * https://docling-project.github.io/docling/usage/vision_models/#available-local-models\n *\n * Users can select a preset key or provide a custom VlmModelLocal object.\n */\nexport const VLM_MODELS: Record<string, VlmModelPreset> = {\n // ── DocTags models (specialized document structure output) ──────────\n\n 'granite-docling-258M-mlx': {\n repo_id: 'ibm-granite/granite-docling-258M-mlx',\n inference_framework: 'mlx',\n response_format: 'doctags',\n transformers_model_type: 'automodel-vision2seq',\n description:\n 'Granite Docling 258M (MLX, Apple Silicon optimized, ~6s/page)',\n },\n 'granite-docling-258M': {\n repo_id: 'ibm-granite/granite-docling-258M',\n inference_framework: 'transformers',\n response_format: 'doctags',\n transformers_model_type: 'automodel-vision2seq',\n description: 'Granite Docling 258M (Transformers, cross-platform)',\n },\n 'smoldocling-256M-mlx': {\n repo_id: 'docling-project/SmolDocling-256M-preview-mlx-bf16',\n inference_framework: 'mlx',\n response_format: 'doctags',\n transformers_model_type: 'automodel-vision2seq',\n description: 'SmolDocling 256M (MLX, fastest option)',\n },\n 'smoldocling-256M': {\n repo_id: 'docling-project/SmolDocling-256M-preview',\n inference_framework: 'transformers',\n response_format: 'doctags',\n transformers_model_type: 'automodel-vision2seq',\n description: 'SmolDocling 256M (Transformers)',\n },\n\n // ── Markdown models (general-purpose vision LLMs) ──────────────────\n\n 'granite-vision-2B': {\n repo_id: 
'ibm-granite/granite-vision-3.2-2b',\n inference_framework: 'transformers',\n response_format: 'markdown',\n transformers_model_type: 'automodel-vision2seq',\n description: 'Granite Vision 3.2 2B (IBM, higher accuracy)',\n },\n 'qwen25-vl-3B-mlx': {\n repo_id: 'mlx-community/Qwen2.5-VL-3B-Instruct-bf16',\n inference_framework: 'mlx',\n response_format: 'markdown',\n transformers_model_type: 'automodel-vision2seq',\n description: 'Qwen 2.5 VL 3B (MLX, multilingual, good KCJ support)',\n },\n phi4: {\n repo_id: 'microsoft/Phi-4-multimodal-instruct',\n inference_framework: 'transformers',\n response_format: 'markdown',\n transformers_model_type: 'automodel',\n description: 'Phi-4 Multimodal (Microsoft, CausalLM)',\n },\n 'pixtral-12B-mlx': {\n repo_id: 'mlx-community/pixtral-12b-bf16',\n inference_framework: 'mlx',\n response_format: 'markdown',\n transformers_model_type: 'automodel-vision2seq',\n description: 'Pixtral 12B (MLX, Mistral, high accuracy)',\n },\n 'pixtral-12B': {\n repo_id: 'mistral-community/pixtral-12b',\n inference_framework: 'transformers',\n response_format: 'markdown',\n transformers_model_type: 'automodel-vision2seq',\n description: 'Pixtral 12B (Transformers, Mistral)',\n },\n got2: {\n repo_id: 'stepfun-ai/GOT-OCR-2.0-hf',\n inference_framework: 'transformers',\n response_format: 'markdown',\n transformers_model_type: 'automodel-vision2seq',\n description: 'GOT-OCR 2.0 (StepFun, OCR-specialized)',\n },\n 'gemma3-12B-mlx': {\n repo_id: 'mlx-community/gemma-3-12b-it-bf16',\n inference_framework: 'mlx',\n response_format: 'markdown',\n transformers_model_type: 'automodel-vision2seq',\n description: 'Gemma 3 12B (MLX, Google)',\n },\n 'gemma3-27B-mlx': {\n repo_id: 'mlx-community/gemma-3-27b-it-bf16',\n inference_framework: 'mlx',\n response_format: 'markdown',\n transformers_model_type: 'automodel-vision2seq',\n description: 'Gemma 3 27B (MLX, Google, highest accuracy)',\n },\n dolphin: {\n repo_id: 'ByteDance/Dolphin',\n inference_framework: 
'transformers',\n response_format: 'markdown',\n transformers_model_type: 'automodel-vision2seq',\n description: 'Dolphin (ByteDance, document-oriented)',\n },\n} as const;\n\n/**\n * Default VLM model preset key\n */\nexport const DEFAULT_VLM_MODEL = 'granite-docling-258M-mlx';\n\n/**\n * Resolve a VLM model from a preset key or custom VlmModelLocal object.\n *\n * When using a preset key, only required fields are populated.\n * Optional fields (prompt, scale, extra_generation_config) use Docling defaults.\n */\nexport function resolveVlmModel(model: string | VlmModelLocal): VlmModelLocal {\n if (typeof model === 'string') {\n const preset = VLM_MODELS[model];\n if (!preset) {\n throw new Error(\n `Unknown VLM model preset: \"${model}\". Available presets: ${Object.keys(VLM_MODELS).join(', ')}`,\n );\n }\n return {\n repo_id: preset.repo_id,\n inference_framework: preset.inference_framework,\n response_format: preset.response_format,\n transformers_model_type: preset.transformers_model_type,\n } as VlmModelLocal;\n }\n return 
model;\n}\n"],"mappings":";AAqBO,IAAM,aAA6C;AAAA;AAAA,EAGxD,4BAA4B;AAAA,IAC1B,SAAS;AAAA,IACT,qBAAqB;AAAA,IACrB,iBAAiB;AAAA,IACjB,yBAAyB;AAAA,IACzB,aACE;AAAA,EACJ;AAAA,EACA,wBAAwB;AAAA,IACtB,SAAS;AAAA,IACT,qBAAqB;AAAA,IACrB,iBAAiB;AAAA,IACjB,yBAAyB;AAAA,IACzB,aAAa;AAAA,EACf;AAAA,EACA,wBAAwB;AAAA,IACtB,SAAS;AAAA,IACT,qBAAqB;AAAA,IACrB,iBAAiB;AAAA,IACjB,yBAAyB;AAAA,IACzB,aAAa;AAAA,EACf;AAAA,EACA,oBAAoB;AAAA,IAClB,SAAS;AAAA,IACT,qBAAqB;AAAA,IACrB,iBAAiB;AAAA,IACjB,yBAAyB;AAAA,IACzB,aAAa;AAAA,EACf;AAAA;AAAA,EAIA,qBAAqB;AAAA,IACnB,SAAS;AAAA,IACT,qBAAqB;AAAA,IACrB,iBAAiB;AAAA,IACjB,yBAAyB;AAAA,IACzB,aAAa;AAAA,EACf;AAAA,EACA,oBAAoB;AAAA,IAClB,SAAS;AAAA,IACT,qBAAqB;AAAA,IACrB,iBAAiB;AAAA,IACjB,yBAAyB;AAAA,IACzB,aAAa;AAAA,EACf;AAAA,EACA,MAAM;AAAA,IACJ,SAAS;AAAA,IACT,qBAAqB;AAAA,IACrB,iBAAiB;AAAA,IACjB,yBAAyB;AAAA,IACzB,aAAa;AAAA,EACf;AAAA,EACA,mBAAmB;AAAA,IACjB,SAAS;AAAA,IACT,qBAAqB;AAAA,IACrB,iBAAiB;AAAA,IACjB,yBAAyB;AAAA,IACzB,aAAa;AAAA,EACf;AAAA,EACA,eAAe;AAAA,IACb,SAAS;AAAA,IACT,qBAAqB;AAAA,IACrB,iBAAiB;AAAA,IACjB,yBAAyB;AAAA,IACzB,aAAa;AAAA,EACf;AAAA,EACA,MAAM;AAAA,IACJ,SAAS;AAAA,IACT,qBAAqB;AAAA,IACrB,iBAAiB;AAAA,IACjB,yBAAyB;AAAA,IACzB,aAAa;AAAA,EACf;AAAA,EACA,kBAAkB;AAAA,IAChB,SAAS;AAAA,IACT,qBAAqB;AAAA,IACrB,iBAAiB;AAAA,IACjB,yBAAyB;AAAA,IACzB,aAAa;AAAA,EACf;AAAA,EACA,kBAAkB;AAAA,IAChB,SAAS;AAAA,IACT,qBAAqB;AAAA,IACrB,iBAAiB;AAAA,IACjB,yBAAyB;AAAA,IACzB,aAAa;AAAA,EACf;AAAA,EACA,SAAS;AAAA,IACP,SAAS;AAAA,IACT,qBAAqB;AAAA,IACrB,iBAAiB;AAAA,IACjB,yBAAyB;AAAA,IACzB,aAAa;AAAA,EACf;AACF;AAKO,IAAM,oBAAoB;AAQ1B,SAAS,gBAAgB,OAA8C;AAC5E,MAAI,OAAO,UAAU,UAAU;AAC7B,UAAM,SAAS,WAAW,KAAK;AAC/B,QAAI,CAAC,QAAQ;AACX,YAAM,IAAI;AAAA,QACR,8BAA8B,KAAK,yBAAyB,OAAO,KAAK,UAAU,EAAE,KAAK,IAAI,CAAC;AAAA,MAChG;AAAA,IACF;AACA,WAAO;AAAA,MACL,SAAS,OAAO;AAAA,MAChB,qBAAqB,OAAO;AAAA,MAC5B,iBAAiB,OAAO;AAAA,MACxB,yBAAyB,OAAO;AAAA,IAClC;AAAA,EACF;AACA,SAAO;AACT;","names":[]}

package/dist/index.cjs CHANGED Viewed

@@ -30,15 +30,18 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // src/index.ts
 var src_exports = {};
 __export(src_exports, {
+  DEFAULT_VLM_MODEL: () => DEFAULT_VLM_MODEL,
   ImagePdfFallbackError: () => ImagePdfFallbackError,
-  PDFParser: () => PDFParser
+  PDFParser: () => PDFParser,
+  VLM_MODELS: () => VLM_MODELS,
+  resolveVlmModel: () => resolveVlmModel
 });
 module.exports = __toCommonJS(src_exports);
 // src/core/pdf-parser.ts
-var import_docling_sdk = require("docling-sdk");
+var import_docling_sdk2 = require("docling-sdk");
 var import_node_child_process3 = require("child_process");
-var import_node_os2 = require("os");
+var import_node_os3 = require("os");
 var import_node_path6 = require("path");
 // src/config/constants.ts
@@ -68,7 +71,11 @@ var PDF_CONVERTER = {
   /**
    * Interval for progress polling in milliseconds
    */
-  POLL_INTERVAL_MS: 1e3
+  POLL_INTERVAL_MS: 1e3,
+  /**
+   * Default timeout for task completion in milliseconds (30 minutes)
+   */
+  DEFAULT_TIMEOUT_MS: 18e5
 };
 var DOCLING_ENVIRONMENT = {
   /**
@@ -86,6 +93,19 @@ var IMAGE_PDF_CONVERTER = {
    */
   QUALITY: 100
 };
+var VLM_ENVIRONMENT = {
+  /**
+   * Timeout for VLM dependency installation (pip install) in milliseconds (3 hours).
+   * VLM packages can be very large and may require extended download times
+   * depending on network conditions.
+   */
+  SETUP_TIMEOUT_MS: 108e5,
+  /**
+   * Timeout for VLM model download in milliseconds (3 hours).
+   * Large VLM models (e.g., multi-GB weights) need sufficient time to download.
+   */
+  MODEL_DOWNLOAD_TIMEOUT_MS: 108e5
+};
 // ../shared/dist/index.mjs
 var import_child_process = require("child_process");
@@ -118,6 +138,7 @@ function spawnAsync(command, args, options = {}) {
 // src/environment/docling-environment.ts
 var import_node_child_process = require("child_process");
+var import_node_os = require("os");
 var import_node_path = require("path");
 // src/utils/python-version.ts
@@ -159,6 +180,7 @@ var DoclingEnvironment = class _DoclingEnvironment {
   venvPath;
   port;
   killExistingProcess;
+  vlmDependenciesInstalled = false;
   constructor(options) {
     this.logger = options.logger;
     this.venvPath = options.venvPath;
@@ -288,6 +310,81 @@ var DoclingEnvironment = class _DoclingEnvironment {
       );
     }
   }
+  /**
+   * Install VLM-specific dependencies for the Docling VLM pipeline.
+   *
+   * Installs:
+   * 1. docling-serve[vlm] - VLM model support for docling-serve
+   * 2. mlx + mlx-lm (macOS ARM64 only) - Apple Silicon optimized inference
+   *
+   * Idempotent per instance: once an install has succeeded, or the
+   * isVlmReady() probe has reported success, later calls return without
+   * spawning pip.
+   *
+   * @returns {Promise<void>}
+   * @throws {Error} If either pip invocation exits with a non-zero code.
+   */
+  async setupVlmDependencies() {
+    // The in-memory flag is tested first, so repeat calls short-circuit and
+    // never spawn the Python import probe again.
+    if (this.vlmDependenciesInstalled || await this.isVlmReady()) {
+      this.vlmDependenciesInstalled = true;
+      this.logger.info(
+        "[DoclingEnvironment] VLM dependencies already installed, skipping"
+      );
+      return;
+    }
+    this.logger.info("[DoclingEnvironment] Installing VLM dependencies...");
+    const pipPath = (0, import_node_path.join)(this.venvPath, "bin", "pip");
+    // The message names the package that is actually passed to pip below.
+    this.logger.info("[DoclingEnvironment] Installing docling-serve[vlm]...");
+    const vlmResult = await spawnAsync(
+      pipPath,
+      ["install", "docling-serve[vlm]"],
+      { timeout: VLM_ENVIRONMENT.SETUP_TIMEOUT_MS }
+    );
+    if (vlmResult.code !== 0) {
+      this.logger.error(
+        "[DoclingEnvironment] Failed to install docling-serve[vlm]:",
+        vlmResult.stderr
+      );
+      throw new Error(
+        `Failed to install docling-serve[vlm]. Exit code: ${vlmResult.code}`
+      );
+    }
+    // mlx / mlx-lm are only installed on Apple Silicon (darwin + arm64).
+    if ((0, import_node_os.platform)() === "darwin" && (0, import_node_os.arch)() === "arm64") {
+      this.logger.info(
+        "[DoclingEnvironment] Installing mlx + mlx-lm for Apple Silicon..."
+      );
+      const mlxResult = await spawnAsync(
+        pipPath,
+        ["install", "mlx", "mlx-lm"],
+        { timeout: VLM_ENVIRONMENT.SETUP_TIMEOUT_MS }
+      );
+      if (mlxResult.code !== 0) {
+        this.logger.error(
+          "[DoclingEnvironment] Failed to install mlx/mlx-lm:",
+          mlxResult.stderr
+        );
+        throw new Error(
+          `Failed to install mlx/mlx-lm. Exit code: ${mlxResult.code}`
+        );
+      }
+    }
+    // Only reached when every required install step exited with code 0.
+    this.vlmDependenciesInstalled = true;
+    this.logger.info(
+      "[DoclingEnvironment] VLM dependencies installed successfully"
+    );
+  }
+  /**
+   * Check if VLM dependencies are ready by verifying Python module imports.
+   *
+   * Runs the venv's Python interpreter with
+   * `import docling_core; import docling` and treats exit code 0 as "ready".
+   *
+   * NOTE(review): only docling_core and docling are probed; nothing specific
+   * to the VLM extra or to mlx is imported. If the base environment already
+   * provides these two modules, this returns true and setupVlmDependencies()
+   * skips the docling-serve[vlm] install - confirm this is intended.
+   *
+   * @returns {Promise<boolean>} true when the import probe exits with code 0.
+   */
+  async isVlmReady() {
+    const pythonPath = (0, import_node_path.join)(this.venvPath, "bin", "python");
+    const result = await spawnAsync(pythonPath, [
+      "-c",
+      "import docling_core; import docling"
+    ]);
+    return result.code === 0;
+  }
   async isPortInUse(port) {
     try {
       const result = await spawnAsync("lsof", ["-ti", `:${port}`]);
@@ -374,11 +471,127 @@ var DoclingEnvironment = class _DoclingEnvironment {
 };
 // src/core/pdf-converter.ts
+var import_docling_sdk = require("docling-sdk");
 var import_es_toolkit = require("es-toolkit");
 var import_node_fs4 = require("fs");
 var import_node_path5 = require("path");
 var import_promises = require("stream/promises");
+// src/config/vlm-models.ts
+var VLM_MODELS = {
+  // ── DocTags models (specialized document structure output) ──────────
+  "granite-docling-258M-mlx": {
+    repo_id: "ibm-granite/granite-docling-258M-mlx",
+    inference_framework: "mlx",
+    response_format: "doctags",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Granite Docling 258M (MLX, Apple Silicon optimized, ~6s/page)"
+  },
+  "granite-docling-258M": {
+    repo_id: "ibm-granite/granite-docling-258M",
+    inference_framework: "transformers",
+    response_format: "doctags",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Granite Docling 258M (Transformers, cross-platform)"
+  },
+  "smoldocling-256M-mlx": {
+    repo_id: "docling-project/SmolDocling-256M-preview-mlx-bf16",
+    inference_framework: "mlx",
+    response_format: "doctags",
+    transformers_model_type: "automodel-vision2seq",
+    description: "SmolDocling 256M (MLX, fastest option)"
+  },
+  "smoldocling-256M": {
+    repo_id: "docling-project/SmolDocling-256M-preview",
+    inference_framework: "transformers",
+    response_format: "doctags",
+    transformers_model_type: "automodel-vision2seq",
+    description: "SmolDocling 256M (Transformers)"
+  },
+  // ── Markdown models (general-purpose vision LLMs) ──────────────────
+  "granite-vision-2B": {
+    repo_id: "ibm-granite/granite-vision-3.2-2b",
+    inference_framework: "transformers",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Granite Vision 3.2 2B (IBM, higher accuracy)"
+  },
+  "qwen25-vl-3B-mlx": {
+    repo_id: "mlx-community/Qwen2.5-VL-3B-Instruct-bf16",
+    inference_framework: "mlx",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Qwen 2.5 VL 3B (MLX, multilingual, good KCJ support)"
+  },
+  phi4: {
+    repo_id: "microsoft/Phi-4-multimodal-instruct",
+    inference_framework: "transformers",
+    response_format: "markdown",
+    transformers_model_type: "automodel",
+    description: "Phi-4 Multimodal (Microsoft, CausalLM)"
+  },
+  "pixtral-12B-mlx": {
+    repo_id: "mlx-community/pixtral-12b-bf16",
+    inference_framework: "mlx",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Pixtral 12B (MLX, Mistral, high accuracy)"
+  },
+  "pixtral-12B": {
+    repo_id: "mistral-community/pixtral-12b",
+    inference_framework: "transformers",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Pixtral 12B (Transformers, Mistral)"
+  },
+  got2: {
+    repo_id: "stepfun-ai/GOT-OCR-2.0-hf",
+    inference_framework: "transformers",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "GOT-OCR 2.0 (StepFun, OCR-specialized)"
+  },
+  "gemma3-12B-mlx": {
+    repo_id: "mlx-community/gemma-3-12b-it-bf16",
+    inference_framework: "mlx",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Gemma 3 12B (MLX, Google)"
+  },
+  "gemma3-27B-mlx": {
+    repo_id: "mlx-community/gemma-3-27b-it-bf16",
+    inference_framework: "mlx",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Gemma 3 27B (MLX, Google, highest accuracy)"
+  },
+  dolphin: {
+    repo_id: "ByteDance/Dolphin",
+    inference_framework: "transformers",
+    response_format: "markdown",
+    transformers_model_type: "automodel-vision2seq",
+    description: "Dolphin (ByteDance, document-oriented)"
+  }
+};
+var DEFAULT_VLM_MODEL = "granite-docling-258M-mlx";
+/**
+ * Resolve a VLM model from a preset key or a custom model object.
+ *
+ * @param {string | object} model - A key of VLM_MODELS, or a custom model object.
+ * @returns {object} For a preset key: a new object holding only repo_id,
+ *   inference_framework, response_format and transformers_model_type (the
+ *   preset's description is not copied). For a non-string argument: the
+ *   argument itself, returned unchanged.
+ * @throws {Error} If `model` is a string that is not an own key of VLM_MODELS.
+ */
+function resolveVlmModel(model) {
+  if (typeof model !== "string") {
+    return model;
+  }
+  // Own-property check: a plain `VLM_MODELS[model]` lookup also matches inherited
+  // keys such as "constructor" or "toString", which would slip past a truthiness
+  // guard and produce a config whose fields are all undefined.
+  if (!Object.prototype.hasOwnProperty.call(VLM_MODELS, model)) {
+    throw new Error(
+      `Unknown VLM model preset: "${model}". Available presets: ${Object.keys(VLM_MODELS).join(", ")}`
+    );
+  }
+  const preset = VLM_MODELS[model];
+  return {
+    repo_id: preset.repo_id,
+    inference_framework: preset.inference_framework,
+    response_format: preset.response_format,
+    transformers_model_type: preset.transformers_model_type
+  };
+}
 // src/errors/image-pdf-fallback-error.ts
 var ImagePdfFallbackError = class extends Error {
   constructor(originalError, fallbackError) {
@@ -730,7 +943,7 @@ var LocalFileServer = class {
 // src/core/image-pdf-converter.ts
 var import_node_fs3 = require("fs");
-var import_node_os = require("os");
+var import_node_os2 = require("os");
 var import_node_path4 = require("path");
 var ImagePdfConverter = class {
   constructor(logger) {
@@ -746,7 +959,7 @@ var ImagePdfConverter = class {
    */
   async convert(pdfUrl, reportId) {
     const timestamp = Date.now();
-    const tempDir = (0, import_node_os.tmpdir)();
+    const tempDir = (0, import_node_os2.tmpdir)();
     const inputPath = (0, import_node_path4.join)(tempDir, `${reportId}-${timestamp}-input.pdf`);
     const outputPath = (0, import_node_path4.join)(tempDir, `${reportId}-${timestamp}-image.pdf`);
     try {
@@ -816,11 +1029,17 @@ var ImagePdfConverter = class {
 };
 // src/core/pdf-converter.ts
+// Wrap docling-sdk's conversion-options validator so that the `pipeline` key
+// is removed before validation runs. The original function is captured bound
+// to ValidationUtils so it can still be called after the property is replaced.
+// NOTE(review): presumably the SDK validator does not accept `pipeline` - confirm.
+// NOTE(review): this reassigns a property on the imported ValidationUtils
+// object at module load, so other code using that same object gets the wrapper.
+var _origAssertValidConversionOptions = import_docling_sdk.ValidationUtils.assertValidConversionOptions.bind(import_docling_sdk.ValidationUtils);
+import_docling_sdk.ValidationUtils.assertValidConversionOptions = (options) => {
+  // `_pipeline` is intentionally unused: destructuring it only keeps it out of `rest`.
+  // NOTE(review): destructuring throws a TypeError if `options` is null/undefined.
+  const { pipeline: _pipeline, ...rest } = options;
+  _origAssertValidConversionOptions(rest);
+};
 var PDFConverter = class {
-  constructor(logger, client, enableImagePdfFallback = false) {
+  constructor(logger, client, enableImagePdfFallback = false, timeout = PDF_CONVERTER.DEFAULT_TIMEOUT_MS) {
     this.logger = logger;
     this.client = client;
     this.enableImagePdfFallback = enableImagePdfFallback;
+    this.timeout = timeout;
   }
   async convert(url, reportId, onComplete, cleanupAfterCallback, options, abortSignal) {
     this.logger.info("[PDFConverter] Converting:", url);
@@ -875,7 +1094,15 @@ var PDFConverter = class {
   }
   async performConversion(url, reportId, onComplete, cleanupAfterCallback, options, abortSignal) {
     const startTime = Date.now();
-    const conversionOptions = this.buildConversionOptions(options);
+    const pipelineType = options.pipeline ?? "standard";
+    const conversionOptions = pipelineType === "vlm" ? this.buildVlmConversionOptions(options) : this.buildConversionOptions(options);
+    if (pipelineType === "vlm") {
+      this.logger.info("[PDFConverter] Using VLM pipeline");
+    } else {
+      this.logger.info(
+        `[PDFConverter] OCR languages: ${JSON.stringify(conversionOptions.ocr_options?.lang)}`
+      );
+    }
     this.logger.info(
       "[PDFConverter] Converting document with Async Source API..."
     );
@@ -942,7 +1169,7 @@ var PDFConverter = class {
   }
   buildConversionOptions(options) {
     return {
-      ...(0, import_es_toolkit.omit)(options, ["num_threads"]),
+      ...(0, import_es_toolkit.omit)(options, ["num_threads", "pipeline", "vlm_model"]),
       to_formats: ["json", "html"],
       image_export_mode: "embedded",
       ocr_engine: "ocrmac",
@@ -968,6 +1195,31 @@ var PDFConverter = class {
       }
     };
   }
+  /**
+   * Assemble the conversion options sent when the VLM pipeline is selected.
+   *
+   * The model comes from `options.vlm_model`, falling back to
+   * DEFAULT_VLM_MODEL, and is resolved through resolveVlmModel(). Caller
+   * options are passed through except for num_threads, pipeline, vlm_model
+   * and ocr_lang; the fixed VLM settings below then take precedence.
+   * The VLM pipeline uses a Vision Language Model instead of traditional OCR,
+   * intended to give better accuracy for KCJ characters and complex layouts.
+   */
+  buildVlmConversionOptions(options) {
+    const requestedModel = options.vlm_model ?? DEFAULT_VLM_MODEL;
+    const resolvedModel = resolveVlmModel(requestedModel);
+    this.logger.info(
+      `[PDFConverter] VLM model: ${resolvedModel.repo_id} (framework: ${resolvedModel.inference_framework}, format: ${resolvedModel.response_format})`
+    );
+    // Keys consumed here (or meaningless for VLM) are dropped from the pass-through set.
+    const passthrough = (0, import_es_toolkit.omit)(options, ["num_threads", "pipeline", "vlm_model", "ocr_lang"]);
+    const acceleratorOptions = {
+      device: "mps",
+      num_threads: options.num_threads
+    };
+    return {
+      ...passthrough,
+      to_formats: ["json", "html"],
+      image_export_mode: "embedded",
+      pipeline: "vlm",
+      vlm_pipeline_model_local: resolvedModel,
+      generate_picture_images: true,
+      images_scale: 2,
+      accelerator_options: acceleratorOptions
+    };
+  }
   async startConversionTask(url, conversionOptions) {
     const task = await this.client.convertSourceAsync({
       sources: [
@@ -1003,38 +1255,42 @@ var PDFConverter = class {
   }
   async trackTaskProgress(task) {
     const conversionStartTime = Date.now();
-    let lastStatus = "";
-    let isCompleted = false;
-    const pollInterval = setInterval(() => {
-      if (isCompleted) return;
-      const elapsed = Math.floor((Date.now() - conversionStartTime) / 1e3);
-      process.stdout.write(
-        `\r[PDFConverter] Status: ${lastStatus || "processing"} (${elapsed}s elapsed)`
-      );
-    }, PDF_CONVERTER.POLL_INTERVAL_MS);
-    task.on("progress", (status) => {
-      lastStatus = status.task_status;
+    let lastProgressLine = "";
+    const logProgress = (status) => {
+      const parts = [`Status: ${status.task_status}`];
       if (status.task_position !== void 0) {
-        process.stdout.write(
-          `\r[PDFConverter] Status: ${status.task_status} (position: ${status.task_position})`
-        );
+        parts.push(`position: ${status.task_position}`);
       }
-    });
-    task.on("complete", () => {
-      isCompleted = true;
-      clearInterval(pollInterval);
-      this.logger.info("\n[PDFConverter] Conversion completed!");
-    });
-    task.on("error", (error) => {
-      isCompleted = true;
-      clearInterval(pollInterval);
-      this.logger.error("\n[PDFConverter] Conversion error:", error.message);
-    });
-    try {
-      await task.waitForCompletion();
-    } finally {
-      isCompleted = true;
-      clearInterval(pollInterval);
+      const meta = status.task_meta;
+      if (meta) {
+        if (meta.processed_documents !== void 0 && meta.total_documents !== void 0) {
+          parts.push(
+            `progress: ${meta.processed_documents}/${meta.total_documents}`
+          );
+        }
+      }
+      const progressLine = `\r[PDFConverter] ${parts.join(" | ")}`;
+      if (progressLine !== lastProgressLine) {
+        lastProgressLine = progressLine;
+        process.stdout.write(progressLine);
+      }
+    };
+    while (true) {
+      if (Date.now() - conversionStartTime > this.timeout) {
+        throw new Error("Task timeout");
+      }
+      const status = await task.poll();
+      logProgress(status);
+      if (status.task_status === "success") {
+        this.logger.info("\n[PDFConverter] Conversion completed!");
+        return;
+      }
+      if (status.task_status === "failure") {
+        throw new Error("Task failed with status: failure");
+      }
+      await new Promise(
+        (resolve) => setTimeout(resolve, PDF_CONVERTER.POLL_INTERVAL_MS)
+      );
     }
   }
   async downloadResult(taskId) {
@@ -1070,6 +1326,7 @@ var PDFParser = class {
   killExistingProcess;
   enableImagePdfFallback;
   client = null;
+  environment;
   constructor(options) {
     const {
       logger,
@@ -1106,7 +1363,7 @@ var PDFParser = class {
     }
     if (this.baseUrl) {
       this.logger.info("[PDFParser] Using external server:", this.baseUrl);
-      this.client = new import_docling_sdk.Docling({
+      this.client = new import_docling_sdk2.Docling({
         api: { baseUrl: this.baseUrl, timeout: this.timeout }
       });
       await this.waitForServerReady();
@@ -1114,15 +1371,15 @@ var PDFParser = class {
     }
     this.logger.info("[PDFParser] Setting up local server...");
     try {
-      const environment = new DoclingEnvironment({
+      this.environment = new DoclingEnvironment({
         logger: this.logger,
         venvPath: this.venvPath,
         port: this.port,
         killExistingProcess: this.killExistingProcess
       });
-      await environment.setup();
+      await this.environment.setup();
       const clientUrl = `http://localhost:${this.port}`;
-      this.client = new import_docling_sdk.Docling({
+      this.client = new import_docling_sdk2.Docling({
         api: {
           baseUrl: clientUrl,
           timeout: this.timeout
@@ -1136,9 +1393,9 @@ var PDFParser = class {
     }
   }
   checkOperatingSystem() {
-    if ((0, import_node_os2.platform)() !== "darwin") {
+    if ((0, import_node_os3.platform)() !== "darwin") {
       throw new Error(
-        "PDFParser is only supported on macOS. Current platform: " + (0, import_node_os2.platform)()
+        "PDFParser is only supported on macOS. Current platform: " + (0, import_node_os3.platform)()
       );
     }
   }
@@ -1222,7 +1479,7 @@ var PDFParser = class {
     });
     await environment.startServer();
     this.client?.destroy();
-    this.client = new import_docling_sdk.Docling({
+    this.client = new import_docling_sdk2.Docling({
       api: {
         baseUrl: `http://localhost:${this.port}`,
         timeout: this.timeout
@@ -1266,6 +1523,12 @@ var PDFParser = class {
         "PDFParser is not initialized. Call init() before using parse()"
       );
     }
+    if (options.pipeline === "vlm" && this.environment && !this.baseUrl) {
+      this.logger.info(
+        "[PDFParser] VLM pipeline requested, ensuring VLM dependencies..."
+      );
+      await this.environment.setupVlmDependencies();
+    }
     const canRecover = !this.baseUrl && this.port !== void 0;
     const maxAttempts = PDF_PARSER.MAX_SERVER_RECOVERY_ATTEMPTS;
     let attempt = 0;
@@ -1275,7 +1538,8 @@ var PDFParser = class {
         const converter = new PDFConverter(
           this.logger,
           this.client,
-          effectiveFallbackEnabled
+          effectiveFallbackEnabled,
+          this.timeout
         );
         return await converter.convert(
           url,
@@ -1323,7 +1587,10 @@ var PDFParser = class {
 };
 // Annotate the CommonJS export names for ESM import in node:
 0 && (module.exports = {
+  DEFAULT_VLM_MODEL,
   ImagePdfFallbackError,
-  PDFParser
+  PDFParser,
+  VLM_MODELS,
+  resolveVlmModel
 });
 //# sourceMappingURL=index.cjs.map