docling-2.57.0-py3-none-any.whl → docling-2.59.0-py3-none-any.whl
- docling/backend/abstract_backend.py +24 -3
- docling/backend/asciidoc_backend.py +3 -3
- docling/backend/docling_parse_v4_backend.py +15 -4
- docling/backend/html_backend.py +130 -20
- docling/backend/md_backend.py +27 -5
- docling/backend/msexcel_backend.py +121 -29
- docling/backend/mspowerpoint_backend.py +2 -2
- docling/backend/msword_backend.py +18 -18
- docling/backend/pdf_backend.py +9 -2
- docling/backend/pypdfium2_backend.py +12 -3
- docling/cli/main.py +104 -38
- docling/datamodel/asr_model_specs.py +408 -6
- docling/datamodel/backend_options.py +82 -0
- docling/datamodel/base_models.py +19 -2
- docling/datamodel/document.py +81 -48
- docling/datamodel/pipeline_options_asr_model.py +21 -1
- docling/datamodel/pipeline_options_vlm_model.py +1 -0
- docling/document_converter.py +37 -45
- docling/document_extractor.py +12 -11
- docling/models/api_vlm_model.py +5 -3
- docling/models/picture_description_vlm_model.py +5 -1
- docling/models/readingorder_model.py +6 -7
- docling/models/vlm_models_inline/hf_transformers_model.py +13 -3
- docling/models/vlm_models_inline/mlx_model.py +9 -3
- docling/models/vlm_models_inline/nuextract_transformers_model.py +13 -3
- docling/models/vlm_models_inline/vllm_model.py +42 -8
- docling/pipeline/asr_pipeline.py +149 -6
- docling/utils/api_image_request.py +20 -9
- docling/utils/layout_postprocessor.py +23 -24
- {docling-2.57.0.dist-info → docling-2.59.0.dist-info}/METADATA +11 -8
- {docling-2.57.0.dist-info → docling-2.59.0.dist-info}/RECORD +35 -34
- {docling-2.57.0.dist-info → docling-2.59.0.dist-info}/WHEEL +0 -0
- {docling-2.57.0.dist-info → docling-2.59.0.dist-info}/entry_points.txt +0 -0
- {docling-2.57.0.dist-info → docling-2.59.0.dist-info}/licenses/LICENSE +0 -0
- {docling-2.57.0.dist-info → docling-2.59.0.dist-info}/top_level.txt +0 -0
docling/cli/main.py
CHANGED
@@ -32,13 +32,26 @@ from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
 from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
 from docling.datamodel.asr_model_specs import (
     WHISPER_BASE,
+    WHISPER_BASE_MLX,
+    WHISPER_BASE_NATIVE,
     WHISPER_LARGE,
+    WHISPER_LARGE_MLX,
+    WHISPER_LARGE_NATIVE,
     WHISPER_MEDIUM,
+    WHISPER_MEDIUM_MLX,
+    WHISPER_MEDIUM_NATIVE,
     WHISPER_SMALL,
+    WHISPER_SMALL_MLX,
+    WHISPER_SMALL_NATIVE,
     WHISPER_TINY,
+    WHISPER_TINY_MLX,
+    WHISPER_TINY_NATIVE,
     WHISPER_TURBO,
+    WHISPER_TURBO_MLX,
+    WHISPER_TURBO_NATIVE,
     AsrModelType,
 )
+from docling.datamodel.backend_options import PdfBackendOptions
 from docling.datamodel.base_models import (
     ConversionStatus,
     FormatToExtensions,
@@ -391,7 +404,10 @@ def convert(  # noqa: C901
     ] = None,
     pdf_backend: Annotated[
         PdfBackend, typer.Option(..., help="The PDF backend to use.")
-    ] = PdfBackend.
+    ] = PdfBackend.DLPARSE_V4,
+    pdf_password: Annotated[
+        Optional[str], typer.Option(..., help="Password for protected PDF documents")
+    ] = None,
     table_mode: Annotated[
         TableFormerMode,
         typer.Option(..., help="The mode to use in the table structure model."),
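Typer derives CLI flag names from parameter names, so the new pdf_password parameter should surface on the command line as --pdf-password. A minimal standalone sketch of the same Annotated/typer.Option pattern used above (a hypothetical app.py, not docling code; the option declaration mirrors the diff verbatim):

from typing import Annotated, Optional

import typer

app = typer.Typer()


@app.command()
def convert(
    source: Annotated[str, typer.Argument(help="Path of the document to convert.")],
    pdf_password: Annotated[
        Optional[str], typer.Option(..., help="Password for protected PDF documents")
    ] = None,
) -> None:
    # typer exposes this parameter on the command line as --pdf-password
    typer.echo(f"converting {source} (password supplied: {pdf_password is not None})")


if __name__ == "__main__":
    app()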
@@ -611,10 +627,14 @@ def convert(  # noqa: C901
         ocr_options.psm = psm
 
     accelerator_options = AcceleratorOptions(num_threads=num_threads, device=device)
+
     # pipeline_options: PaginatedPipelineOptions
     pipeline_options: PipelineOptions
 
     format_options: Dict[InputFormat, FormatOption] = {}
+    pdf_backend_options: Optional[PdfBackendOptions] = PdfBackendOptions(
+        password=pdf_password
+    )
 
     if pipeline == ProcessingPipeline.STANDARD:
         pipeline_options = PdfPipelineOptions(
@@ -645,8 +665,10 @@ def convert(  # noqa: C901
         backend: Type[PdfDocumentBackend]
         if pdf_backend == PdfBackend.DLPARSE_V1:
             backend = DoclingParseDocumentBackend
+            pdf_backend_options = None
         elif pdf_backend == PdfBackend.DLPARSE_V2:
             backend = DoclingParseV2DocumentBackend
+            pdf_backend_options = None
         elif pdf_backend == PdfBackend.DLPARSE_V4:
             backend = DoclingParseV4DocumentBackend  # type: ignore
         elif pdf_backend == PdfBackend.PYPDFIUM2:
@@ -657,6 +679,7 @@ def convert(  # noqa: C901
         pdf_format_option = PdfFormatOption(
             pipeline_options=pipeline_options,
             backend=backend,  # pdf_backend
+            backend_options=pdf_backend_options,
         )
 
         # METS GBS options
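The same plumbing suggests how library users could pass a password programmatically. A hedged sketch, assuming PdfFormatOption forwards backend_options to the backend exactly as wired above (the file name and password are placeholders):

from docling.datamodel.backend_options import PdfBackendOptions
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption

# Build a converter whose PDF backend receives the password; note the CLI
# above clears backend_options for the DLPARSE_V1/V2 backends, which do not take it.
converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            backend_options=PdfBackendOptions(password="s3cret"),
        )
    }
)
result = converter.convert("protected.pdf")  # placeholder path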
@@ -715,10 +738,15 @@ def convert(  # noqa: C901
 
                 pipeline_options.vlm_options = SMOLDOCLING_MLX
             except ImportError:
-                _log.warning(
-                    "To run SmolDocling faster, please install mlx-vlm:\n"
-                    "pip install mlx-vlm"
-                )
+                if sys.version_info < (3, 14):
+                    _log.warning(
+                        "To run SmolDocling faster, please install mlx-vlm:\n"
+                        "pip install mlx-vlm"
+                    )
+                else:
+                    _log.warning(
+                        "You can run SmolDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
+                    )
 
         elif vlm_model == VlmModelType.GRANITEDOCLING:
             pipeline_options.vlm_options = GRANITEDOCLING_TRANSFORMERS
@@ -728,10 +756,16 @@ def convert(  # noqa: C901
 
                 pipeline_options.vlm_options = GRANITEDOCLING_MLX
             except ImportError:
-                _log.warning(
-                    "To run GraniteDocling faster, please install mlx-vlm:\n"
-                    "pip install mlx-vlm"
-                )
+                if sys.version_info < (3, 14):
+                    _log.warning(
+                        "To run GraniteDocling faster, please install mlx-vlm:\n"
+                        "pip install mlx-vlm"
+                    )
+                else:
+                    _log.warning(
+                        "You can run GraniteDocling faster with MLX support, but it is unfortunately not yet available on Python 3.14."
+                    )
+
         elif vlm_model == VlmModelType.SMOLDOCLING_VLLM:
             pipeline_options.vlm_options = SMOLDOCLING_VLLM
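Both branches follow the same guarded-import pattern: try the MLX-specific options, and on ImportError log a message that depends on the Python version. A condensed, self-contained sketch of that pattern (function and variable names here are illustrative, not docling APIs):

import logging
import sys

_log = logging.getLogger(__name__)


def pick_vlm_options(mlx_options, fallback_options, model_name: str):
    """Prefer MLX options when mlx-vlm imports cleanly, else warn and fall back."""
    try:
        import mlx_vlm  # noqa: F401  # succeeds only where mlx-vlm is installed

        return mlx_options
    except ImportError:
        if sys.version_info < (3, 14):
            _log.warning(
                f"To run {model_name} faster, please install mlx-vlm:\npip install mlx-vlm"
            )
        else:
            _log.warning("MLX support is unfortunately not yet available on Python 3.14.")
        return fallback_options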
@@ -747,42 +781,74 @@ def convert(  # noqa: C901
             InputFormat.IMAGE: pdf_format_option,
         }
 
-
-
-
-
-
+    # Set ASR options
+    asr_pipeline_options = AsrPipelineOptions(
+        accelerator_options=AcceleratorOptions(
+            device=device,
+            num_threads=num_threads,
+        ),
+        # enable_remote_services=enable_remote_services,
+        # artifacts_path = artifacts_path
+    )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Auto-selecting models (choose best implementation for hardware)
+    if asr_model == AsrModelType.WHISPER_TINY:
+        asr_pipeline_options.asr_options = WHISPER_TINY
+    elif asr_model == AsrModelType.WHISPER_SMALL:
+        asr_pipeline_options.asr_options = WHISPER_SMALL
+    elif asr_model == AsrModelType.WHISPER_MEDIUM:
+        asr_pipeline_options.asr_options = WHISPER_MEDIUM
+    elif asr_model == AsrModelType.WHISPER_BASE:
+        asr_pipeline_options.asr_options = WHISPER_BASE
+    elif asr_model == AsrModelType.WHISPER_LARGE:
+        asr_pipeline_options.asr_options = WHISPER_LARGE
+    elif asr_model == AsrModelType.WHISPER_TURBO:
+        asr_pipeline_options.asr_options = WHISPER_TURBO
+
+    # Explicit MLX models (force MLX implementation)
+    elif asr_model == AsrModelType.WHISPER_TINY_MLX:
+        asr_pipeline_options.asr_options = WHISPER_TINY_MLX
+    elif asr_model == AsrModelType.WHISPER_SMALL_MLX:
+        asr_pipeline_options.asr_options = WHISPER_SMALL_MLX
+    elif asr_model == AsrModelType.WHISPER_MEDIUM_MLX:
+        asr_pipeline_options.asr_options = WHISPER_MEDIUM_MLX
+    elif asr_model == AsrModelType.WHISPER_BASE_MLX:
+        asr_pipeline_options.asr_options = WHISPER_BASE_MLX
+    elif asr_model == AsrModelType.WHISPER_LARGE_MLX:
+        asr_pipeline_options.asr_options = WHISPER_LARGE_MLX
+    elif asr_model == AsrModelType.WHISPER_TURBO_MLX:
+        asr_pipeline_options.asr_options = WHISPER_TURBO_MLX
+
+    # Explicit Native models (force native implementation)
+    elif asr_model == AsrModelType.WHISPER_TINY_NATIVE:
+        asr_pipeline_options.asr_options = WHISPER_TINY_NATIVE
+    elif asr_model == AsrModelType.WHISPER_SMALL_NATIVE:
+        asr_pipeline_options.asr_options = WHISPER_SMALL_NATIVE
+    elif asr_model == AsrModelType.WHISPER_MEDIUM_NATIVE:
+        asr_pipeline_options.asr_options = WHISPER_MEDIUM_NATIVE
+    elif asr_model == AsrModelType.WHISPER_BASE_NATIVE:
+        asr_pipeline_options.asr_options = WHISPER_BASE_NATIVE
+    elif asr_model == AsrModelType.WHISPER_LARGE_NATIVE:
+        asr_pipeline_options.asr_options = WHISPER_LARGE_NATIVE
+    elif asr_model == AsrModelType.WHISPER_TURBO_NATIVE:
+        asr_pipeline_options.asr_options = WHISPER_TURBO_NATIVE
 
-
+    else:
+        _log.error(f"{asr_model} is not known")
+        raise ValueError(f"{asr_model} is not known")
 
-    audio_format_option = AudioFormatOption(
-        pipeline_cls=AsrPipeline,
-        pipeline_options=pipeline_options,
-    )
+    _log.debug(f"ASR pipeline_options: {asr_pipeline_options}")
 
-
-
-
+    audio_format_option = AudioFormatOption(
+        pipeline_cls=AsrPipeline,
+        pipeline_options=asr_pipeline_options,
+    )
+    format_options[InputFormat.AUDIO] = audio_format_option
 
+    # Common options for all pipelines
     if artifacts_path is not None:
         pipeline_options.artifacts_path = artifacts_path
-
+        asr_pipeline_options.artifacts_path = artifacts_path
 
     doc_converter = DocumentConverter(
         allowed_formats=from_formats,
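The rewritten section builds the audio pipeline once and registers it for InputFormat.AUDIO. A hedged sketch of the equivalent library-level wiring, reusing the names from the hunk above (import paths assumed from the docling package layout; the audio file is a placeholder):

from docling.datamodel.asr_model_specs import WHISPER_TURBO_MLX
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import AsrPipelineOptions
from docling.document_converter import AudioFormatOption, DocumentConverter
from docling.pipeline.asr_pipeline import AsrPipeline

asr_pipeline_options = AsrPipelineOptions()
# WHISPER_TURBO would auto-select MLX vs. native; the _MLX variant forces MLX.
asr_pipeline_options.asr_options = WHISPER_TURBO_MLX

converter = DocumentConverter(
    format_options={
        InputFormat.AUDIO: AudioFormatOption(
            pipeline_cls=AsrPipeline,
            pipeline_options=asr_pipeline_options,
        )
    }
)
result = converter.convert("meeting.wav")  # placeholder path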
docling/datamodel/asr_model_specs.py
CHANGED

@@ -10,13 +10,394 @@ from docling.datamodel.pipeline_options_asr_model import (
     # AsrResponseFormat,
     # ApiAsrOptions,
     InferenceAsrFramework,
+    InlineAsrMlxWhisperOptions,
     InlineAsrNativeWhisperOptions,
     TransformersModelType,
 )
 
 _log = logging.getLogger(__name__)
 
-WHISPER_TINY = InlineAsrNativeWhisperOptions(
+
+def _get_whisper_tiny_model():
+    """
+    Get the best Whisper Tiny model for the current hardware.
+
+    Automatically selects MLX Whisper Tiny for Apple Silicon (MPS) if available,
+    otherwise falls back to native Whisper Tiny.
+    """
+    # Check if MPS is available (Apple Silicon)
+    try:
+        import torch
+
+        has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+    except ImportError:
+        has_mps = False
+
+    # Check if mlx-whisper is available
+    try:
+        import mlx_whisper  # type: ignore
+
+        has_mlx_whisper = True
+    except ImportError:
+        has_mlx_whisper = False
+
+    # Use MLX Whisper if both MPS and mlx-whisper are available
+    if has_mps and has_mlx_whisper:
+        return InlineAsrMlxWhisperOptions(
+            repo_id="mlx-community/whisper-tiny-mlx",
+            inference_framework=InferenceAsrFramework.MLX,
+            language="en",
+            task="transcribe",
+            word_timestamps=True,
+            no_speech_threshold=0.6,
+            logprob_threshold=-1.0,
+            compression_ratio_threshold=2.4,
+        )
+    else:
+        return InlineAsrNativeWhisperOptions(
+            repo_id="tiny",
+            inference_framework=InferenceAsrFramework.WHISPER,
+            verbose=True,
+            timestamps=True,
+            word_timestamps=True,
+            temperature=0.0,
+            max_new_tokens=256,
+            max_time_chunk=30.0,
+        )
+
+
+# Create the model instance
+WHISPER_TINY = _get_whisper_tiny_model()
+
+
+def _get_whisper_small_model():
+    """
+    Get the best Whisper Small model for the current hardware.
+
+    Automatically selects MLX Whisper Small for Apple Silicon (MPS) if available,
+    otherwise falls back to native Whisper Small.
+    """
+    # Check if MPS is available (Apple Silicon)
+    try:
+        import torch
+
+        has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+    except ImportError:
+        has_mps = False
+
+    # Check if mlx-whisper is available
+    try:
+        import mlx_whisper  # type: ignore
+
+        has_mlx_whisper = True
+    except ImportError:
+        has_mlx_whisper = False
+
+    # Use MLX Whisper if both MPS and mlx-whisper are available
+    if has_mps and has_mlx_whisper:
+        return InlineAsrMlxWhisperOptions(
+            repo_id="mlx-community/whisper-small-mlx",
+            inference_framework=InferenceAsrFramework.MLX,
+            language="en",
+            task="transcribe",
+            word_timestamps=True,
+            no_speech_threshold=0.6,
+            logprob_threshold=-1.0,
+            compression_ratio_threshold=2.4,
+        )
+    else:
+        return InlineAsrNativeWhisperOptions(
+            repo_id="small",
+            inference_framework=InferenceAsrFramework.WHISPER,
+            verbose=True,
+            timestamps=True,
+            word_timestamps=True,
+            temperature=0.0,
+            max_new_tokens=256,
+            max_time_chunk=30.0,
+        )
+
+
+# Create the model instance
+WHISPER_SMALL = _get_whisper_small_model()
+
+
+def _get_whisper_medium_model():
+    """
+    Get the best Whisper Medium model for the current hardware.
+
+    Automatically selects MLX Whisper Medium for Apple Silicon (MPS) if available,
+    otherwise falls back to native Whisper Medium.
+    """
+    # Check if MPS is available (Apple Silicon)
+    try:
+        import torch
+
+        has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+    except ImportError:
+        has_mps = False
+
+    # Check if mlx-whisper is available
+    try:
+        import mlx_whisper  # type: ignore
+
+        has_mlx_whisper = True
+    except ImportError:
+        has_mlx_whisper = False
+
+    # Use MLX Whisper if both MPS and mlx-whisper are available
+    if has_mps and has_mlx_whisper:
+        return InlineAsrMlxWhisperOptions(
+            repo_id="mlx-community/whisper-medium-mlx-8bit",
+            inference_framework=InferenceAsrFramework.MLX,
+            language="en",
+            task="transcribe",
+            word_timestamps=True,
+            no_speech_threshold=0.6,
+            logprob_threshold=-1.0,
+            compression_ratio_threshold=2.4,
+        )
+    else:
+        return InlineAsrNativeWhisperOptions(
+            repo_id="medium",
+            inference_framework=InferenceAsrFramework.WHISPER,
+            verbose=True,
+            timestamps=True,
+            word_timestamps=True,
+            temperature=0.0,
+            max_new_tokens=256,
+            max_time_chunk=30.0,
+        )
+
+
+# Create the model instance
+WHISPER_MEDIUM = _get_whisper_medium_model()
+
+
+def _get_whisper_base_model():
+    """
+    Get the best Whisper Base model for the current hardware.
+
+    Automatically selects MLX Whisper Base for Apple Silicon (MPS) if available,
+    otherwise falls back to native Whisper Base.
+    """
+    # Check if MPS is available (Apple Silicon)
+    try:
+        import torch
+
+        has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+    except ImportError:
+        has_mps = False
+
+    # Check if mlx-whisper is available
+    try:
+        import mlx_whisper  # type: ignore
+
+        has_mlx_whisper = True
+    except ImportError:
+        has_mlx_whisper = False
+
+    # Use MLX Whisper if both MPS and mlx-whisper are available
+    if has_mps and has_mlx_whisper:
+        return InlineAsrMlxWhisperOptions(
+            repo_id="mlx-community/whisper-base-mlx",
+            inference_framework=InferenceAsrFramework.MLX,
+            language="en",
+            task="transcribe",
+            word_timestamps=True,
+            no_speech_threshold=0.6,
+            logprob_threshold=-1.0,
+            compression_ratio_threshold=2.4,
+        )
+    else:
+        return InlineAsrNativeWhisperOptions(
+            repo_id="base",
+            inference_framework=InferenceAsrFramework.WHISPER,
+            verbose=True,
+            timestamps=True,
+            word_timestamps=True,
+            temperature=0.0,
+            max_new_tokens=256,
+            max_time_chunk=30.0,
+        )
+
+
+# Create the model instance
+WHISPER_BASE = _get_whisper_base_model()
+
+
+def _get_whisper_large_model():
+    """
+    Get the best Whisper Large model for the current hardware.
+
+    Automatically selects MLX Whisper Large for Apple Silicon (MPS) if available,
+    otherwise falls back to native Whisper Large.
+    """
+    # Check if MPS is available (Apple Silicon)
+    try:
+        import torch
+
+        has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+    except ImportError:
+        has_mps = False
+
+    # Check if mlx-whisper is available
+    try:
+        import mlx_whisper  # type: ignore
+
+        has_mlx_whisper = True
+    except ImportError:
+        has_mlx_whisper = False
+
+    # Use MLX Whisper if both MPS and mlx-whisper are available
+    if has_mps and has_mlx_whisper:
+        return InlineAsrMlxWhisperOptions(
+            repo_id="mlx-community/whisper-large-mlx-8bit",
+            inference_framework=InferenceAsrFramework.MLX,
+            language="en",
+            task="transcribe",
+            word_timestamps=True,
+            no_speech_threshold=0.6,
+            logprob_threshold=-1.0,
+            compression_ratio_threshold=2.4,
+        )
+    else:
+        return InlineAsrNativeWhisperOptions(
+            repo_id="large",
+            inference_framework=InferenceAsrFramework.WHISPER,
+            verbose=True,
+            timestamps=True,
+            word_timestamps=True,
+            temperature=0.0,
+            max_new_tokens=256,
+            max_time_chunk=30.0,
+        )
+
+
+# Create the model instance
+WHISPER_LARGE = _get_whisper_large_model()
+
+
+def _get_whisper_turbo_model():
+    """
+    Get the best Whisper Turbo model for the current hardware.
+
+    Automatically selects MLX Whisper Turbo for Apple Silicon (MPS) if available,
+    otherwise falls back to native Whisper Turbo.
+    """
+    # Check if MPS is available (Apple Silicon)
+    try:
+        import torch
+
+        has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+    except ImportError:
+        has_mps = False
+
+    # Check if mlx-whisper is available
+    try:
+        import mlx_whisper  # type: ignore
+
+        has_mlx_whisper = True
+    except ImportError:
+        has_mlx_whisper = False
+
+    # Use MLX Whisper if both MPS and mlx-whisper are available
+    if has_mps and has_mlx_whisper:
+        return InlineAsrMlxWhisperOptions(
+            repo_id="mlx-community/whisper-turbo",
+            inference_framework=InferenceAsrFramework.MLX,
+            language="en",
+            task="transcribe",
+            word_timestamps=True,
+            no_speech_threshold=0.6,
+            logprob_threshold=-1.0,
+            compression_ratio_threshold=2.4,
+        )
+    else:
+        return InlineAsrNativeWhisperOptions(
+            repo_id="turbo",
+            inference_framework=InferenceAsrFramework.WHISPER,
+            verbose=True,
+            timestamps=True,
+            word_timestamps=True,
+            temperature=0.0,
+            max_new_tokens=256,
+            max_time_chunk=30.0,
+        )
+
+
+# Create the model instance
+WHISPER_TURBO = _get_whisper_turbo_model()
+
+# Explicit MLX Whisper model options for users who want to force MLX usage
+WHISPER_TINY_MLX = InlineAsrMlxWhisperOptions(
+    repo_id="mlx-community/whisper-tiny-mlx",
+    inference_framework=InferenceAsrFramework.MLX,
+    language="en",
+    task="transcribe",
+    word_timestamps=True,
+    no_speech_threshold=0.6,
+    logprob_threshold=-1.0,
+    compression_ratio_threshold=2.4,
+)
+
+WHISPER_SMALL_MLX = InlineAsrMlxWhisperOptions(
+    repo_id="mlx-community/whisper-small-mlx",
+    inference_framework=InferenceAsrFramework.MLX,
+    language="en",
+    task="transcribe",
+    word_timestamps=True,
+    no_speech_threshold=0.6,
+    logprob_threshold=-1.0,
+    compression_ratio_threshold=2.4,
+)
+
+WHISPER_MEDIUM_MLX = InlineAsrMlxWhisperOptions(
+    repo_id="mlx-community/whisper-medium-mlx-8bit",
+    inference_framework=InferenceAsrFramework.MLX,
+    language="en",
+    task="transcribe",
+    word_timestamps=True,
+    no_speech_threshold=0.6,
+    logprob_threshold=-1.0,
+    compression_ratio_threshold=2.4,
+)
+
+WHISPER_BASE_MLX = InlineAsrMlxWhisperOptions(
+    repo_id="mlx-community/whisper-base-mlx",
+    inference_framework=InferenceAsrFramework.MLX,
+    language="en",
+    task="transcribe",
+    word_timestamps=True,
+    no_speech_threshold=0.6,
+    logprob_threshold=-1.0,
+    compression_ratio_threshold=2.4,
+)
+
+WHISPER_LARGE_MLX = InlineAsrMlxWhisperOptions(
+    repo_id="mlx-community/whisper-large-mlx-8bit",
+    inference_framework=InferenceAsrFramework.MLX,
+    language="en",
+    task="transcribe",
+    word_timestamps=True,
+    no_speech_threshold=0.6,
+    logprob_threshold=-1.0,
+    compression_ratio_threshold=2.4,
+)
+
+WHISPER_TURBO_MLX = InlineAsrMlxWhisperOptions(
+    repo_id="mlx-community/whisper-turbo",
+    inference_framework=InferenceAsrFramework.MLX,
+    language="en",
+    task="transcribe",
+    word_timestamps=True,
+    no_speech_threshold=0.6,
+    logprob_threshold=-1.0,
+    compression_ratio_threshold=2.4,
+)
+
+# Explicit Native Whisper model options for users who want to force native usage
+WHISPER_TINY_NATIVE = InlineAsrNativeWhisperOptions(
     repo_id="tiny",
     inference_framework=InferenceAsrFramework.WHISPER,
     verbose=True,
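The six _get_whisper_*_model() helpers added above repeat an identical MPS and mlx-whisper probe; only the repo ids differ. A hedged refactor sketch showing how the probe could be hoisted into one parametrized factory (an alternative shape under that assumption, not the shipped code):

def _pick_whisper_options(mlx_repo_id: str, native_repo_id: str):
    """Return MLX options on Apple Silicon with mlx-whisper installed, else native."""
    try:
        import torch

        has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
    except ImportError:
        has_mps = False
    try:
        import mlx_whisper  # type: ignore  # noqa: F401

        has_mlx_whisper = True
    except ImportError:
        has_mlx_whisper = False

    if has_mps and has_mlx_whisper:
        return InlineAsrMlxWhisperOptions(
            repo_id=mlx_repo_id,
            inference_framework=InferenceAsrFramework.MLX,
            language="en",
            task="transcribe",
            word_timestamps=True,
            no_speech_threshold=0.6,
            logprob_threshold=-1.0,
            compression_ratio_threshold=2.4,
        )
    return InlineAsrNativeWhisperOptions(
        repo_id=native_repo_id,
        inference_framework=InferenceAsrFramework.WHISPER,
        verbose=True,
        timestamps=True,
        word_timestamps=True,
        temperature=0.0,
        max_new_tokens=256,
        max_time_chunk=30.0,
    )

# e.g. WHISPER_TINY = _pick_whisper_options("mlx-community/whisper-tiny-mlx", "tiny")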
@@ -27,7 +408,7 @@ WHISPER_TINY = InlineAsrNativeWhisperOptions(
     max_time_chunk=30.0,
 )
 
-WHISPER_SMALL = InlineAsrNativeWhisperOptions(
+WHISPER_SMALL_NATIVE = InlineAsrNativeWhisperOptions(
     repo_id="small",
     inference_framework=InferenceAsrFramework.WHISPER,
     verbose=True,
@@ -38,7 +419,7 @@ WHISPER_SMALL = InlineAsrNativeWhisperOptions(
     max_time_chunk=30.0,
 )
 
-WHISPER_MEDIUM = InlineAsrNativeWhisperOptions(
+WHISPER_MEDIUM_NATIVE = InlineAsrNativeWhisperOptions(
     repo_id="medium",
     inference_framework=InferenceAsrFramework.WHISPER,
     verbose=True,
@@ -49,7 +430,7 @@ WHISPER_MEDIUM = InlineAsrNativeWhisperOptions(
     max_time_chunk=30.0,
 )
 
-WHISPER_BASE = InlineAsrNativeWhisperOptions(
+WHISPER_BASE_NATIVE = InlineAsrNativeWhisperOptions(
     repo_id="base",
     inference_framework=InferenceAsrFramework.WHISPER,
     verbose=True,
@@ -60,7 +441,7 @@ WHISPER_BASE = InlineAsrNativeWhisperOptions(
     max_time_chunk=30.0,
 )
 
-WHISPER_LARGE = InlineAsrNativeWhisperOptions(
+WHISPER_LARGE_NATIVE = InlineAsrNativeWhisperOptions(
     repo_id="large",
     inference_framework=InferenceAsrFramework.WHISPER,
     verbose=True,
@@ -71,7 +452,7 @@ WHISPER_LARGE = InlineAsrNativeWhisperOptions(
     max_time_chunk=30.0,
 )
 
-WHISPER_TURBO = InlineAsrNativeWhisperOptions(
+WHISPER_TURBO_NATIVE = InlineAsrNativeWhisperOptions(
     repo_id="turbo",
     inference_framework=InferenceAsrFramework.WHISPER,
     verbose=True,
@@ -82,11 +463,32 @@ WHISPER_TURBO = InlineAsrNativeWhisperOptions(
     max_time_chunk=30.0,
 )
 
+# Note: The main WHISPER_* models (WHISPER_TURBO, WHISPER_BASE, etc.) automatically
+# select the best implementation (MLX on Apple Silicon, Native elsewhere).
+# Use the explicit _MLX or _NATIVE variants if you need to force a specific implementation.
+
 
 class AsrModelType(str, Enum):
+    # Auto-selecting models (choose best implementation for hardware)
     WHISPER_TINY = "whisper_tiny"
     WHISPER_SMALL = "whisper_small"
     WHISPER_MEDIUM = "whisper_medium"
     WHISPER_BASE = "whisper_base"
     WHISPER_LARGE = "whisper_large"
     WHISPER_TURBO = "whisper_turbo"
+
+    # Explicit MLX models (force MLX implementation)
+    WHISPER_TINY_MLX = "whisper_tiny_mlx"
+    WHISPER_SMALL_MLX = "whisper_small_mlx"
+    WHISPER_MEDIUM_MLX = "whisper_medium_mlx"
+    WHISPER_BASE_MLX = "whisper_base_mlx"
+    WHISPER_LARGE_MLX = "whisper_large_mlx"
+    WHISPER_TURBO_MLX = "whisper_turbo_mlx"
+
+    # Explicit Native models (force native implementation)
+    WHISPER_TINY_NATIVE = "whisper_tiny_native"
+    WHISPER_SMALL_NATIVE = "whisper_small_native"
+    WHISPER_MEDIUM_NATIVE = "whisper_medium_native"
+    WHISPER_BASE_NATIVE = "whisper_base_native"
+    WHISPER_LARGE_NATIVE = "whisper_large_native"
+    WHISPER_TURBO_NATIVE = "whisper_turbo_native"