PyPI - docling - Versions diffs - 2.43.0__tar.gz → 2.44.0__tar.gz - Mend

docling 2.43.0__tar.gz → 2.44.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (135) hide show

{docling-2.43.0 → docling-2.44.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docling
-Version: 2.43.0
+Version: 2.44.0
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
 License-Expression: MIT
@@ -58,7 +58,7 @@ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrm
 Provides-Extra: vlm
 Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
 Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
-Requires-Dist: mlx-vlm<0.2,>=0.1.22; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
+Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
 Provides-Extra: rapidocr
 Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
 Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"

{docling-2.43.0 → docling-2.44.0}/docling/backend/html_backend.py RENAMED Viewed

@@ -125,8 +125,11 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
         # set the title as furniture, since it is part of the document metadata
         title = self.soup.title
         if title:
+            title_text = title.get_text(separator=" ", strip=True)
+            title_clean = HTMLDocumentBackend._clean_unicode(title_text)
             doc.add_title(
-                text=title.get_text(separator=" ", strip=True),
+                text=title_clean,
+                orig=title_text,
                 content_layer=ContentLayer.FURNITURE,
             )
         # remove scripts/styles
@@ -168,10 +171,12 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
                 return
             for part in text.split("\n"):
                 seg = part.strip()
+                seg_clean = HTMLDocumentBackend._clean_unicode(seg)
                 if seg:
                     doc.add_text(
-                        DocItemLabel.TEXT,
-                        seg,
+                        label=DocItemLabel.TEXT,
+                        text=seg_clean,
+                        orig=seg,
                         parent=self.parents[self.level],
                         content_layer=self.content_layer,
                     )
@@ -203,13 +208,14 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
         self.content_layer = ContentLayer.BODY
         level = int(tag_name[1])
         text = tag.get_text(strip=True, separator=" ")
+        text_clean = HTMLDocumentBackend._clean_unicode(text)
         # the first level is for the title item
         if level == 1:
             for key in self.parents.keys():
                 self.parents[key] = None
             self.level = 0
             self.parents[self.level + 1] = doc.add_title(
-                text, content_layer=self.content_layer
+                text=text_clean, orig=text, content_layer=self.content_layer
             )
         # the other levels need to be lowered by 1 if a title was set
         else:
@@ -234,7 +240,8 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
                 self.level = level
             self.parents[self.level + 1] = doc.add_heading(
                 parent=self.parents[self.level],
-                text=text,
+                text=text_clean,
+                orig=text,
                 level=self.level,
                 content_layer=self.content_layer,
             )
@@ -296,13 +303,15 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
                         if text_part:
                             parts.append(text_part)
                 li_text = re.sub(r"\s+|\n+", " ", "".join(parts)).strip()
+                li_clean = HTMLDocumentBackend._clean_unicode(li_text)
                 # 3) add the list item
                 if li_text:
                     self.parents[self.level + 1] = doc.add_list_item(
-                        text=li_text,
+                        text=li_clean,
                         enumerated=is_ordered,
                         marker=marker,
+                        orig=li_text,
                         parent=list_group,
                         content_layer=self.content_layer,
                     )
@@ -344,11 +353,13 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
         elif tag_name in {"p", "address", "summary"}:
             for part in tag.text.split("\n"):
                 seg = part.strip()
+                seg_clean = HTMLDocumentBackend._clean_unicode(seg)
                 if seg:
                     doc.add_text(
-                        parent=self.parents[self.level],
                         label=DocItemLabel.TEXT,
-                        text=seg,
+                        text=seg_clean,
+                        orig=seg,
+                        parent=self.parents[self.level],
                         content_layer=self.content_layer,
                     )
             for img_tag in tag("img"):
@@ -370,10 +381,12 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
         elif tag_name in {"pre", "code"}:
             # handle monospace code snippets (pre).
             text = tag.get_text(strip=True)
+            text_clean = HTMLDocumentBackend._clean_unicode(text)
             if text:
                 doc.add_code(
                     parent=self.parents[self.level],
-                    text=text,
+                    text=text_clean,
+                    orig=text,
                     content_layer=self.content_layer,
                 )
@@ -402,8 +415,12 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
         caption_item: Optional[TextItem] = None
         if caption:
+            caption_clean = HTMLDocumentBackend._clean_unicode(caption)
             caption_item = doc.add_text(
-                DocItemLabel.CAPTION, text=caption, content_layer=self.content_layer
+                label=DocItemLabel.CAPTION,
+                text=caption_clean,
+                orig=caption,
+                content_layer=self.content_layer,
             )
         doc.add_picture(
@@ -442,6 +459,46 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
         return "".join(parts)
+    @staticmethod
+    def _clean_unicode(text: str) -> str:
+        """Replace typical Unicode characters in HTML for text processing.
+        Several Unicode characters (e.g., non-printable or formatting) are typically
+        found in HTML but are worth replacing to sanitize text and ensure consistency
+        in text processing tasks.
+        Args:
+            text: The original text.
+        Returns:
+            The sanitized text without typical Unicode characters.
+        """
+        replacements = {
+            "\u00a0": " ",  # non-breaking space
+            "\u200b": "",  # zero-width space
+            "\u200c": "",  # zero-width non-joiner
+            "\u200d": "",  # zero-width joiner
+            "\u2010": "-",  # hyphen
+            "\u2011": "-",  # non-breaking hyphen
+            "\u2012": "-",  # dash
+            "\u2013": "-",  # dash
+            "\u2014": "-",  # dash
+            "\u2015": "-",  # horizontal bar
+            "\u2018": "'",  # left single quotation mark
+            "\u2019": "'",  # right single quotation mark
+            "\u201c": '"',  # left double quotation mark
+            "\u201d": '"',  # right double quotation mark
+            "\u2026": "...",  # ellipsis
+            "\u00ad": "",  # soft hyphen
+            "\ufeff": "",  # zero width non-break space
+            "\u202f": " ",  # narrow non-break space
+            "\u2060": "",  # word joiner
+        }
+        for raw, clean in replacements.items():
+            text = text.replace(raw, clean)
+        return text
     @staticmethod
     def _get_cell_spans(cell: Tag) -> tuple[int, int]:
         """Extract colspan and rowspan values from a table cell tag.
@@ -454,9 +511,17 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
             str(cell.get("colspan", "1")),
             str(cell.get("rowspan", "1")),
         )
+        def _extract_num(s: str) -> int:
+            if s and s[0].isnumeric():
+                match = re.search(r"\d+", s)
+                if match:
+                    return int(match.group())
+            return 1
         int_spans: tuple[int, int] = (
-            int(raw_spans[0]) if raw_spans[0].isnumeric() else 1,
-            int(raw_spans[1]) if raw_spans[0].isnumeric() else 1,
+            _extract_num(raw_spans[0]),
+            _extract_num(raw_spans[1]),
         )
         return int_spans

{docling-2.43.0 → docling-2.44.0}/docling/cli/main.py RENAMED Viewed

@@ -262,6 +262,12 @@ def export_documents(
         else:
             _log.warning(f"Document {conv_res.input.file} failed to convert.")
+            if _log.isEnabledFor(logging.INFO):
+                for err in conv_res.errors:
+                    _log.info(
+                        f"  [Failure Detail] Component: {err.component_type}, "
+                        f"Module: {err.module_name}, Message: {err.error_message}"
+                    )
             failure_count += 1
     _log.info(

{docling-2.43.0 → docling-2.44.0}/docling/document_converter.py RENAMED Viewed

@@ -5,7 +5,9 @@ import threading
 import time
 from collections.abc import Iterable, Iterator
 from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime
 from functools import partial
+from io import BytesIO
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple, Type, Union
@@ -275,6 +277,34 @@ class DocumentConverter:
                 "Conversion failed because the provided file has no recognizable format or it wasn't in the list of allowed formats."
             )
+    @validate_call(config=ConfigDict(strict=True))
+    def convert_string(
+        self,
+        content: str,
+        format: InputFormat,
+        name: Optional[str],
+    ) -> ConversionResult:
+        name = name or datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+        if format == InputFormat.MD:
+            if not name.endswith(".md"):
+                name += ".md"
+            buff = BytesIO(content.encode("utf-8"))
+            doc_stream = DocumentStream(name=name, stream=buff)
+            return self.convert(doc_stream)
+        elif format == InputFormat.HTML:
+            if not name.endswith(".html"):
+                name += ".html"
+            buff = BytesIO(content.encode("utf-8"))
+            doc_stream = DocumentStream(name=name, stream=buff)
+            return self.convert(doc_stream)
+        else:
+            raise ValueError(f"format {format} is not supported in `convert_string`")
     def _convert(
         self, conv_input: _DocumentConversionInput, raises_on_error: bool
     ) -> Iterator[ConversionResult]:

{docling-2.43.0 → docling-2.44.0}/docling/models/vlm_models_inline/mlx_model.py RENAMED Viewed

@@ -35,9 +35,9 @@ class HuggingFaceMlxModel(BasePageModel, HuggingFaceModelDownloadMixin):
         if self.enabled:
             try:
-                from mlx_vlm import generate, load  # type: ignore
+                from mlx_vlm import generate, load, stream_generate  # type: ignore
                 from mlx_vlm.prompt_utils import apply_chat_template  # type: ignore
-                from mlx_vlm.utils import load_config, stream_generate  # type: ignore
+                from mlx_vlm.utils import load_config  # type: ignore
             except ImportError:
                 raise ImportError(
                     "mlx-vlm is not installed. Please install it via `pip install mlx-vlm` to use MLX VLM models."

{docling-2.43.0 → docling-2.44.0}/docling.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docling
-Version: 2.43.0
+Version: 2.44.0
 Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
 Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
 License-Expression: MIT
@@ -58,7 +58,7 @@ Requires-Dist: ocrmac<2.0.0,>=1.0.0; sys_platform == "darwin" and extra == "ocrm
 Provides-Extra: vlm
 Requires-Dist: transformers<5.0.0,>=4.46.0; extra == "vlm"
 Requires-Dist: accelerate<2.0.0,>=1.2.1; extra == "vlm"
-Requires-Dist: mlx-vlm<0.2,>=0.1.22; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
+Requires-Dist: mlx-vlm<1.0.0,>=0.3.0; (python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64") and extra == "vlm"
 Provides-Extra: rapidocr
 Requires-Dist: rapidocr-onnxruntime<2.0.0,>=1.4.0; python_version < "3.13" and extra == "rapidocr"
 Requires-Dist: onnxruntime<2.0.0,>=1.7.0; extra == "rapidocr"

{docling-2.43.0 → docling-2.44.0}/docling.egg-info/requires.txt RENAMED Viewed

@@ -47,4 +47,4 @@ transformers<5.0.0,>=4.46.0
 accelerate<2.0.0,>=1.2.1
 [vlm:python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"]
-mlx-vlm<0.2,>=0.1.22
+mlx-vlm<1.0.0,>=0.3.0

{docling-2.43.0 → docling-2.44.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "docling"
-version = "2.43.0"  # DO NOT EDIT, updated automatically
+version = "2.44.0"  # DO NOT EDIT, updated automatically
 description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
 license = "MIT"
 keywords = [
@@ -92,7 +92,7 @@ ocrmac = ['ocrmac (>=1.0.0,<2.0.0) ; sys_platform == "darwin"']
 vlm = [
   'transformers (>=4.46.0,<5.0.0)',
   'accelerate (>=1.2.1,<2.0.0)',
-  'mlx-vlm (>=0.1.22,<0.2) ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
+  'mlx-vlm (>=0.3.0,<1.0.0) ; python_version >= "3.10" and sys_platform == "darwin" and platform_machine == "arm64"',
 ]
 rapidocr = [
   'rapidocr-onnxruntime (>=1.4.0,<2.0.0) ; python_version < "3.13"',

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_html.py RENAMED Viewed

@@ -100,6 +100,22 @@ def test_ordered_lists():
         assert doc.export_to_markdown() == pair[1], f"Error in case {idx}"
+def test_unicode_characters():
+    raw_html = "<html><body><h1>Hello World!</h1></body></html>".encode()  # noqa: RUF001
+    in_doc = InputDocument(
+        path_or_stream=BytesIO(raw_html),
+        format=InputFormat.HTML,
+        backend=HTMLDocumentBackend,
+        filename="test",
+    )
+    backend = HTMLDocumentBackend(
+        in_doc=in_doc,
+        path_or_stream=BytesIO(raw_html),
+    )
+    doc: DoclingDocument = backend.convert()
+    assert doc.texts[0].text == "Hello World!"
 def get_html_paths():
     # Define the directory you want to search
     directory = Path("./tests/data/html/")

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_markdown.py RENAMED Viewed

@@ -2,10 +2,19 @@ from pathlib import Path
 from docling.backend.md_backend import MarkdownDocumentBackend
 from docling.datamodel.base_models import InputFormat
-from docling.datamodel.document import DoclingDocument, InputDocument
+from docling.datamodel.document import (
+    ConversionResult,
+    DoclingDocument,
+    InputDocument,
+    SectionHeaderItem,
+)
+from docling.document_converter import DocumentConverter
 from tests.verify_utils import CONFID_PREC, COORD_PREC
 from .test_data_gen_flag import GEN_TEST_DATA
+from .verify_utils import verify_document, verify_export
+GENERATE = GEN_TEST_DATA
 def test_convert_valid():
@@ -54,3 +63,45 @@ def test_convert_valid():
             if in_path.stem in yaml_filter:
                 exp_doc = DoclingDocument.load_from_yaml(yaml_gt_path)
                 assert act_doc == exp_doc, f"export to yaml failed on {in_path}"
+def get_md_paths():
+    # Define the directory you want to search
+    directory = Path("./tests/groundtruth/docling_v2")
+    # List all MD files in the directory and its subdirectories
+    md_files = sorted(directory.rglob("*.md"))
+    return md_files
+def get_converter():
+    converter = DocumentConverter(allowed_formats=[InputFormat.MD])
+    return converter
+def test_e2e_md_conversions():
+    md_paths = get_md_paths()
+    converter = get_converter()
+    for md_path in md_paths:
+        # print(f"converting {md_path}")
+        with open(md_path) as fr:
+            true_md = fr.read()
+        conv_result: ConversionResult = converter.convert(md_path)
+        doc: DoclingDocument = conv_result.document
+        pred_md: str = doc.export_to_markdown()
+        assert true_md == pred_md
+        conv_result_: ConversionResult = converter.convert_string(
+            true_md, format=InputFormat.MD
+        )
+        doc_: DoclingDocument = conv_result_.document
+        pred_md_: str = doc_.export_to_markdown()
+        assert true_md == pred_md_

{docling-2.43.0 → docling-2.44.0}/LICENSE RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/README.md RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/abstract_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/asciidoc_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/csv_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/docling_parse_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/docling_parse_v2_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/docling_parse_v4_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/docx/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/docx/latex/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/docx/latex/latex_dict.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/docx/latex/omml.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/json/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/json/docling_json_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/md_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/msexcel_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/mspowerpoint_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/msword_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/noop_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/pdf_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/pypdfium2_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/xml/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/xml/jats_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/backend/xml/uspto_backend.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/chunking/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/cli/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/cli/models.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/cli/tools.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/datamodel/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/datamodel/accelerator_options.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/datamodel/asr_model_specs.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/datamodel/base_models.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/datamodel/document.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/datamodel/layout_model_specs.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/datamodel/pipeline_options.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/datamodel/pipeline_options_asr_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/datamodel/pipeline_options_vlm_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/datamodel/settings.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/datamodel/vlm_model_specs.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/exceptions.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/api_vlm_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/base_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/base_ocr_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/code_formula_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/document_picture_classifier.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/easyocr_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/factories/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/factories/base_factory.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/factories/ocr_factory.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/factories/picture_description_factory.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/layout_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/ocr_mac_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/page_assemble_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/page_preprocessing_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/picture_description_api_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/picture_description_base_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/picture_description_vlm_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/plugins/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/plugins/defaults.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/rapid_ocr_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/readingorder_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/table_structure_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/tesseract_ocr_cli_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/tesseract_ocr_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/utils/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/utils/hf_model_download.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/vlm_models_inline/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/models/vlm_models_inline/hf_transformers_model.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/pipeline/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/pipeline/asr_pipeline.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/pipeline/base_pipeline.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/pipeline/simple_pipeline.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/pipeline/standard_pdf_pipeline.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/pipeline/threaded_standard_pdf_pipeline.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/pipeline/vlm_pipeline.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/py.typed RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/utils/__init__.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/utils/accelerator_utils.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/utils/api_image_request.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/utils/export.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/utils/glm_utils.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/utils/layout_postprocessor.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/utils/locks.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/utils/model_downloader.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/utils/ocr_utils.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/utils/orientation.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/utils/profiling.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/utils/utils.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling/utils/visualization.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling.egg-info/entry_points.txt RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/docling.egg-info/top_level.txt RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/setup.cfg RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_asr_pipeline.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_asciidoc.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_csv.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_docling_json.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_docling_parse.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_docling_parse_v2.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_docling_parse_v4.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_jats.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_msexcel.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_msword.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_patent_uspto.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_pdfium.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_pptx.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_backend_webp.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_cli.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_code_formula.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_data_gen_flag.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_document_picture_classifier.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_e2e_conversion.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_e2e_ocr_conversion.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_input_doc.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_interfaces.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_invalid_input.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_legacy_format_transform.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_ocr_utils.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_options.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_settings_load.py RENAMED Viewed

File without changes

{docling-2.43.0 → docling-2.44.0}/tests/test_threaded_pipeline.py RENAMED Viewed

File without changes

docling 2.43.0__tar.gz → 2.44.0__tar.gz

docling 2.43.0__tar.gz → 2.44.0__tar.gz