docling 2.49.0__tar.gz → 2.50.0__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. {docling-2.49.0 → docling-2.50.0}/PKG-INFO +2 -2
  2. {docling-2.49.0 → docling-2.50.0}/docling/backend/html_backend.py +3 -2
  3. {docling-2.49.0 → docling-2.50.0}/docling/datamodel/pipeline_options.py +1 -1
  4. {docling-2.49.0 → docling-2.50.0}/docling/models/layout_model.py +3 -3
  5. {docling-2.49.0 → docling-2.50.0}/docling/models/page_preprocessing_model.py +1 -1
  6. {docling-2.49.0 → docling-2.50.0}/docling/models/table_structure_model.py +1 -1
  7. {docling-2.49.0 → docling-2.50.0}/docling/utils/model_downloader.py +2 -1
  8. {docling-2.49.0 → docling-2.50.0}/docling.egg-info/PKG-INFO +2 -2
  9. {docling-2.49.0 → docling-2.50.0}/docling.egg-info/requires.txt +1 -1
  10. {docling-2.49.0 → docling-2.50.0}/pyproject.toml +2 -2
  11. {docling-2.49.0 → docling-2.50.0}/tests/test_e2e_conversion.py +9 -1
  12. {docling-2.49.0 → docling-2.50.0}/LICENSE +0 -0
  13. {docling-2.49.0 → docling-2.50.0}/README.md +0 -0
  14. {docling-2.49.0 → docling-2.50.0}/docling/__init__.py +0 -0
  15. {docling-2.49.0 → docling-2.50.0}/docling/backend/__init__.py +0 -0
  16. {docling-2.49.0 → docling-2.50.0}/docling/backend/abstract_backend.py +0 -0
  17. {docling-2.49.0 → docling-2.50.0}/docling/backend/asciidoc_backend.py +0 -0
  18. {docling-2.49.0 → docling-2.50.0}/docling/backend/csv_backend.py +0 -0
  19. {docling-2.49.0 → docling-2.50.0}/docling/backend/docling_parse_backend.py +0 -0
  20. {docling-2.49.0 → docling-2.50.0}/docling/backend/docling_parse_v2_backend.py +0 -0
  21. {docling-2.49.0 → docling-2.50.0}/docling/backend/docling_parse_v4_backend.py +0 -0
  22. {docling-2.49.0 → docling-2.50.0}/docling/backend/docx/__init__.py +0 -0
  23. {docling-2.49.0 → docling-2.50.0}/docling/backend/docx/latex/__init__.py +0 -0
  24. {docling-2.49.0 → docling-2.50.0}/docling/backend/docx/latex/latex_dict.py +0 -0
  25. {docling-2.49.0 → docling-2.50.0}/docling/backend/docx/latex/omml.py +0 -0
  26. {docling-2.49.0 → docling-2.50.0}/docling/backend/json/__init__.py +0 -0
  27. {docling-2.49.0 → docling-2.50.0}/docling/backend/json/docling_json_backend.py +0 -0
  28. {docling-2.49.0 → docling-2.50.0}/docling/backend/md_backend.py +0 -0
  29. {docling-2.49.0 → docling-2.50.0}/docling/backend/mets_gbs_backend.py +0 -0
  30. {docling-2.49.0 → docling-2.50.0}/docling/backend/msexcel_backend.py +0 -0
  31. {docling-2.49.0 → docling-2.50.0}/docling/backend/mspowerpoint_backend.py +0 -0
  32. {docling-2.49.0 → docling-2.50.0}/docling/backend/msword_backend.py +0 -0
  33. {docling-2.49.0 → docling-2.50.0}/docling/backend/noop_backend.py +0 -0
  34. {docling-2.49.0 → docling-2.50.0}/docling/backend/pdf_backend.py +0 -0
  35. {docling-2.49.0 → docling-2.50.0}/docling/backend/pypdfium2_backend.py +0 -0
  36. {docling-2.49.0 → docling-2.50.0}/docling/backend/xml/__init__.py +0 -0
  37. {docling-2.49.0 → docling-2.50.0}/docling/backend/xml/jats_backend.py +0 -0
  38. {docling-2.49.0 → docling-2.50.0}/docling/backend/xml/uspto_backend.py +0 -0
  39. {docling-2.49.0 → docling-2.50.0}/docling/chunking/__init__.py +0 -0
  40. {docling-2.49.0 → docling-2.50.0}/docling/cli/__init__.py +0 -0
  41. {docling-2.49.0 → docling-2.50.0}/docling/cli/main.py +0 -0
  42. {docling-2.49.0 → docling-2.50.0}/docling/cli/models.py +0 -0
  43. {docling-2.49.0 → docling-2.50.0}/docling/cli/tools.py +0 -0
  44. {docling-2.49.0 → docling-2.50.0}/docling/datamodel/__init__.py +0 -0
  45. {docling-2.49.0 → docling-2.50.0}/docling/datamodel/accelerator_options.py +0 -0
  46. {docling-2.49.0 → docling-2.50.0}/docling/datamodel/asr_model_specs.py +0 -0
  47. {docling-2.49.0 → docling-2.50.0}/docling/datamodel/base_models.py +0 -0
  48. {docling-2.49.0 → docling-2.50.0}/docling/datamodel/document.py +0 -0
  49. {docling-2.49.0 → docling-2.50.0}/docling/datamodel/extraction.py +0 -0
  50. {docling-2.49.0 → docling-2.50.0}/docling/datamodel/layout_model_specs.py +0 -0
  51. {docling-2.49.0 → docling-2.50.0}/docling/datamodel/pipeline_options_asr_model.py +0 -0
  52. {docling-2.49.0 → docling-2.50.0}/docling/datamodel/pipeline_options_vlm_model.py +0 -0
  53. {docling-2.49.0 → docling-2.50.0}/docling/datamodel/settings.py +0 -0
  54. {docling-2.49.0 → docling-2.50.0}/docling/datamodel/vlm_model_specs.py +0 -0
  55. {docling-2.49.0 → docling-2.50.0}/docling/document_converter.py +0 -0
  56. {docling-2.49.0 → docling-2.50.0}/docling/document_extractor.py +0 -0
  57. {docling-2.49.0 → docling-2.50.0}/docling/exceptions.py +0 -0
  58. {docling-2.49.0 → docling-2.50.0}/docling/models/__init__.py +0 -0
  59. {docling-2.49.0 → docling-2.50.0}/docling/models/api_vlm_model.py +0 -0
  60. {docling-2.49.0 → docling-2.50.0}/docling/models/base_model.py +0 -0
  61. {docling-2.49.0 → docling-2.50.0}/docling/models/base_ocr_model.py +0 -0
  62. {docling-2.49.0 → docling-2.50.0}/docling/models/code_formula_model.py +0 -0
  63. {docling-2.49.0 → docling-2.50.0}/docling/models/document_picture_classifier.py +0 -0
  64. {docling-2.49.0 → docling-2.50.0}/docling/models/easyocr_model.py +0 -0
  65. {docling-2.49.0 → docling-2.50.0}/docling/models/factories/__init__.py +0 -0
  66. {docling-2.49.0 → docling-2.50.0}/docling/models/factories/base_factory.py +0 -0
  67. {docling-2.49.0 → docling-2.50.0}/docling/models/factories/ocr_factory.py +0 -0
  68. {docling-2.49.0 → docling-2.50.0}/docling/models/factories/picture_description_factory.py +0 -0
  69. {docling-2.49.0 → docling-2.50.0}/docling/models/ocr_mac_model.py +0 -0
  70. {docling-2.49.0 → docling-2.50.0}/docling/models/page_assemble_model.py +0 -0
  71. {docling-2.49.0 → docling-2.50.0}/docling/models/picture_description_api_model.py +0 -0
  72. {docling-2.49.0 → docling-2.50.0}/docling/models/picture_description_base_model.py +0 -0
  73. {docling-2.49.0 → docling-2.50.0}/docling/models/picture_description_vlm_model.py +0 -0
  74. {docling-2.49.0 → docling-2.50.0}/docling/models/plugins/__init__.py +0 -0
  75. {docling-2.49.0 → docling-2.50.0}/docling/models/plugins/defaults.py +0 -0
  76. {docling-2.49.0 → docling-2.50.0}/docling/models/rapid_ocr_model.py +0 -0
  77. {docling-2.49.0 → docling-2.50.0}/docling/models/readingorder_model.py +0 -0
  78. {docling-2.49.0 → docling-2.50.0}/docling/models/tesseract_ocr_cli_model.py +0 -0
  79. {docling-2.49.0 → docling-2.50.0}/docling/models/tesseract_ocr_model.py +0 -0
  80. {docling-2.49.0 → docling-2.50.0}/docling/models/utils/__init__.py +0 -0
  81. {docling-2.49.0 → docling-2.50.0}/docling/models/utils/hf_model_download.py +0 -0
  82. {docling-2.49.0 → docling-2.50.0}/docling/models/vlm_models_inline/__init__.py +0 -0
  83. {docling-2.49.0 → docling-2.50.0}/docling/models/vlm_models_inline/hf_transformers_model.py +0 -0
  84. {docling-2.49.0 → docling-2.50.0}/docling/models/vlm_models_inline/mlx_model.py +0 -0
  85. {docling-2.49.0 → docling-2.50.0}/docling/models/vlm_models_inline/nuextract_transformers_model.py +0 -0
  86. {docling-2.49.0 → docling-2.50.0}/docling/models/vlm_models_inline/vllm_model.py +0 -0
  87. {docling-2.49.0 → docling-2.50.0}/docling/pipeline/__init__.py +0 -0
  88. {docling-2.49.0 → docling-2.50.0}/docling/pipeline/asr_pipeline.py +0 -0
  89. {docling-2.49.0 → docling-2.50.0}/docling/pipeline/base_extraction_pipeline.py +0 -0
  90. {docling-2.49.0 → docling-2.50.0}/docling/pipeline/base_pipeline.py +0 -0
  91. {docling-2.49.0 → docling-2.50.0}/docling/pipeline/extraction_vlm_pipeline.py +0 -0
  92. {docling-2.49.0 → docling-2.50.0}/docling/pipeline/simple_pipeline.py +0 -0
  93. {docling-2.49.0 → docling-2.50.0}/docling/pipeline/standard_pdf_pipeline.py +0 -0
  94. {docling-2.49.0 → docling-2.50.0}/docling/pipeline/threaded_standard_pdf_pipeline.py +0 -0
  95. {docling-2.49.0 → docling-2.50.0}/docling/pipeline/vlm_pipeline.py +0 -0
  96. {docling-2.49.0 → docling-2.50.0}/docling/py.typed +0 -0
  97. {docling-2.49.0 → docling-2.50.0}/docling/utils/__init__.py +0 -0
  98. {docling-2.49.0 → docling-2.50.0}/docling/utils/accelerator_utils.py +0 -0
  99. {docling-2.49.0 → docling-2.50.0}/docling/utils/api_image_request.py +0 -0
  100. {docling-2.49.0 → docling-2.50.0}/docling/utils/export.py +0 -0
  101. {docling-2.49.0 → docling-2.50.0}/docling/utils/glm_utils.py +0 -0
  102. {docling-2.49.0 → docling-2.50.0}/docling/utils/layout_postprocessor.py +0 -0
  103. {docling-2.49.0 → docling-2.50.0}/docling/utils/locks.py +0 -0
  104. {docling-2.49.0 → docling-2.50.0}/docling/utils/ocr_utils.py +0 -0
  105. {docling-2.49.0 → docling-2.50.0}/docling/utils/orientation.py +0 -0
  106. {docling-2.49.0 → docling-2.50.0}/docling/utils/profiling.py +0 -0
  107. {docling-2.49.0 → docling-2.50.0}/docling/utils/utils.py +0 -0
  108. {docling-2.49.0 → docling-2.50.0}/docling/utils/visualization.py +0 -0
  109. {docling-2.49.0 → docling-2.50.0}/docling.egg-info/SOURCES.txt +0 -0
  110. {docling-2.49.0 → docling-2.50.0}/docling.egg-info/dependency_links.txt +0 -0
  111. {docling-2.49.0 → docling-2.50.0}/docling.egg-info/entry_points.txt +0 -0
  112. {docling-2.49.0 → docling-2.50.0}/docling.egg-info/top_level.txt +0 -0
  113. {docling-2.49.0 → docling-2.50.0}/setup.cfg +0 -0
  114. {docling-2.49.0 → docling-2.50.0}/tests/test_asr_pipeline.py +0 -0
  115. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_asciidoc.py +0 -0
  116. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_csv.py +0 -0
  117. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_docling_json.py +0 -0
  118. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_docling_parse.py +0 -0
  119. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_docling_parse_v2.py +0 -0
  120. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_docling_parse_v4.py +0 -0
  121. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_html.py +0 -0
  122. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_jats.py +0 -0
  123. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_markdown.py +0 -0
  124. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_mets_gbs.py +0 -0
  125. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_msexcel.py +0 -0
  126. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_msword.py +0 -0
  127. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_patent_uspto.py +0 -0
  128. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_pdfium.py +0 -0
  129. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_pptx.py +0 -0
  130. {docling-2.49.0 → docling-2.50.0}/tests/test_backend_webp.py +0 -0
  131. {docling-2.49.0 → docling-2.50.0}/tests/test_cli.py +0 -0
  132. {docling-2.49.0 → docling-2.50.0}/tests/test_code_formula.py +0 -0
  133. {docling-2.49.0 → docling-2.50.0}/tests/test_data_gen_flag.py +0 -0
  134. {docling-2.49.0 → docling-2.50.0}/tests/test_document_picture_classifier.py +0 -0
  135. {docling-2.49.0 → docling-2.50.0}/tests/test_e2e_ocr_conversion.py +0 -0
  136. {docling-2.49.0 → docling-2.50.0}/tests/test_extraction.py +0 -0
  137. {docling-2.49.0 → docling-2.50.0}/tests/test_input_doc.py +0 -0
  138. {docling-2.49.0 → docling-2.50.0}/tests/test_interfaces.py +0 -0
  139. {docling-2.49.0 → docling-2.50.0}/tests/test_invalid_input.py +0 -0
  140. {docling-2.49.0 → docling-2.50.0}/tests/test_legacy_format_transform.py +0 -0
  141. {docling-2.49.0 → docling-2.50.0}/tests/test_ocr_utils.py +0 -0
  142. {docling-2.49.0 → docling-2.50.0}/tests/test_options.py +0 -0
  143. {docling-2.49.0 → docling-2.50.0}/tests/test_settings_load.py +0 -0
  144. {docling-2.49.0 → docling-2.50.0}/tests/test_threaded_pipeline.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling
3
- Version: 2.49.0
3
+ Version: 2.50.0
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  License-Expression: MIT
@@ -28,7 +28,7 @@ License-File: LICENSE
28
28
  Requires-Dist: pydantic<3.0.0,>=2.0.0
29
29
  Requires-Dist: docling-core[chunking]<3.0.0,>=2.42.0
30
30
  Requires-Dist: docling-parse<5.0.0,>=4.2.2
31
- Requires-Dist: docling-ibm-models<4,>=3.9.0
31
+ Requires-Dist: docling-ibm-models<4,>=3.9.1
32
32
  Requires-Dist: filetype<2.0.0,>=1.2.0
33
33
  Requires-Dist: pypdfium2!=4.30.1,<5.0.0,>=4.30.0
34
34
  Requires-Dist: pydantic-settings<3.0.0,>=2.3.0
@@ -467,13 +467,14 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
467
467
 
468
468
  @contextmanager
469
469
  def _use_hyperlink(self, tag: Tag):
470
+ old_hyperlink: Union[AnyUrl, Path, None] = None
471
+ new_hyperlink: Union[AnyUrl, Path, None] = None
470
472
  this_href = tag.get("href")
471
473
  if this_href is None:
472
474
  yield None
473
475
  else:
474
476
  if isinstance(this_href, str) and this_href:
475
- old_hyperlink: Union[AnyUrl, Path, None] = self.hyperlink
476
- new_hyperlink: Union[AnyUrl, Path, None] = None
477
+ old_hyperlink = self.hyperlink
477
478
  if self.original_url is not None:
478
479
  this_href = urljoin(str(self.original_url), str(this_href))
479
480
  # ugly fix for relative links since pydantic does not support them.
@@ -283,10 +283,10 @@ class LayoutOptions(BaseModel):
283
283
  keep_empty_clusters: bool = (
284
284
  False # Whether to keep clusters that contain no text cells
285
285
  )
286
+ model_spec: LayoutModelConfig = DOCLING_LAYOUT_HERON
286
287
  skip_cell_assignment: bool = (
287
288
  False # Skip cell-to-cluster assignment for VLM-only processing
288
289
  )
289
- model_spec: LayoutModelConfig = DOCLING_LAYOUT_V2
290
290
 
291
291
 
292
292
  class AsrPipelineOptions(PipelineOptions):
@@ -91,7 +91,7 @@ class LayoutModel(BasePageModel):
91
91
  local_dir: Optional[Path] = None,
92
92
  force: bool = False,
93
93
  progress: bool = False,
94
- layout_model_config: LayoutModelConfig = DOCLING_LAYOUT_V2,
94
+ layout_model_config: LayoutModelConfig = LayoutOptions().model_spec, # use default
95
95
  ) -> Path:
96
96
  return download_hf_model(
97
97
  repo_id=layout_model_config.repo_id,
@@ -122,8 +122,8 @@ class LayoutModel(BasePageModel):
122
122
  left_clusters = [c for c in clusters if c.label not in exclude_labels]
123
123
  right_clusters = [c for c in clusters if c.label in exclude_labels]
124
124
  # Create a deep copy of the original image for both sides
125
- left_image = copy.deepcopy(page.image)
126
- right_image = copy.deepcopy(page.image)
125
+ left_image = page.image.copy()
126
+ right_image = page.image.copy()
127
127
 
128
128
  # Draw clusters on both images
129
129
  draw_clusters(left_image, left_clusters, scale_x, scale_y)
@@ -90,7 +90,7 @@ class PagePreprocessingModel(BasePageModel):
90
90
 
91
91
  # DEBUG code:
92
92
  def draw_text_boxes(image, cells, show: bool = False):
93
- draw = ImageDraw.Draw(image)
93
+ draw = ImageDraw.Draw(image.copy())
94
94
  for c in cells:
95
95
  x0, y0, x1, y1 = (
96
96
  c.to_bounding_box().l,
@@ -94,7 +94,7 @@ class TableStructureModel(BasePageModel):
94
94
  ) -> Path:
95
95
  return download_hf_model(
96
96
  repo_id="ds4sd/docling-models",
97
- revision="v2.2.0",
97
+ revision="v2.3.0",
98
98
  local_dir=local_dir,
99
99
  force=force,
100
100
  progress=progress,
@@ -4,6 +4,7 @@ from typing import Optional
4
4
 
5
5
  from docling.datamodel.layout_model_specs import DOCLING_LAYOUT_V2
6
6
  from docling.datamodel.pipeline_options import (
7
+ LayoutOptions,
7
8
  granite_picture_description,
8
9
  smolvlm_picture_description,
9
10
  )
@@ -47,7 +48,7 @@ def download_models(
47
48
  if with_layout:
48
49
  _log.info("Downloading layout model...")
49
50
  LayoutModel.download_models(
50
- local_dir=output_dir / DOCLING_LAYOUT_V2.model_repo_folder,
51
+ local_dir=output_dir / LayoutOptions().model_spec.model_repo_folder,
51
52
  force=force,
52
53
  progress=progress,
53
54
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling
3
- Version: 2.49.0
3
+ Version: 2.50.0
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  License-Expression: MIT
@@ -28,7 +28,7 @@ License-File: LICENSE
28
28
  Requires-Dist: pydantic<3.0.0,>=2.0.0
29
29
  Requires-Dist: docling-core[chunking]<3.0.0,>=2.42.0
30
30
  Requires-Dist: docling-parse<5.0.0,>=4.2.2
31
- Requires-Dist: docling-ibm-models<4,>=3.9.0
31
+ Requires-Dist: docling-ibm-models<4,>=3.9.1
32
32
  Requires-Dist: filetype<2.0.0,>=1.2.0
33
33
  Requires-Dist: pypdfium2!=4.30.1,<5.0.0,>=4.30.0
34
34
  Requires-Dist: pydantic-settings<3.0.0,>=2.3.0
@@ -1,7 +1,7 @@
1
1
  pydantic<3.0.0,>=2.0.0
2
2
  docling-core[chunking]<3.0.0,>=2.42.0
3
3
  docling-parse<5.0.0,>=4.2.2
4
- docling-ibm-models<4,>=3.9.0
4
+ docling-ibm-models<4,>=3.9.1
5
5
  filetype<2.0.0,>=1.2.0
6
6
  pypdfium2!=4.30.1,<5.0.0,>=4.30.0
7
7
  pydantic-settings<3.0.0,>=2.3.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "docling"
3
- version = "2.49.0" # DO NOT EDIT, updated automatically
3
+ version = "2.50.0" # DO NOT EDIT, updated automatically
4
4
  description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
5
5
  license = "MIT"
6
6
  keywords = [
@@ -46,7 +46,7 @@ dependencies = [
46
46
  'pydantic (>=2.0.0,<3.0.0)',
47
47
  'docling-core[chunking] (>=2.42.0,<3.0.0)',
48
48
  'docling-parse (>=4.2.2,<5.0.0)',
49
- "docling-ibm-models>=3.9.0,<4",
49
+ "docling-ibm-models>=3.9.1,<4",
50
50
  'filetype (>=1.2.0,<2.0.0)',
51
51
  'pypdfium2 (>=4.30.0,!=4.30.1,<5.0.0)',
52
52
  'pydantic-settings (>=2.3.0,<3.0.0)',
@@ -11,6 +11,8 @@ from .verify_utils import verify_conversion_result_v2
11
11
 
12
12
  GENERATE_V2 = GEN_TEST_DATA
13
13
 
14
+ SKIP_DOCTAGS_COMPARISON = ["2203.01017v2.pdf"]
15
+
14
16
 
15
17
  def get_pdf_paths():
16
18
  # Define the directory you want to search
@@ -50,6 +52,12 @@ def test_e2e_pdfs_conversions():
50
52
 
51
53
  doc_result: ConversionResult = converter.convert(pdf_path)
52
54
 
55
+ # Decide if to skip doctags comparison
56
+ verify_doctags = pdf_path.name not in SKIP_DOCTAGS_COMPARISON
57
+
53
58
  verify_conversion_result_v2(
54
- input_path=pdf_path, doc_result=doc_result, generate=GENERATE_V2
59
+ input_path=pdf_path,
60
+ doc_result=doc_result,
61
+ generate=GENERATE_V2,
62
+ verify_doctags=verify_doctags,
55
63
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes