docling-core 2.78.0__tar.gz → 2.78.1__tar.gz

This diff shows the differences between two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (148)
  1. {docling_core-2.78.0 → docling_core-2.78.1}/PKG-INFO +3 -2
  2. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/file.py +4 -0
  3. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core.egg-info/PKG-INFO +3 -2
  4. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core.egg-info/requires.txt +6 -1
  5. {docling_core-2.78.0 → docling_core-2.78.1}/pyproject.toml +4 -2
  6. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_utils.py +24 -12
  7. {docling_core-2.78.0 → docling_core-2.78.1}/LICENSE +0 -0
  8. {docling_core-2.78.0 → docling_core-2.78.1}/README.md +0 -0
  9. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/__init__.py +0 -0
  10. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/cli/__init__.py +0 -0
  11. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/cli/serialize.py +0 -0
  12. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/cli/view.py +0 -0
  13. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/experimental/__init__.py +0 -0
  14. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/experimental/doclang.py +0 -0
  15. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/experimental/serializer/__init__.py +0 -0
  16. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/experimental/serializer/outline.py +0 -0
  17. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/py.typed +0 -0
  18. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/doc/ANN.json +0 -0
  19. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/doc/DOC.json +0 -0
  20. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  21. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/doc/RAW.json +0 -0
  22. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  23. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  24. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  25. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  26. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/search/__init__.py +0 -0
  27. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  28. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/search/mapping.py +0 -0
  29. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/search/meta.py +0 -0
  30. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/search/package.py +0 -0
  31. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/__init__.py +0 -0
  32. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/__init__.py +0 -0
  33. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/base.py +0 -0
  34. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/chunk_expander.py +0 -0
  35. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/code_chunking/__init__.py +0 -0
  36. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/code_chunking/_language_code_chunkers.py +0 -0
  37. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/code_chunking/_utils.py +0 -0
  38. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/code_chunking/base_code_chunking_strategy.py +0 -0
  39. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/code_chunking/code_chunk.py +0 -0
  40. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/code_chunking/standard_code_chunking_strategy.py +0 -0
  41. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/doc_chunk.py +0 -0
  42. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
  43. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
  44. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/line_chunker.py +0 -0
  45. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/page_chunker.py +0 -0
  46. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
  47. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
  48. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
  49. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
  50. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/profiler/__init__.py +0 -0
  51. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/profiler/doc_profiler.py +0 -0
  52. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/__init__.py +0 -0
  53. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/azure.py +0 -0
  54. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/base.py +0 -0
  55. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/common.py +0 -0
  56. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/doctags.py +0 -0
  57. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/html.py +0 -0
  58. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/html_styles.py +0 -0
  59. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/latex.py +0 -0
  60. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/markdown.py +0 -0
  61. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/markdown_excel.py +0 -0
  62. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/plain_text.py +0 -0
  63. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/webvtt.py +0 -0
  64. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/visualizer/__init__.py +0 -0
  65. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/visualizer/base.py +0 -0
  66. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/visualizer/key_value_visualizer.py +0 -0
  67. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/visualizer/layout_visualizer.py +0 -0
  68. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
  69. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/visualizer/table_visualizer.py +0 -0
  70. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/__init__.py +0 -0
  71. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/base.py +0 -0
  72. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/__init__.py +0 -0
  73. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/base.py +0 -0
  74. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/document.py +0 -0
  75. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/labels.py +0 -0
  76. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/page.py +0 -0
  77. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/tokens.py +0 -0
  78. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/utils.py +0 -0
  79. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/webvtt.py +0 -0
  80. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/gen/__init__.py +0 -0
  81. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/gen/generic.py +0 -0
  82. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/io/__init__.py +0 -0
  83. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/legacy_doc/__init__.py +0 -0
  84. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/legacy_doc/base.py +0 -0
  85. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/legacy_doc/doc_ann.py +0 -0
  86. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
  87. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/legacy_doc/doc_raw.py +0 -0
  88. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/legacy_doc/document.py +0 -0
  89. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/legacy_doc/tokens.py +0 -0
  90. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/nlp/__init__.py +0 -0
  91. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/nlp/qa.py +0 -0
  92. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/nlp/qa_labels.py +0 -0
  93. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/rec/__init__.py +0 -0
  94. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/rec/attribute.py +0 -0
  95. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/rec/base.py +0 -0
  96. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/rec/predicate.py +0 -0
  97. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/rec/record.py +0 -0
  98. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/rec/statement.py +0 -0
  99. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/rec/subject.py +0 -0
  100. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/__init__.py +0 -0
  101. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/alias.py +0 -0
  102. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/generate_docs.py +0 -0
  103. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/generate_jsonschema.py +0 -0
  104. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/legacy.py +0 -0
  105. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/settings.py +0 -0
  106. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/validate.py +0 -0
  107. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/validators.py +0 -0
  108. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core.egg-info/SOURCES.txt +0 -0
  109. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core.egg-info/dependency_links.txt +0 -0
  110. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core.egg-info/entry_points.txt +0 -0
  111. {docling_core-2.78.0 → docling_core-2.78.1}/docling_core.egg-info/top_level.txt +0 -0
  112. {docling_core-2.78.0 → docling_core-2.78.1}/setup.cfg +0 -0
  113. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_azure_serializer.py +0 -0
  114. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_base.py +0 -0
  115. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_chunk_expander.py +0 -0
  116. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_code_chunker.py +0 -0
  117. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_code_chunking_strategy.py +0 -0
  118. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_collection.py +0 -0
  119. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_data_gen_flag.py +0 -0
  120. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_deserializer_doclang.py +0 -0
  121. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_doc_base.py +0 -0
  122. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_doc_legacy_convert.py +0 -0
  123. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_doc_schema.py +0 -0
  124. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_doc_schema_extractor.py +0 -0
  125. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_docling_doc.py +0 -0
  126. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_doctags_load.py +0 -0
  127. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_hierarchical_chunker.py +0 -0
  128. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_hierarchy.py +0 -0
  129. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_hybrid_chunker.py +0 -0
  130. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_json_schema_to_search_mapper.py +0 -0
  131. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_latex_serialization.py +0 -0
  132. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_line_chunker.py +0 -0
  133. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_metadata.py +0 -0
  134. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_nlp_qa.py +0 -0
  135. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_otsl_table_export.py +0 -0
  136. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_page.py +0 -0
  137. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_page_chunker.py +0 -0
  138. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_plain_text_serialization.py +0 -0
  139. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_profiler.py +0 -0
  140. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_rec_schema.py +0 -0
  141. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_regions_to_table.py +0 -0
  142. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_search_meta.py +0 -0
  143. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_serialization.py +0 -0
  144. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_serialization_doclang.py +0 -0
  145. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_serialization_doctag.py +0 -0
  146. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_serialization_outline.py +0 -0
  147. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_visualization.py +0 -0
  148. {docling_core-2.78.0 → docling_core-2.78.1}/test/test_webvtt.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.78.0
3
+ Version: 2.78.1
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -47,7 +47,8 @@ Requires-Dist: tree-sitter-python>=0.23.6; extra == "chunking"
47
47
  Requires-Dist: tree-sitter-c>=0.23.4; extra == "chunking"
48
48
  Requires-Dist: tree-sitter-javascript>=0.23.1; extra == "chunking"
49
49
  Requires-Dist: tree-sitter-typescript>=0.23.2; extra == "chunking"
50
- Requires-Dist: transformers<6.0.0,>=4.34.0; extra == "chunking"
50
+ Requires-Dist: transformers!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*,<5.9.0,>=4.34.0; sys_platform == "darwin" and extra == "chunking"
51
+ Requires-Dist: transformers!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*,<6.0.0,>=4.34.0; sys_platform != "darwin" and extra == "chunking"
51
52
  Provides-Extra: chunking-openai
52
53
  Requires-Dist: semchunk<4.0.0,>=2.2.0; extra == "chunking-openai"
53
54
  Requires-Dist: tree-sitter<0.27.0,>=0.25.0; extra == "chunking-openai"
@@ -182,6 +182,10 @@ def resolve_source_to_stream(
182
182
  stream = BytesIO(res.content)
183
183
  doc_stream = DocumentStream(name=fname, stream=stream)
184
184
  except ValidationError:
185
+ if isinstance(source, str) and "://" in source:
186
+ scheme = source.split("://", 1)[0].lower()
187
+ if scheme not in ("http", "https"):
188
+ raise ValueError(f"Unsupported URL scheme: '{scheme}'. Only http:// and https:// are supported.")
185
189
  try:
186
190
  local_path = TypeAdapter(Path).validate_python(source)
187
191
  stream = BytesIO(local_path.read_bytes())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-core
3
- Version: 2.78.0
3
+ Version: 2.78.1
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
@@ -47,7 +47,8 @@ Requires-Dist: tree-sitter-python>=0.23.6; extra == "chunking"
47
47
  Requires-Dist: tree-sitter-c>=0.23.4; extra == "chunking"
48
48
  Requires-Dist: tree-sitter-javascript>=0.23.1; extra == "chunking"
49
49
  Requires-Dist: tree-sitter-typescript>=0.23.2; extra == "chunking"
50
- Requires-Dist: transformers<6.0.0,>=4.34.0; extra == "chunking"
50
+ Requires-Dist: transformers!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*,<5.9.0,>=4.34.0; sys_platform == "darwin" and extra == "chunking"
51
+ Requires-Dist: transformers!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*,<6.0.0,>=4.34.0; sys_platform != "darwin" and extra == "chunking"
51
52
  Provides-Extra: chunking-openai
52
53
  Requires-Dist: semchunk<4.0.0,>=2.2.0; extra == "chunking-openai"
53
54
  Requires-Dist: tree-sitter<0.27.0,>=0.25.0; extra == "chunking-openai"
@@ -18,7 +18,6 @@ tree-sitter-python>=0.23.6
18
18
  tree-sitter-c>=0.23.4
19
19
  tree-sitter-javascript>=0.23.1
20
20
  tree-sitter-typescript>=0.23.2
21
- transformers<6.0.0,>=4.34.0
22
21
 
23
22
  [chunking-openai]
24
23
  semchunk<4.0.0,>=2.2.0
@@ -29,6 +28,12 @@ tree-sitter-javascript>=0.23.1
29
28
  tree-sitter-typescript>=0.23.2
30
29
  tiktoken<0.13.0,>=0.9.0
31
30
 
31
+ [chunking:sys_platform != "darwin"]
32
+ transformers!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*,<6.0.0,>=4.34.0
33
+
34
+ [chunking:sys_platform == "darwin"]
35
+ transformers!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*,<5.9.0,>=4.34.0
36
+
32
37
  [examples]
33
38
  datasets>=4.0.0
34
39
  matplotlib>=3.7.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "docling-core"
3
- version = "2.78.0" # DO NOT EDIT, updated automatically
3
+ version = "2.78.1" # DO NOT EDIT, updated automatically
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  license-files = ["LICENSE"]
@@ -77,7 +77,9 @@ chunking = [
77
77
  'tree-sitter-typescript >=0.23.2',
78
78
 
79
79
  # specific:
80
- 'transformers (>=4.34.0,<6.0.0)',
80
+ # temporary solution until huggingface/transformers#46159 is resolved
81
+ 'transformers (>=4.34.0,<5.9.0,!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*) ; sys_platform == "darwin"',
82
+ 'transformers (>=4.34.0,<6.0.0,!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*) ; sys_platform != "darwin"',
81
83
  ]
82
84
  chunking-openai = [
83
85
  # common:
@@ -218,9 +218,10 @@ def test_is_safe_url_rejects_private_networks():
218
218
 
219
219
  def test_resolve_remote_filename_sanitizes_content_disposition(monkeypatch):
220
220
  """Test filename normalization from Content-Disposition."""
221
- from docling_core.utils.file import resolve_source_to_stream
222
221
  from requests import Response
223
222
 
223
+ from docling_core.utils.file import resolve_source_to_stream
224
+
224
225
  def get_response(*args, **kwargs):
225
226
  r = Response()
226
227
  r.status_code = 200
@@ -236,9 +237,10 @@ def test_resolve_remote_filename_sanitizes_content_disposition(monkeypatch):
236
237
 
237
238
  def test_resolve_source_rejects_non_public_urls(monkeypatch):
238
239
  """Test that non-public URLs are rejected."""
239
- from docling_core.utils.file import resolve_source_to_stream
240
240
  import pytest
241
241
 
242
+ from docling_core.utils.file import resolve_source_to_stream
243
+
242
244
  with pytest.raises(ValueError, match="URL is not allowed"):
243
245
  resolve_source_to_stream("http://127.0.0.1/file")
244
246
 
@@ -252,11 +254,22 @@ def test_resolve_source_rejects_non_public_urls(monkeypatch):
252
254
  resolve_source_to_stream("http://169.254.169.254/latest/meta-data/")
253
255
 
254
256
 
257
+ def test_resolve_source_rejects_unsupported_scheme():
258
+ """Test that unsupported URL schemes are rejected before file fallback."""
259
+ import pytest
260
+
261
+ from docling_core.utils.file import resolve_source_to_stream
262
+
263
+ with pytest.raises(ValueError, match="Unsupported URL scheme"):
264
+ resolve_source_to_stream("ftp://some-server/file.pdf")
265
+
266
+
255
267
  def test_resolve_source_to_path_sanitizes_filename(monkeypatch, tmp_path):
256
268
  """Test that saved filenames stay within the target directory."""
257
- from docling_core.utils.file import resolve_source_to_path
258
269
  from requests import Response
259
270
 
271
+ from docling_core.utils.file import resolve_source_to_path
272
+
260
273
  def get_response(*args, **kwargs):
261
274
  r = Response()
262
275
  r.status_code = 200
@@ -280,8 +293,9 @@ def test_resolve_source_to_path_sanitizes_filename(monkeypatch, tmp_path):
280
293
 
281
294
  def test_redirect_limit_enforced(monkeypatch):
282
295
  """Test that redirect limits are configured on the session."""
296
+ from requests import Response, Session
297
+
283
298
  from docling_core.utils.file import _MAX_REDIRECTS
284
- from requests import Session, Response
285
299
 
286
300
  session_created = []
287
301
 
@@ -313,23 +327,21 @@ def test_redirect_limit_enforced(monkeypatch):
313
327
  assert session.max_redirects == _MAX_REDIRECTS
314
328
 
315
329
 
316
-
317
330
  def test_redirect_to_non_public_ip_rejected(monkeypatch):
318
331
  """Test that redirects to non-public addresses are rejected."""
319
- from docling_core.utils.file import resolve_source_to_stream
320
- from requests import Response, Session
321
332
  import pytest
333
+ from requests import Response, Session
322
334
 
323
- original_get = Session.get
335
+ from docling_core.utils.file import resolve_source_to_stream
324
336
 
325
337
  def mock_get_with_redirect(self, *args, **kwargs):
326
338
  r = Response()
327
339
  r.status_code = 302
328
- r.headers['location'] = 'http://192.168.1.1/private-file'
329
- r.url = args[0] if args else kwargs.get('url', 'http://example.com')
340
+ r.headers["location"] = "http://192.168.1.1/private-file"
341
+ r.url = args[0] if args else kwargs.get("url", "http://example.com")
330
342
 
331
- if hasattr(self, 'hooks') and 'response' in self.hooks:
332
- for hook in self.hooks['response']:
343
+ if hasattr(self, "hooks") and "response" in self.hooks:
344
+ for hook in self.hooks["response"]:
333
345
  hook(r)
334
346
 
335
347
  return r
File without changes
File without changes
File without changes