docling-core 2.78.0__tar.gz → 2.78.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling_core-2.78.0 → docling_core-2.78.1}/PKG-INFO +3 -2
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/file.py +4 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core.egg-info/PKG-INFO +3 -2
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core.egg-info/requires.txt +6 -1
- {docling_core-2.78.0 → docling_core-2.78.1}/pyproject.toml +4 -2
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_utils.py +24 -12
- {docling_core-2.78.0 → docling_core-2.78.1}/LICENSE +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/README.md +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/cli/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/cli/serialize.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/cli/view.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/experimental/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/experimental/doclang.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/experimental/serializer/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/experimental/serializer/outline.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/py.typed +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/doc/ANN.json +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/doc/DOC.json +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/doc/RAW.json +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/search/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/search/json_schema_to_search_mapper.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/search/mapping.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/search/meta.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/search/package.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/base.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/chunk_expander.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/code_chunking/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/code_chunking/_language_code_chunkers.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/code_chunking/_utils.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/code_chunking/base_code_chunking_strategy.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/code_chunking/code_chunk.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/code_chunking/standard_code_chunking_strategy.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/doc_chunk.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/hybrid_chunker.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/line_chunker.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/page_chunker.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/tokenizer/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/tokenizer/base.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/tokenizer/huggingface.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/tokenizer/openai.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/profiler/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/profiler/doc_profiler.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/azure.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/base.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/common.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/doctags.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/html.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/html_styles.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/latex.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/markdown.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/markdown_excel.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/plain_text.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/webvtt.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/visualizer/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/visualizer/base.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/visualizer/key_value_visualizer.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/visualizer/layout_visualizer.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/visualizer/reading_order_visualizer.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/visualizer/table_visualizer.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/base.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/base.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/document.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/labels.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/page.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/tokens.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/utils.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/doc/webvtt.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/gen/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/gen/generic.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/io/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/legacy_doc/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/legacy_doc/base.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/legacy_doc/doc_ann.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/legacy_doc/doc_raw.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/legacy_doc/document.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/legacy_doc/tokens.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/nlp/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/nlp/qa.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/nlp/qa_labels.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/rec/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/rec/attribute.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/rec/base.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/rec/predicate.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/rec/record.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/rec/statement.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/types/rec/subject.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/__init__.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/alias.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/generate_docs.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/generate_jsonschema.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/legacy.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/settings.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/validate.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core/utils/validators.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core.egg-info/SOURCES.txt +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core.egg-info/dependency_links.txt +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core.egg-info/entry_points.txt +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/docling_core.egg-info/top_level.txt +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/setup.cfg +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_azure_serializer.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_base.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_chunk_expander.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_code_chunker.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_code_chunking_strategy.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_collection.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_data_gen_flag.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_deserializer_doclang.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_doc_base.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_doc_legacy_convert.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_doc_schema.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_doc_schema_extractor.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_docling_doc.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_doctags_load.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_hierarchical_chunker.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_hierarchy.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_hybrid_chunker.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_json_schema_to_search_mapper.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_latex_serialization.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_line_chunker.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_metadata.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_nlp_qa.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_otsl_table_export.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_page.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_page_chunker.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_plain_text_serialization.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_profiler.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_rec_schema.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_regions_to_table.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_search_meta.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_serialization.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_serialization_doclang.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_serialization_doctag.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_serialization_outline.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_visualization.py +0 -0
- {docling_core-2.78.0 → docling_core-2.78.1}/test/test_webvtt.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.78.0
|
|
3
|
+
Version: 2.78.1
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -47,7 +47,8 @@ Requires-Dist: tree-sitter-python>=0.23.6; extra == "chunking"
|
|
|
47
47
|
Requires-Dist: tree-sitter-c>=0.23.4; extra == "chunking"
|
|
48
48
|
Requires-Dist: tree-sitter-javascript>=0.23.1; extra == "chunking"
|
|
49
49
|
Requires-Dist: tree-sitter-typescript>=0.23.2; extra == "chunking"
|
|
50
|
-
Requires-Dist: transformers<6.0.0,>=4.34.0; extra == "chunking"
|
|
50
|
+
Requires-Dist: transformers!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*,<5.9.0,>=4.34.0; sys_platform == "darwin" and extra == "chunking"
|
|
51
|
+
Requires-Dist: transformers!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*,<6.0.0,>=4.34.0; sys_platform != "darwin" and extra == "chunking"
|
|
51
52
|
Provides-Extra: chunking-openai
|
|
52
53
|
Requires-Dist: semchunk<4.0.0,>=2.2.0; extra == "chunking-openai"
|
|
53
54
|
Requires-Dist: tree-sitter<0.27.0,>=0.25.0; extra == "chunking-openai"
|
|
@@ -182,6 +182,10 @@ def resolve_source_to_stream(
|
|
|
182
182
|
stream = BytesIO(res.content)
|
|
183
183
|
doc_stream = DocumentStream(name=fname, stream=stream)
|
|
184
184
|
except ValidationError:
|
|
185
|
+
if isinstance(source, str) and "://" in source:
|
|
186
|
+
scheme = source.split("://", 1)[0].lower()
|
|
187
|
+
if scheme not in ("http", "https"):
|
|
188
|
+
raise ValueError(f"Unsupported URL scheme: '{scheme}'. Only http:// and https:// are supported.")
|
|
185
189
|
try:
|
|
186
190
|
local_path = TypeAdapter(Path).validate_python(source)
|
|
187
191
|
stream = BytesIO(local_path.read_bytes())
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-core
|
|
3
|
-
Version: 2.78.0
|
|
3
|
+
Version: 2.78.1
|
|
4
4
|
Summary: A python library to define and validate data types in Docling.
|
|
5
5
|
Author-email: Cesar Berrospi Ramis <ceb@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
|
|
6
6
|
Maintainer-email: Panos Vagenas <pva@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Christoph Auer <cau@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>, Cesar Berrospi Ramis <ceb@zurich.ibm.com>
|
|
@@ -47,7 +47,8 @@ Requires-Dist: tree-sitter-python>=0.23.6; extra == "chunking"
|
|
|
47
47
|
Requires-Dist: tree-sitter-c>=0.23.4; extra == "chunking"
|
|
48
48
|
Requires-Dist: tree-sitter-javascript>=0.23.1; extra == "chunking"
|
|
49
49
|
Requires-Dist: tree-sitter-typescript>=0.23.2; extra == "chunking"
|
|
50
|
-
Requires-Dist: transformers<6.0.0,>=4.34.0; extra == "chunking"
|
|
50
|
+
Requires-Dist: transformers!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*,<5.9.0,>=4.34.0; sys_platform == "darwin" and extra == "chunking"
|
|
51
|
+
Requires-Dist: transformers!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*,<6.0.0,>=4.34.0; sys_platform != "darwin" and extra == "chunking"
|
|
51
52
|
Provides-Extra: chunking-openai
|
|
52
53
|
Requires-Dist: semchunk<4.0.0,>=2.2.0; extra == "chunking-openai"
|
|
53
54
|
Requires-Dist: tree-sitter<0.27.0,>=0.25.0; extra == "chunking-openai"
|
|
@@ -18,7 +18,6 @@ tree-sitter-python>=0.23.6
|
|
|
18
18
|
tree-sitter-c>=0.23.4
|
|
19
19
|
tree-sitter-javascript>=0.23.1
|
|
20
20
|
tree-sitter-typescript>=0.23.2
|
|
21
|
-
transformers<6.0.0,>=4.34.0
|
|
22
21
|
|
|
23
22
|
[chunking-openai]
|
|
24
23
|
semchunk<4.0.0,>=2.2.0
|
|
@@ -29,6 +28,12 @@ tree-sitter-javascript>=0.23.1
|
|
|
29
28
|
tree-sitter-typescript>=0.23.2
|
|
30
29
|
tiktoken<0.13.0,>=0.9.0
|
|
31
30
|
|
|
31
|
+
[chunking:sys_platform != "darwin"]
|
|
32
|
+
transformers!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*,<6.0.0,>=4.34.0
|
|
33
|
+
|
|
34
|
+
[chunking:sys_platform == "darwin"]
|
|
35
|
+
transformers!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*,<5.9.0,>=4.34.0
|
|
36
|
+
|
|
32
37
|
[examples]
|
|
33
38
|
datasets>=4.0.0
|
|
34
39
|
matplotlib>=3.7.0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "docling-core"
|
|
3
|
-
version = "2.78.0"
|
|
3
|
+
version = "2.78.1" # DO NOT EDIT, updated automatically
|
|
4
4
|
description = "A python library to define and validate data types in Docling."
|
|
5
5
|
license = "MIT"
|
|
6
6
|
license-files = ["LICENSE"]
|
|
@@ -77,7 +77,9 @@ chunking = [
|
|
|
77
77
|
'tree-sitter-typescript >=0.23.2',
|
|
78
78
|
|
|
79
79
|
# specific:
|
|
80
|
-
|
|
80
|
+
# temporary solution until huggingface/transformers#46159 is resolved
|
|
81
|
+
'transformers (>=4.34.0,<5.9.0,!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*) ; sys_platform == "darwin"',
|
|
82
|
+
'transformers (>=4.34.0,<6.0.0,!=5.0.*,!=5.1.*,!=5.2.*,!=5.3.*) ; sys_platform != "darwin"',
|
|
81
83
|
]
|
|
82
84
|
chunking-openai = [
|
|
83
85
|
# common:
|
|
@@ -218,9 +218,10 @@ def test_is_safe_url_rejects_private_networks():
|
|
|
218
218
|
|
|
219
219
|
def test_resolve_remote_filename_sanitizes_content_disposition(monkeypatch):
|
|
220
220
|
"""Test filename normalization from Content-Disposition."""
|
|
221
|
-
from docling_core.utils.file import resolve_source_to_stream
|
|
222
221
|
from requests import Response
|
|
223
222
|
|
|
223
|
+
from docling_core.utils.file import resolve_source_to_stream
|
|
224
|
+
|
|
224
225
|
def get_response(*args, **kwargs):
|
|
225
226
|
r = Response()
|
|
226
227
|
r.status_code = 200
|
|
@@ -236,9 +237,10 @@ def test_resolve_remote_filename_sanitizes_content_disposition(monkeypatch):
|
|
|
236
237
|
|
|
237
238
|
def test_resolve_source_rejects_non_public_urls(monkeypatch):
|
|
238
239
|
"""Test that non-public URLs are rejected."""
|
|
239
|
-
from docling_core.utils.file import resolve_source_to_stream
|
|
240
240
|
import pytest
|
|
241
241
|
|
|
242
|
+
from docling_core.utils.file import resolve_source_to_stream
|
|
243
|
+
|
|
242
244
|
with pytest.raises(ValueError, match="URL is not allowed"):
|
|
243
245
|
resolve_source_to_stream("http://127.0.0.1/file")
|
|
244
246
|
|
|
@@ -252,11 +254,22 @@ def test_resolve_source_rejects_non_public_urls(monkeypatch):
|
|
|
252
254
|
resolve_source_to_stream("http://169.254.169.254/latest/meta-data/")
|
|
253
255
|
|
|
254
256
|
|
|
257
|
+
def test_resolve_source_rejects_unsupported_scheme():
|
|
258
|
+
"""Test that unsupported URL schemes are rejected before file fallback."""
|
|
259
|
+
import pytest
|
|
260
|
+
|
|
261
|
+
from docling_core.utils.file import resolve_source_to_stream
|
|
262
|
+
|
|
263
|
+
with pytest.raises(ValueError, match="Unsupported URL scheme"):
|
|
264
|
+
resolve_source_to_stream("ftp://some-server/file.pdf")
|
|
265
|
+
|
|
266
|
+
|
|
255
267
|
def test_resolve_source_to_path_sanitizes_filename(monkeypatch, tmp_path):
|
|
256
268
|
"""Test that saved filenames stay within the target directory."""
|
|
257
|
-
from docling_core.utils.file import resolve_source_to_path
|
|
258
269
|
from requests import Response
|
|
259
270
|
|
|
271
|
+
from docling_core.utils.file import resolve_source_to_path
|
|
272
|
+
|
|
260
273
|
def get_response(*args, **kwargs):
|
|
261
274
|
r = Response()
|
|
262
275
|
r.status_code = 200
|
|
@@ -280,8 +293,9 @@ def test_resolve_source_to_path_sanitizes_filename(monkeypatch, tmp_path):
|
|
|
280
293
|
|
|
281
294
|
def test_redirect_limit_enforced(monkeypatch):
|
|
282
295
|
"""Test that redirect limits are configured on the session."""
|
|
296
|
+
from requests import Response, Session
|
|
297
|
+
|
|
283
298
|
from docling_core.utils.file import _MAX_REDIRECTS
|
|
284
|
-
from requests import Session, Response
|
|
285
299
|
|
|
286
300
|
session_created = []
|
|
287
301
|
|
|
@@ -313,23 +327,21 @@ def test_redirect_limit_enforced(monkeypatch):
|
|
|
313
327
|
assert session.max_redirects == _MAX_REDIRECTS
|
|
314
328
|
|
|
315
329
|
|
|
316
|
-
|
|
317
330
|
def test_redirect_to_non_public_ip_rejected(monkeypatch):
|
|
318
331
|
"""Test that redirects to non-public addresses are rejected."""
|
|
319
|
-
from docling_core.utils.file import resolve_source_to_stream
|
|
320
|
-
from requests import Response, Session
|
|
321
332
|
import pytest
|
|
333
|
+
from requests import Response, Session
|
|
322
334
|
|
|
323
|
-
|
|
335
|
+
from docling_core.utils.file import resolve_source_to_stream
|
|
324
336
|
|
|
325
337
|
def mock_get_with_redirect(self, *args, **kwargs):
|
|
326
338
|
r = Response()
|
|
327
339
|
r.status_code = 302
|
|
328
|
-
r.headers['location'] = 'http://192.168.1.1/private-file'
|
|
329
|
-
r.url = args[0] if args else kwargs.get('url', 'http://example.com')
|
|
340
|
+
r.headers["location"] = "http://192.168.1.1/private-file"
|
|
341
|
+
r.url = args[0] if args else kwargs.get("url", "http://example.com")
|
|
330
342
|
|
|
331
|
-
if hasattr(self, 'hooks') and 'response' in self.hooks:
|
|
332
|
-
for hook in self.hooks['response']:
|
|
343
|
+
if hasattr(self, "hooks") and "response" in self.hooks:
|
|
344
|
+
for hook in self.hooks["response"]:
|
|
333
345
|
hook(r)
|
|
334
346
|
|
|
335
347
|
return r
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/experimental/serializer/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/resources/schemas/doc/OCR-output.json
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/search/json_schema_to_search_mapper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/chunk_expander.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/code_chunking/_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/hierarchical_chunker.py
RENAMED
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/hybrid_chunker.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/tokenizer/__init__.py
RENAMED
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/tokenizer/base.py
RENAMED
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/tokenizer/huggingface.py
RENAMED
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/chunker/tokenizer/openai.py
RENAMED
|
File without changes
|
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/profiler/doc_profiler.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/html_styles.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/markdown_excel.py
RENAMED
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/serializer/plain_text.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/visualizer/layout_visualizer.py
RENAMED
|
File without changes
|
|
File without changes
|
{docling_core-2.78.0 → docling_core-2.78.1}/docling_core/transforms/visualizer/table_visualizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|