docling-ocr-onnxtr 0.1.2__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/PKG-INFO +9 -9
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/README.md +8 -8
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr/onnxtr_model.py +1 -1
- docling_ocr_onnxtr-0.1.3/docling_ocr_onnxtr/version.py +1 -0
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr.egg-info/PKG-INFO +9 -9
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/setup.py +1 -1
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/tests/test_pipeline_invalid_cases.py +1 -0
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/tests/test_plugin.py +26 -11
- docling_ocr_onnxtr-0.1.2/docling_ocr_onnxtr/version.py +0 -1
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/LICENSE +0 -0
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr/__init__.py +0 -0
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr/options.py +0 -0
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr/plugin.py +0 -0
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr/py.typed +0 -0
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr.egg-info/SOURCES.txt +0 -0
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr.egg-info/dependency_links.txt +0 -0
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr.egg-info/entry_points.txt +0 -0
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr.egg-info/requires.txt +0 -0
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr.egg-info/top_level.txt +0 -0
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr.egg-info/zip-safe +0 -0
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/pyproject.toml +0 -0
- {docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-ocr-onnxtr
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: Onnx Text Recognition (OnnxTR) OCR plugin for docling
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -262,11 +262,11 @@ Dynamic: license-file
|
|
|
262
262
|
</p>
|
|
263
263
|
|
|
264
264
|
[](LICENSE)
|
|
265
|
-
[](https://github.com/felixdittrich92/docling-OCR-OnnxTR/actions/workflows/builds.yml)
|
|
266
266
|
[](https://codecov.io/gh/felixdittrich92/docling-OCR-OnnxTR)
|
|
267
267
|
[](https://app.codacy.com/gh/felixdittrich92/docling-OCR-OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
268
268
|
[](https://www.codefactor.io/repository/github/felixdittrich92/docling-ocr-onnxtr)
|
|
269
|
-
[](https://pypi.org/project/docling-ocr-onnxtr/)
|
|
270
270
|

|
|
271
271
|
|
|
272
272
|
The `docling-OCR-OnnxTR` repository provides a plugin that integrates the [OnnxTR OCR engine](https://github.com/felixdittrich92/OnnxTR) into the [Docling framework](https://github.com/docling-project/docling), enhancing document processing capabilities with efficient and accurate text recognition.
|
|
@@ -289,19 +289,19 @@ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/get
|
|
|
289
289
|
|
|
290
290
|
```bash
|
|
291
291
|
# For CPU
|
|
292
|
-
pip install docling-ocr-onnxtr[cpu]
|
|
292
|
+
pip install "docling-ocr-onnxtr[cpu]"
|
|
293
293
|
# For Nvidia GPU
|
|
294
|
-
pip install docling-ocr-onnxtr[gpu]
|
|
294
|
+
pip install "docling-ocr-onnxtr[gpu]"
|
|
295
295
|
# For Intel GPU / Integrated Graphics
|
|
296
|
-
pip install docling-ocr-onnxtr[openvino]
|
|
296
|
+
pip install "docling-ocr-onnxtr[openvino]"
|
|
297
297
|
|
|
298
298
|
# Headless mode (no GUI)
|
|
299
299
|
# For CPU
|
|
300
|
-
pip install docling-ocr-onnxtr[cpu-headless]
|
|
300
|
+
pip install "docling-ocr-onnxtr[cpu-headless]"
|
|
301
301
|
# For Nvidia GPU
|
|
302
|
-
pip install docling-ocr-onnxtr[gpu-headless]
|
|
302
|
+
pip install "docling-ocr-onnxtr[gpu-headless]"
|
|
303
303
|
# For Intel GPU / Integrated Graphics
|
|
304
|
-
pip install docling-ocr-onnxtr[openvino-headless]
|
|
304
|
+
pip install "docling-ocr-onnxtr[openvino-headless]"
|
|
305
305
|
```
|
|
306
306
|
|
|
307
307
|
By integrating OnnxTR with Docling, users can achieve more efficient and accurate OCR results, enhancing the overall document processing experience.
|
|
@@ -3,11 +3,11 @@
|
|
|
3
3
|
</p>
|
|
4
4
|
|
|
5
5
|
[](LICENSE)
|
|
6
|
-
[](https://github.com/felixdittrich92/docling-OCR-OnnxTR/actions/workflows/builds.yml)
|
|
7
7
|
[](https://codecov.io/gh/felixdittrich92/docling-OCR-OnnxTR)
|
|
8
8
|
[](https://app.codacy.com/gh/felixdittrich92/docling-OCR-OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
9
9
|
[](https://www.codefactor.io/repository/github/felixdittrich92/docling-ocr-onnxtr)
|
|
10
|
-
[](https://pypi.org/project/docling-ocr-onnxtr/)
|
|
11
11
|

|
|
12
12
|
|
|
13
13
|
The `docling-OCR-OnnxTR` repository provides a plugin that integrates the [OnnxTR OCR engine](https://github.com/felixdittrich92/OnnxTR) into the [Docling framework](https://github.com/docling-project/docling), enhancing document processing capabilities with efficient and accurate text recognition.
|
|
@@ -30,19 +30,19 @@ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/get
|
|
|
30
30
|
|
|
31
31
|
```bash
|
|
32
32
|
# For CPU
|
|
33
|
-
pip install docling-ocr-onnxtr[cpu]
|
|
33
|
+
pip install "docling-ocr-onnxtr[cpu]"
|
|
34
34
|
# For Nvidia GPU
|
|
35
|
-
pip install docling-ocr-onnxtr[gpu]
|
|
35
|
+
pip install "docling-ocr-onnxtr[gpu]"
|
|
36
36
|
# For Intel GPU / Integrated Graphics
|
|
37
|
-
pip install docling-ocr-onnxtr[openvino]
|
|
37
|
+
pip install "docling-ocr-onnxtr[openvino]"
|
|
38
38
|
|
|
39
39
|
# Headless mode (no GUI)
|
|
40
40
|
# For CPU
|
|
41
|
-
pip install docling-ocr-onnxtr[cpu-headless]
|
|
41
|
+
pip install "docling-ocr-onnxtr[cpu-headless]"
|
|
42
42
|
# For Nvidia GPU
|
|
43
|
-
pip install docling-ocr-onnxtr[gpu-headless]
|
|
43
|
+
pip install "docling-ocr-onnxtr[gpu-headless]"
|
|
44
44
|
# For Intel GPU / Integrated Graphics
|
|
45
|
-
pip install docling-ocr-onnxtr[openvino-headless]
|
|
45
|
+
pip install "docling-ocr-onnxtr[openvino-headless]"
|
|
46
46
|
```
|
|
47
47
|
|
|
48
48
|
By integrating OnnxTR with Docling, users can achieve more efficient and accurate OCR results, enhancing the overall document processing experience.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = 'v0.1.3'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-ocr-onnxtr
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: Onnx Text Recognition (OnnxTR) OCR plugin for docling
|
|
5
5
|
Author-email: Felix Dittrich <felixdittrich92@gmail.com>
|
|
6
6
|
Maintainer: Felix Dittrich
|
|
@@ -262,11 +262,11 @@ Dynamic: license-file
|
|
|
262
262
|
</p>
|
|
263
263
|
|
|
264
264
|
[](LICENSE)
|
|
265
|
-
[](https://github.com/felixdittrich92/docling-OCR-OnnxTR/actions/workflows/builds.yml)
|
|
266
266
|
[](https://codecov.io/gh/felixdittrich92/docling-OCR-OnnxTR)
|
|
267
267
|
[](https://app.codacy.com/gh/felixdittrich92/docling-OCR-OnnxTR/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
|
|
268
268
|
[](https://www.codefactor.io/repository/github/felixdittrich92/docling-ocr-onnxtr)
|
|
269
|
-
[](https://pypi.org/project/docling-ocr-onnxtr/)
|
|
270
270
|

|
|
271
271
|
|
|
272
272
|
The `docling-OCR-OnnxTR` repository provides a plugin that integrates the [OnnxTR OCR engine](https://github.com/felixdittrich92/OnnxTR) into the [Docling framework](https://github.com/docling-project/docling), enhancing document processing capabilities with efficient and accurate text recognition.
|
|
@@ -289,19 +289,19 @@ For GPU support please take a look at: [ONNX Runtime](https://onnxruntime.ai/get
|
|
|
289
289
|
|
|
290
290
|
```bash
|
|
291
291
|
# For CPU
|
|
292
|
-
pip install docling-ocr-onnxtr[cpu]
|
|
292
|
+
pip install "docling-ocr-onnxtr[cpu]"
|
|
293
293
|
# For Nvidia GPU
|
|
294
|
-
pip install docling-ocr-onnxtr[gpu]
|
|
294
|
+
pip install "docling-ocr-onnxtr[gpu]"
|
|
295
295
|
# For Intel GPU / Integrated Graphics
|
|
296
|
-
pip install docling-ocr-onnxtr[openvino]
|
|
296
|
+
pip install "docling-ocr-onnxtr[openvino]"
|
|
297
297
|
|
|
298
298
|
# Headless mode (no GUI)
|
|
299
299
|
# For CPU
|
|
300
|
-
pip install docling-ocr-onnxtr[cpu-headless]
|
|
300
|
+
pip install "docling-ocr-onnxtr[cpu-headless]"
|
|
301
301
|
# For Nvidia GPU
|
|
302
|
-
pip install docling-ocr-onnxtr[gpu-headless]
|
|
302
|
+
pip install "docling-ocr-onnxtr[gpu-headless]"
|
|
303
303
|
# For Intel GPU / Integrated Graphics
|
|
304
|
-
pip install docling-ocr-onnxtr[openvino-headless]
|
|
304
|
+
pip install "docling-ocr-onnxtr[openvino-headless]"
|
|
305
305
|
```
|
|
306
306
|
|
|
307
307
|
By integrating OnnxTR with Docling, users can achieve more efficient and accurate OCR results, enhancing the overall document processing experience.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
-
from typing import List
|
|
3
2
|
|
|
3
|
+
import pytest
|
|
4
4
|
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
|
5
5
|
from docling.datamodel.base_models import InputFormat
|
|
6
6
|
from docling.datamodel.document import ConversionResult
|
|
@@ -48,9 +48,9 @@ def get_converter(ocr_options: OcrOptions):
|
|
|
48
48
|
return converter
|
|
49
49
|
|
|
50
50
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
51
|
+
@pytest.mark.parametrize(
|
|
52
|
+
"ocr_options",
|
|
53
|
+
[
|
|
54
54
|
OnnxtrOcrOptions(),
|
|
55
55
|
OnnxtrOcrOptions(force_full_page_ocr=True),
|
|
56
56
|
OnnxtrOcrOptions(
|
|
@@ -63,15 +63,25 @@ def test_e2e_conversions():
|
|
|
63
63
|
reco_arch="crnn_mobilenet_v3_small",
|
|
64
64
|
auto_correct_orientation=True,
|
|
65
65
|
),
|
|
66
|
-
]
|
|
66
|
+
],
|
|
67
|
+
)
|
|
68
|
+
def test_e2e_conversions(ocr_options: OcrOptions):
|
|
69
|
+
pdf_paths = get_pdf_paths()
|
|
70
|
+
|
|
67
71
|
settings.debug.visualize_ocr = True
|
|
68
72
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
73
|
+
print(f"Converting with ocr_engine: {ocr_options.kind}, language: {ocr_options.lang}")
|
|
74
|
+
converter = get_converter(ocr_options=ocr_options)
|
|
75
|
+
for pdf_path in pdf_paths:
|
|
76
|
+
if not ocr_options.auto_correct_orientation and "rotated" in pdf_path.name:
|
|
77
|
+
# Skip rotated PDFs if orientation correction is disabled
|
|
78
|
+
print(f"Skipping {pdf_path} due to orientation correction settings.")
|
|
79
|
+
continue
|
|
80
|
+
|
|
81
|
+
print(f"converting {pdf_path}")
|
|
82
|
+
doc_result: ConversionResult = converter.convert(pdf_path)
|
|
83
|
+
|
|
84
|
+
try:
|
|
75
85
|
verify_conversion_result_v1(
|
|
76
86
|
input_path=pdf_path,
|
|
77
87
|
doc_result=doc_result,
|
|
@@ -84,3 +94,8 @@ def test_e2e_conversions():
|
|
|
84
94
|
generate=GENERATE_V2,
|
|
85
95
|
fuzzy=True,
|
|
86
96
|
)
|
|
97
|
+
except AssertionError as e:
|
|
98
|
+
if "rotated" in pdf_path.name:
|
|
99
|
+
pytest.xfail(f"Skipping {pdf_path} due to orientation correction settings: {e}")
|
|
100
|
+
else:
|
|
101
|
+
raise # Unexpected failure — re-raise the error
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = 'v0.1.2'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr.egg-info/requires.txt
RENAMED
|
File without changes
|
{docling_ocr_onnxtr-0.1.2 → docling_ocr_onnxtr-0.1.3}/docling_ocr_onnxtr.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|