doc-page-extractor 0.0.6__tar.gz → 0.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of doc-page-extractor might be problematic. Click here for more details.

Files changed (38)
  1. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/PKG-INFO +12 -3
  2. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/README.md +11 -1
  3. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/extractor.py +1 -6
  4. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor.egg-info/PKG-INFO +12 -3
  5. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor.egg-info/requires.txt +0 -1
  6. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/setup.py +1 -2
  7. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/LICENSE +0 -0
  8. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/__init__.py +0 -0
  9. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/clipper.py +0 -0
  10. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/downloader.py +0 -0
  11. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/layoutreader.py +0 -0
  12. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/ocr.py +0 -0
  13. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/ocr_corrector.py +0 -0
  14. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/onnxocr/__init__.py +0 -0
  15. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/onnxocr/cls_postprocess.py +0 -0
  16. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/onnxocr/db_postprocess.py +0 -0
  17. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/onnxocr/imaug.py +0 -0
  18. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/onnxocr/operators.py +0 -0
  19. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/onnxocr/predict_base.py +0 -0
  20. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/onnxocr/predict_cls.py +0 -0
  21. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/onnxocr/predict_det.py +0 -0
  22. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/onnxocr/predict_rec.py +0 -0
  23. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/onnxocr/predict_system.py +0 -0
  24. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/onnxocr/rec_postprocess.py +0 -0
  25. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/onnxocr/utils.py +0 -0
  26. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/overlap.py +0 -0
  27. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/plot.py +0 -0
  28. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/raw_optimizer.py +0 -0
  29. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/rectangle.py +0 -0
  30. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/rotation.py +0 -0
  31. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/types.py +0 -0
  32. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor/utils.py +0 -0
  33. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor.egg-info/SOURCES.txt +0 -0
  34. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor.egg-info/dependency_links.txt +0 -0
  35. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/doc_page_extractor.egg-info/top_level.txt +0 -0
  36. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/setup.cfg +0 -0
  37. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/tests/__init__.py +0 -0
  38. {doc_page_extractor-0.0.6 → doc_page_extractor-0.0.7}/tests/test_history_bus.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: doc-page-extractor
3
- Version: 0.0.6
3
+ Version: 0.0.7
4
4
  Summary: doc page extractor can identify text and format in images and return structured data.
5
5
  Home-page: https://github.com/Moskize91/doc-page-extractor
6
6
  Author: Tao Zeyu
@@ -10,7 +10,6 @@ License-File: LICENSE
10
10
  Requires-Dist: opencv-python<5.0,>=4.11.0
11
11
  Requires-Dist: pillow<11.0,>=10.3
12
12
  Requires-Dist: pyclipper<2.0,>=1.2.0
13
- Requires-Dist: onnxruntime<2.0,>=1.19.0
14
13
  Requires-Dist: numpy<2.0,>=1.24.0
15
14
  Requires-Dist: shapely<3.0,>=2.0.0
16
15
  Requires-Dist: transformers<5.0,>=4.48.0
@@ -37,10 +36,20 @@ doc page extractor can identify text and format in images and return structured
37
36
  pip install doc-page-extractor
38
37
  ```
39
38
 
39
+ ```shell
40
+ pip install onnxruntime==1.21.0
41
+ ```
42
+
40
43
  ## Using CUDA
41
44
 
42
45
  Please refer to the introduction of [PyTorch](https://pytorch.org/get-started/locally/) and select the appropriate command to install according to your operating system.
43
46
 
47
+ In addition, replace the `onnxruntime` install command from the previous section with the following:
48
+
49
+ ```shell
50
+ pip install onnxruntime-gpu==1.21.0
51
+ ```
52
+
44
53
  ## Example
45
54
 
46
55
  ```python
@@ -49,7 +58,7 @@ from doc_page_extractor import DocExtractor
49
58
 
50
59
  extractor = DocExtractor(
51
60
  model_dir_path=model_path, # Folder address where AI model is downloaded and installed
52
- device="cpu", # If you want to use CUDA, please change to device="cuda:0".
61
+ device="cpu", # If you want to use CUDA, please change to device="cuda".
53
62
  )
54
63
  with Image.open("/path/to/your/image.png") as image:
55
64
  result = extractor.extract(
@@ -12,10 +12,20 @@ doc page extractor can identify text and format in images and return structured
12
12
  pip install doc-page-extractor
13
13
  ```
14
14
 
15
+ ```shell
16
+ pip install onnxruntime==1.21.0
17
+ ```
18
+
15
19
  ## Using CUDA
16
20
 
17
21
  Please refer to the introduction of [PyTorch](https://pytorch.org/get-started/locally/) and select the appropriate command to install according to your operating system.
18
22
 
23
+ In addition, replace the `onnxruntime` install command from the previous section with the following:
24
+
25
+ ```shell
26
+ pip install onnxruntime-gpu==1.21.0
27
+ ```
28
+
19
29
  ## Example
20
30
 
21
31
  ```python
@@ -24,7 +34,7 @@ from doc_page_extractor import DocExtractor
24
34
 
25
35
  extractor = DocExtractor(
26
36
  model_dir_path=model_path, # Folder address where AI model is downloaded and installed
27
- device="cpu", # If you want to use CUDA, please change to device="cuda:0".
37
+ device="cpu", # If you want to use CUDA, please change to device="cuda".
28
38
  )
29
39
  with Image.open("/path/to/your/image.png") as image:
30
40
  result = extractor.extract(
@@ -1,5 +1,4 @@
1
1
  import os
2
- import torch
3
2
 
4
3
  from typing import Literal, Iterable
5
4
  from pathlib import Path
@@ -34,10 +33,6 @@ class DocExtractor:
34
33
  self._yolo: YOLOv10 | None = None
35
34
  self._layout: LayoutLMv3ForTokenClassification | None = None
36
35
 
37
- if self._device.startswith("cuda") and not torch.cuda.is_available():
38
- self._device = "cpu"
39
- print("Warn: cuda is not available, use cpu instead")
40
-
41
36
  def extract(
42
37
  self,
43
38
  image: Image,
@@ -83,7 +78,7 @@ class DocExtractor:
83
78
  source=source,
84
79
  imgsz=1024,
85
80
  conf=0.2,
86
- device=self._device # Device to use (e.g., "cuda:0" or "cpu")
81
+ device=self._device # Device to use (e.g., "cuda" or "cpu")
87
82
  )
88
83
  boxes = det_res[0].__dict__["boxes"]
89
84
  layouts: list[Layout] = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: doc-page-extractor
3
- Version: 0.0.6
3
+ Version: 0.0.7
4
4
  Summary: doc page extractor can identify text and format in images and return structured data.
5
5
  Home-page: https://github.com/Moskize91/doc-page-extractor
6
6
  Author: Tao Zeyu
@@ -10,7 +10,6 @@ License-File: LICENSE
10
10
  Requires-Dist: opencv-python<5.0,>=4.11.0
11
11
  Requires-Dist: pillow<11.0,>=10.3
12
12
  Requires-Dist: pyclipper<2.0,>=1.2.0
13
- Requires-Dist: onnxruntime<2.0,>=1.19.0
14
13
  Requires-Dist: numpy<2.0,>=1.24.0
15
14
  Requires-Dist: shapely<3.0,>=2.0.0
16
15
  Requires-Dist: transformers<5.0,>=4.48.0
@@ -37,10 +36,20 @@ doc page extractor can identify text and format in images and return structured
37
36
  pip install doc-page-extractor
38
37
  ```
39
38
 
39
+ ```shell
40
+ pip install onnxruntime==1.21.0
41
+ ```
42
+
40
43
  ## Using CUDA
41
44
 
42
45
  Please refer to the introduction of [PyTorch](https://pytorch.org/get-started/locally/) and select the appropriate command to install according to your operating system.
43
46
 
47
+ In addition, replace the `onnxruntime` install command from the previous section with the following:
48
+
49
+ ```shell
50
+ pip install onnxruntime-gpu==1.21.0
51
+ ```
52
+
44
53
  ## Example
45
54
 
46
55
  ```python
@@ -49,7 +58,7 @@ from doc_page_extractor import DocExtractor
49
58
 
50
59
  extractor = DocExtractor(
51
60
  model_dir_path=model_path, # Folder address where AI model is downloaded and installed
52
- device="cpu", # If you want to use CUDA, please change to device="cuda:0".
61
+ device="cpu", # If you want to use CUDA, please change to device="cuda".
53
62
  )
54
63
  with Image.open("/path/to/your/image.png") as image:
55
64
  result = extractor.extract(
@@ -1,7 +1,6 @@
1
1
  opencv-python<5.0,>=4.11.0
2
2
  pillow<11.0,>=10.3
3
3
  pyclipper<2.0,>=1.2.0
4
- onnxruntime<2.0,>=1.19.0
5
4
  numpy<2.0,>=1.24.0
6
5
  shapely<3.0,>=2.0.0
7
6
  transformers<5.0,>=4.48.0
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="doc-page-extractor",
5
- version="0.0.6",
5
+ version="0.0.7",
6
6
  author="Tao Zeyu",
7
7
  author_email="i@taozeyu.com",
8
8
  url="https://github.com/Moskize91/doc-page-extractor",
@@ -14,7 +14,6 @@ setup(
14
14
  "opencv-python>=4.11.0,<5.0",
15
15
  "pillow>=10.3,<11.0",
16
16
  "pyclipper>=1.2.0,<2.0",
17
- "onnxruntime>=1.19.0,<2.0",
18
17
  "numpy>=1.24.0,<2.0",
19
18
  "shapely>=2.0.0,<3.0",
20
19
  "transformers>=4.48.0,<5.0",