doc-page-extractor 0.1.0__tar.gz → 0.1.1__tar.gz
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of doc-page-extractor might be problematic. Click here for more details.
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/PKG-INFO +1 -1
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/extractor.py +7 -6
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/ocr.py +1 -1
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/rectangle.py +6 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor.egg-info/PKG-INFO +1 -1
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/setup.py +1 -1
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/LICENSE +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/README.md +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/__init__.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/clipper.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/downloader.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/latex.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/layout_order.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/layoutreader.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/ocr_corrector.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/__init__.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/cls_postprocess.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/db_postprocess.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/imaug.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/operators.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/predict_base.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/predict_cls.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/predict_det.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/predict_rec.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/predict_system.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/rec_postprocess.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/utils.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/overlap.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/plot.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/raw_optimizer.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/rotation.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/struct_eqtable/__init__.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/struct_eqtable/internvl/__init__.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/struct_eqtable/internvl/conversation.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/struct_eqtable/internvl/internvl.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/struct_eqtable/internvl/internvl_lmdeploy.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/struct_eqtable/pix2s/__init__.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/struct_eqtable/pix2s/pix2s.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/struct_eqtable/pix2s/pix2s_trt.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/table.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/types.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/utils.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor.egg-info/SOURCES.txt +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor.egg-info/dependency_links.txt +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor.egg-info/requires.txt +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor.egg-info/top_level.txt +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/setup.cfg +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/tests/__init__.py +0 -0
- {doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/tests/test_history_bus.py +0 -0
|
@@ -116,12 +116,13 @@ class DocExtractor:
|
|
|
116
116
|
lb=(x1, y2),
|
|
117
117
|
rb=(x2, y2),
|
|
118
118
|
)
|
|
119
|
-
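if cls == LayoutClass.TABLE:
|
|
120
|
-
yield TableLayout(cls=cls, rect=rect, fragments=[], parsed=None)
|
|
121
|
-
elif cls == LayoutClass.ISOLATE_FORMULA:
|
|
122
|
-
yield FormulaLayout(cls=cls, rect=rect, fragments=[], latex=None)
|
|
123
|
-
else:
|
|
124
|
-
yield PlainLayout(cls=cls, rect=rect, fragments=[])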
|
|
119
|
+
if rect.is_valid:
|
|
120
|
+
if cls == LayoutClass.TABLE:
|
|
121
|
+
yield TableLayout(cls=cls, rect=rect, fragments=[], parsed=None)
|
|
122
|
+
elif cls == LayoutClass.ISOLATE_FORMULA:
|
|
123
|
+
yield FormulaLayout(cls=cls, rect=rect, fragments=[], latex=None)
|
|
124
|
+
else:
|
|
125
|
+
yield PlainLayout(cls=cls, rect=rect, fragments=[])
|
|
125
126
|
|
|
126
127
|
def _layouts_matched_by_fragments(self, fragments: list[OCRFragment], layouts: list[Layout]):
|
|
127
128
|
layouts_group = self._split_layouts_by_group(layouts)
|
|
@@ -19,6 +19,10 @@ class Rectangle:
|
|
|
19
19
|
yield self.rb
|
|
20
20
|
yield self.rt
|
|
21
21
|
|
|
22
|
+
@property
|
|
23
|
+
def is_valid(self) -> bool:
|
|
24
|
+
return Polygon(self).is_valid
|
|
25
|
+
|
|
22
26
|
@property
|
|
23
27
|
def segments(self) -> Generator[tuple[Point, Point], None, None]:
|
|
24
28
|
yield (self.lt, self.lb)
|
|
@@ -60,6 +64,8 @@ class Rectangle:
|
|
|
60
64
|
def intersection_area(rect1: Rectangle, rect2: Rectangle) -> float:
|
|
61
65
|
poly1 = Polygon(rect1)
|
|
62
66
|
poly2 = Polygon(rect2)
|
|
67
|
+
if not poly1.is_valid or not poly2.is_valid:
|
|
68
|
+
return 0.0
|
|
63
69
|
intersection = poly1.intersection(poly2)
|
|
64
70
|
if intersection.is_empty:
|
|
65
71
|
return 0.0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/__init__.py
RENAMED
|
File without changes
|
{doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/cls_postprocess.py
RENAMED
|
File without changes
|
{doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/db_postprocess.py
RENAMED
|
File without changes
|
|
File without changes
|
{doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/operators.py
RENAMED
|
File without changes
|
{doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/predict_base.py
RENAMED
|
File without changes
|
{doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/predict_cls.py
RENAMED
|
File without changes
|
{doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/predict_det.py
RENAMED
|
File without changes
|
{doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/predict_rec.py
RENAMED
|
File without changes
|
{doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/predict_system.py
RENAMED
|
File without changes
|
{doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/onnxocr/rec_postprocess.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor/struct_eqtable/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor.egg-info/requires.txt
RENAMED
|
File without changes
|
{doc_page_extractor-0.1.0 → doc_page_extractor-0.1.1}/doc_page_extractor.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|