doc-page-extractor 0.0.2__tar.gz → 0.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of doc-page-extractor might be problematic. Click here for more details.
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/PKG-INFO +1 -2
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor/extractor.py +74 -78
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor/ocr.py +44 -11
- doc_page_extractor-0.0.4/doc_page_extractor/ocr_corrector.py +126 -0
- doc_page_extractor-0.0.4/doc_page_extractor/overlap.py +156 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor/plot.py +2 -2
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor/rectangle.py +13 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor.egg-info/PKG-INFO +1 -2
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor.egg-info/SOURCES.txt +5 -1
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor.egg-info/requires.txt +0 -1
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor.egg-info/top_level.txt +1 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/setup.py +1 -2
- doc_page_extractor-0.0.4/tests/__init__.py +0 -0
- doc_page_extractor-0.0.4/tests/test_history_bus.py +55 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/LICENSE +0 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/README.md +0 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor/__init__.py +0 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor/clipper.py +0 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor/downloader.py +0 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor/layoutreader.py +0 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor/raw_optimizer.py +0 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor/rotation.py +0 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor/types.py +0 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor/utils.py +0 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor.egg-info/dependency_links.txt +0 -0
- {doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: doc-page-extractor
|
|
3
|
-
Version: 0.0.2
|
|
3
|
+
Version: 0.0.4
|
|
4
4
|
Summary: doc page extractor can identify text and format in images and return structured data.
|
|
5
5
|
Home-page: https://github.com/Moskize91/doc-page-extractor
|
|
6
6
|
Author: Tao Zeyu
|
|
@@ -12,7 +12,6 @@ Requires-Dist: pillow<11.0,>=10.3
|
|
|
12
12
|
Requires-Dist: shapely<3.0,>=2.0.0
|
|
13
13
|
Requires-Dist: transformers<5.0,>=4.48.0
|
|
14
14
|
Requires-Dist: doclayout_yolo>=0.0.3
|
|
15
|
-
Requires-Dist: paddlepaddle<3.0,>=2.6.0
|
|
16
15
|
Requires-Dist: paddleocr==2.9.0
|
|
17
16
|
Dynamic: author
|
|
18
17
|
Dynamic: author-email
|
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
|
-
import sys
|
|
3
2
|
import torch
|
|
4
|
-
import numpy as np
|
|
5
3
|
|
|
6
|
-
from typing import Literal, Generator
|
|
4
|
+
from typing import Literal, Iterable
|
|
7
5
|
from pathlib import Path
|
|
8
6
|
from PIL.Image import Image
|
|
9
7
|
from transformers import LayoutLMv3ForTokenClassification
|
|
@@ -11,11 +9,13 @@ from doclayout_yolo import YOLOv10
|
|
|
11
9
|
|
|
12
10
|
from .layoutreader import prepare_inputs, boxes2inputs, parse_logits
|
|
13
11
|
from .ocr import OCR, PaddleLang
|
|
12
|
+
from .ocr_corrector import correct_fragments
|
|
14
13
|
from .raw_optimizer import RawOptimizer
|
|
15
14
|
from .rectangle import intersection_area, Rectangle
|
|
16
15
|
from .types import ExtractedResult, OCRFragment, LayoutClass, Layout
|
|
17
16
|
from .downloader import download
|
|
18
|
-
from .
|
|
17
|
+
from .overlap import regroup_lines, remove_overlap_layouts
|
|
18
|
+
from .utils import ensure_dir
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class DocExtractor:
|
|
@@ -23,10 +23,12 @@ class DocExtractor:
|
|
|
23
23
|
self,
|
|
24
24
|
model_dir_path: str,
|
|
25
25
|
device: Literal["cpu", "cuda"] = "cpu",
|
|
26
|
-
|
|
26
|
+
ocr_for_each_layouts: bool = True,
|
|
27
|
+
order_by_layoutreader: bool = False,
|
|
27
28
|
):
|
|
28
29
|
self._model_dir_path: str = model_dir_path
|
|
29
30
|
self._device: Literal["cpu", "cuda"] = device
|
|
31
|
+
self._ocr_for_each_layouts: bool = ocr_for_each_layouts
|
|
30
32
|
self._order_by_layoutreader: bool = order_by_layoutreader
|
|
31
33
|
self._ocr: OCR = OCR(device, os.path.join(model_dir_path, "paddle"))
|
|
32
34
|
self._yolo: YOLOv10 | None = None
|
|
@@ -44,15 +46,28 @@ class DocExtractor:
|
|
|
44
46
|
) -> ExtractedResult:
|
|
45
47
|
|
|
46
48
|
raw_optimizer = RawOptimizer(image, adjust_points)
|
|
47
|
-
fragments = list(self._search_orc_fragments(raw_optimizer.image_np, lang))
|
|
49
|
+
fragments = list(self._ocr.search_fragments(raw_optimizer.image_np, lang))
|
|
48
50
|
raw_optimizer.receive_raw_fragments(fragments)
|
|
49
51
|
|
|
52
|
+
layouts = self._get_layouts(raw_optimizer.image)
|
|
53
|
+
layouts = self._layouts_matched_by_fragments(fragments, layouts)
|
|
54
|
+
layouts = remove_overlap_layouts(layouts)
|
|
55
|
+
|
|
56
|
+
if self._ocr_for_each_layouts:
|
|
57
|
+
self._correct_fragments_by_ocr_layouts(raw_optimizer.image, layouts, lang)
|
|
58
|
+
|
|
50
59
|
if self._order_by_layoutreader:
|
|
51
60
|
width, height = raw_optimizer.image.size
|
|
52
|
-
self._order_fragments(width, height, fragments)
|
|
61
|
+
self._order_fragments_by_ai(width, height, layouts)
|
|
62
|
+
else:
|
|
63
|
+
self._order_fragments_by_y(layouts)
|
|
53
64
|
|
|
54
|
-
layouts = self.
|
|
55
|
-
|
|
65
|
+
layouts = [layout for layout in layouts if self._should_keep_layout(layout)]
|
|
66
|
+
for layout in layouts:
|
|
67
|
+
layout.fragments = regroup_lines(layout.fragments)
|
|
68
|
+
layout.fragments.sort(key=lambda fragment: fragment.order)
|
|
69
|
+
|
|
70
|
+
layouts = self._sort_layouts(layouts)
|
|
56
71
|
raw_optimizer.receive_raw_layouts(layouts)
|
|
57
72
|
|
|
58
73
|
return ExtractedResult(
|
|
@@ -62,57 +77,6 @@ class DocExtractor:
|
|
|
62
77
|
adjusted_image=raw_optimizer.adjusted_image,
|
|
63
78
|
)
|
|
64
79
|
|
|
65
|
-
def _search_orc_fragments(self, image: np.ndarray, lang: PaddleLang) -> Generator[OCRFragment, None, None]:
|
|
66
|
-
index: int = 0
|
|
67
|
-
for item in self._ocr.do(lang, image):
|
|
68
|
-
for line in item:
|
|
69
|
-
react: list[list[float]] = line[0]
|
|
70
|
-
text, rank = line[1]
|
|
71
|
-
if is_space_text(text):
|
|
72
|
-
continue
|
|
73
|
-
yield OCRFragment(
|
|
74
|
-
order=index,
|
|
75
|
-
text=text,
|
|
76
|
-
rank=rank,
|
|
77
|
-
rect=Rectangle(
|
|
78
|
-
lt=(react[0][0], react[0][1]),
|
|
79
|
-
rt=(react[1][0], react[1][1]),
|
|
80
|
-
rb=(react[2][0], react[2][1]),
|
|
81
|
-
lb=(react[3][0], react[3][1]),
|
|
82
|
-
),
|
|
83
|
-
)
|
|
84
|
-
index += 1
|
|
85
|
-
|
|
86
|
-
def _order_fragments(self, width: int, height: int, fragments: list[OCRFragment]):
|
|
87
|
-
layout_model = self._get_layout()
|
|
88
|
-
boxes: list[list[int]] = []
|
|
89
|
-
steps: float = 1000.0 # max value of layoutreader
|
|
90
|
-
x_rate: float = 1.0
|
|
91
|
-
y_rate: float = 1.0
|
|
92
|
-
x_offset: float = 0.0
|
|
93
|
-
y_offset: float = 0.0
|
|
94
|
-
if width > height:
|
|
95
|
-
y_rate = height / width
|
|
96
|
-
y_offset = (1.0 - y_rate) / 2.0
|
|
97
|
-
else:
|
|
98
|
-
x_rate = width / height
|
|
99
|
-
x_offset = (1.0 - x_rate) / 2.0
|
|
100
|
-
|
|
101
|
-
for left, top, right, bottom in self._collect_rate_boxes(fragments):
|
|
102
|
-
boxes.append([
|
|
103
|
-
round((left * x_rate + x_offset) * steps),
|
|
104
|
-
round((top * y_rate + y_offset) * steps),
|
|
105
|
-
round((right * x_rate + x_offset) * steps),
|
|
106
|
-
round((bottom * y_rate + y_offset) * steps),
|
|
107
|
-
])
|
|
108
|
-
inputs = boxes2inputs(boxes)
|
|
109
|
-
inputs = prepare_inputs(inputs, layout_model)
|
|
110
|
-
logits = layout_model(**inputs).logits.cpu().squeeze(0)
|
|
111
|
-
orders: list[int] = parse_logits(logits, len(boxes))
|
|
112
|
-
|
|
113
|
-
for order, fragment in zip(orders, fragments):
|
|
114
|
-
fragment.order = order
|
|
115
|
-
|
|
116
80
|
def _get_layouts(self, source: Image) -> list[Layout]:
|
|
117
81
|
# about source parameter to see:
|
|
118
82
|
# https://github.com/opendatalab/DocLayout-YOLO/blob/7c4be36bc61f11b67cf4a44ee47f3c41e9800a91/doclayout_yolo/data/build.py#L157-L175
|
|
@@ -152,14 +116,11 @@ class DocExtractor:
|
|
|
152
116
|
if layout is not None:
|
|
153
117
|
layout.fragments.append(fragment)
|
|
154
118
|
break
|
|
119
|
+
return layouts
|
|
155
120
|
|
|
121
|
+
def _correct_fragments_by_ocr_layouts(self, source: Image, layouts: list[Layout], lang: PaddleLang):
|
|
156
122
|
for layout in layouts:
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
layouts = [layout for layout in layouts if self._should_keep_layout(layout)]
|
|
160
|
-
layouts = self._sort_layouts(layouts)
|
|
161
|
-
|
|
162
|
-
return layouts
|
|
123
|
+
correct_fragments(self._ocr, source, layout, lang)
|
|
163
124
|
|
|
164
125
|
def _split_layouts_by_group(self, layouts: list[Layout]):
|
|
165
126
|
texts_layouts: list[Layout] = []
|
|
@@ -197,13 +158,6 @@ class DocExtractor:
|
|
|
197
158
|
|
|
198
159
|
return min_layout
|
|
199
160
|
|
|
200
|
-
def _layout_order(self, layout: Layout) -> int:
|
|
201
|
-
fragments = layout.fragments
|
|
202
|
-
if len(fragments) == 0:
|
|
203
|
-
return sys.maxsize
|
|
204
|
-
else:
|
|
205
|
-
return fragments[0].order
|
|
206
|
-
|
|
207
161
|
def _get_yolo(self) -> YOLOv10:
|
|
208
162
|
if self._yolo is None:
|
|
209
163
|
yolo_model_url = "https://huggingface.co/opendatalab/PDF-Extract-Kit-1.0/resolve/main/models/Layout/YOLO/doclayout_yolo_ft.pt"
|
|
@@ -214,6 +168,44 @@ class DocExtractor:
|
|
|
214
168
|
self._yolo = YOLOv10(str(yolo_model_path))
|
|
215
169
|
return self._yolo
|
|
216
170
|
|
|
171
|
+
def _order_fragments_by_y(self, layouts: list[Layout]):
|
|
172
|
+
fragments = list(self._iter_fragments(layouts))
|
|
173
|
+
fragments.sort(key=lambda f: f.rect.lt[1] + f.rect.rt[1])
|
|
174
|
+
for i, fragment in enumerate(fragments):
|
|
175
|
+
fragment.order = i
|
|
176
|
+
|
|
177
|
+
def _order_fragments_by_ai(self, width: int, height: int, layouts: list[Layout]):
|
|
178
|
+
layout_model = self._get_layout()
|
|
179
|
+
boxes: list[list[int]] = []
|
|
180
|
+
steps: float = 1000.0 # max value of layoutreader
|
|
181
|
+
x_rate: float = 1.0
|
|
182
|
+
y_rate: float = 1.0
|
|
183
|
+
x_offset: float = 0.0
|
|
184
|
+
y_offset: float = 0.0
|
|
185
|
+
if width > height:
|
|
186
|
+
y_rate = height / width
|
|
187
|
+
y_offset = (1.0 - y_rate) / 2.0
|
|
188
|
+
else:
|
|
189
|
+
x_rate = width / height
|
|
190
|
+
x_offset = (1.0 - x_rate) / 2.0
|
|
191
|
+
|
|
192
|
+
for left, top, right, bottom in self._collect_rate_boxes(
|
|
193
|
+
fragments=self._iter_fragments(layouts),
|
|
194
|
+
):
|
|
195
|
+
boxes.append([
|
|
196
|
+
round((left * x_rate + x_offset) * steps),
|
|
197
|
+
round((top * y_rate + y_offset) * steps),
|
|
198
|
+
round((right * x_rate + x_offset) * steps),
|
|
199
|
+
round((bottom * y_rate + y_offset) * steps),
|
|
200
|
+
])
|
|
201
|
+
inputs = boxes2inputs(boxes)
|
|
202
|
+
inputs = prepare_inputs(inputs, layout_model)
|
|
203
|
+
logits = layout_model(**inputs).logits.cpu().squeeze(0)
|
|
204
|
+
orders: list[int] = parse_logits(logits, len(boxes))
|
|
205
|
+
|
|
206
|
+
for order, fragment in zip(orders, self._iter_fragments(layouts)):
|
|
207
|
+
fragment.order = order
|
|
208
|
+
|
|
217
209
|
def _get_layout(self) -> LayoutLMv3ForTokenClassification:
|
|
218
210
|
if self._layout is None:
|
|
219
211
|
cache_dir = ensure_dir(
|
|
@@ -237,6 +229,8 @@ class DocExtractor:
|
|
|
237
229
|
)
|
|
238
230
|
|
|
239
231
|
def _sort_layouts(self, layouts: list[Layout]) -> list[Layout]:
|
|
232
|
+
layouts.sort(key=lambda layout: layout.rect.lt[1] + layout.rect.rt[1])
|
|
233
|
+
|
|
240
234
|
sorted_layouts: list[tuple[int, Layout]] = []
|
|
241
235
|
empty_layouts: list[tuple[int, Layout]] = []
|
|
242
236
|
|
|
@@ -246,11 +240,9 @@ class DocExtractor:
|
|
|
246
240
|
else:
|
|
247
241
|
empty_layouts.append((i, layout))
|
|
248
242
|
|
|
249
|
-
sorted_layouts.sort(key=lambda x: x[1].fragments[0].order)
|
|
250
|
-
|
|
251
243
|
# try to maintain the order of empty layouts and other layouts as much as possible
|
|
252
244
|
for i, layout in empty_layouts:
|
|
253
|
-
max_less_index: int =
|
|
245
|
+
max_less_index: int = -1
|
|
254
246
|
max_less_layout: Layout | None = None
|
|
255
247
|
max_less_index_in_enumerated: int = -1
|
|
256
248
|
for j, (k, sorted_layout) in enumerate(sorted_layouts):
|
|
@@ -266,7 +258,7 @@ class DocExtractor:
|
|
|
266
258
|
|
|
267
259
|
return [layout for _, layout in sorted_layouts]
|
|
268
260
|
|
|
269
|
-
def _collect_rate_boxes(self, fragments:
|
|
261
|
+
def _collect_rate_boxes(self, fragments: Iterable[OCRFragment]):
|
|
270
262
|
boxes = self._get_boxes(fragments)
|
|
271
263
|
left = float("inf")
|
|
272
264
|
top = float("inf")
|
|
@@ -290,7 +282,7 @@ class DocExtractor:
|
|
|
290
282
|
(_bottom - top) / height,
|
|
291
283
|
)
|
|
292
284
|
|
|
293
|
-
def _get_boxes(self, fragments:
|
|
285
|
+
def _get_boxes(self, fragments: Iterable[OCRFragment]):
|
|
294
286
|
boxes: list[tuple[float, float, float, float]] = []
|
|
295
287
|
for fragment in fragments:
|
|
296
288
|
left: float = float("inf")
|
|
@@ -304,3 +296,7 @@ class DocExtractor:
|
|
|
304
296
|
bottom = max(bottom, y)
|
|
305
297
|
boxes.append((left, top, right, bottom))
|
|
306
298
|
return boxes
|
|
299
|
+
|
|
300
|
+
def _iter_fragments(self, layouts: list[Layout]):
|
|
301
|
+
for layout in layouts:
|
|
302
|
+
yield from layout.fragments
|
|
@@ -2,9 +2,11 @@ import os
|
|
|
2
2
|
import numpy as np
|
|
3
3
|
import cv2
|
|
4
4
|
|
|
5
|
-
from typing import Literal,
|
|
5
|
+
from typing import Any, Literal, Generator
|
|
6
6
|
from paddleocr import PaddleOCR
|
|
7
|
-
from .
|
|
7
|
+
from .types import OCRFragment
|
|
8
|
+
from .rectangle import Rectangle
|
|
9
|
+
from .utils import is_space_text, ensure_dir
|
|
8
10
|
|
|
9
11
|
|
|
10
12
|
# https://github.com/PaddlePaddle/PaddleOCR/blob/2c0c4beb0606819735a16083cdebf652939c781a/paddleocr.py#L108-L157
|
|
@@ -16,16 +18,33 @@ class OCR:
|
|
|
16
18
|
self,
|
|
17
19
|
device: Literal["cpu", "cuda"],
|
|
18
20
|
model_dir_path: str,
|
|
19
|
-
bin: bool = True,
|
|
20
|
-
inv: bool = False,
|
|
21
21
|
):
|
|
22
22
|
self._device: Literal["cpu", "cuda"] = device
|
|
23
23
|
self._model_dir_path: str = model_dir_path
|
|
24
24
|
self._ocr_and_lan: tuple[PaddleOCR, PaddleLang] | None = None
|
|
25
|
-
self._bin: bool = bin
|
|
26
|
-
self._inv: bool = inv
|
|
27
25
|
|
|
28
|
-
def
|
|
26
|
+
def search_fragments(self, image: np.ndarray, lang: PaddleLang) -> Generator[OCRFragment, None, None]:
|
|
27
|
+
index: int = 0
|
|
28
|
+
for item in self._handle(lang, image):
|
|
29
|
+
for line in item:
|
|
30
|
+
react: list[list[float]] = line[0]
|
|
31
|
+
text, rank = line[1]
|
|
32
|
+
if is_space_text(text):
|
|
33
|
+
continue
|
|
34
|
+
yield OCRFragment(
|
|
35
|
+
order=index,
|
|
36
|
+
text=text,
|
|
37
|
+
rank=rank,
|
|
38
|
+
rect=Rectangle(
|
|
39
|
+
lt=(react[0][0], react[0][1]),
|
|
40
|
+
rt=(react[1][0], react[1][1]),
|
|
41
|
+
rb=(react[2][0], react[2][1]),
|
|
42
|
+
lb=(react[3][0], react[3][1]),
|
|
43
|
+
),
|
|
44
|
+
)
|
|
45
|
+
index += 1
|
|
46
|
+
|
|
47
|
+
def _handle(self, lang: PaddleLang, image: np.ndarray) -> list[Any]:
|
|
29
48
|
ocr = self._get_ocr(lang)
|
|
30
49
|
image = self._preprocess_image(image)
|
|
31
50
|
# about img parameter to see
|
|
@@ -59,10 +78,24 @@ class OCR:
|
|
|
59
78
|
|
|
60
79
|
def _preprocess_image(self, image: np.ndarray) -> np.ndarray:
|
|
61
80
|
image = self._alpha_to_color(image, (255, 255, 255))
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
81
|
+
# image = cv2.bitwise_not(image) # inv
|
|
82
|
+
# image = self._binarize_img(image) # bin
|
|
83
|
+
image = cv2.normalize(
|
|
84
|
+
src=image,
|
|
85
|
+
dst=np.zeros((image.shape[0], image.shape[1])),
|
|
86
|
+
alpha=0,
|
|
87
|
+
beta=255,
|
|
88
|
+
norm_type=cv2.NORM_MINMAX,
|
|
89
|
+
)
|
|
90
|
+
image = cv2.fastNlMeansDenoisingColored(
|
|
91
|
+
src=image,
|
|
92
|
+
dst=None,
|
|
93
|
+
h=10,
|
|
94
|
+
hColor=10,
|
|
95
|
+
templateWindowSize=7,
|
|
96
|
+
searchWindowSize=15,
|
|
97
|
+
)
|
|
98
|
+
# image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # image to gray
|
|
66
99
|
return image
|
|
67
100
|
|
|
68
101
|
def _alpha_to_color(self, image: np.ndarray, alpha_color: tuple[float, float, float]) -> np.ndarray:
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from typing import Iterable
|
|
4
|
+
from shapely.geometry import Polygon
|
|
5
|
+
from PIL.Image import new, Image, Resampling
|
|
6
|
+
from .types import Layout, OCRFragment
|
|
7
|
+
from .ocr import OCR, PaddleLang
|
|
8
|
+
from .overlap import overlap_rate
|
|
9
|
+
from .rectangle import Point, Rectangle
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
_MIN_RATE = 0.5
|
|
13
|
+
|
|
14
|
+
def correct_fragments(ocr: OCR, source: Image, layout: Layout, lang: PaddleLang):
|
|
15
|
+
x1, y1, x2, y2 = layout.rect.wrapper
|
|
16
|
+
image: Image = source.crop((
|
|
17
|
+
round(x1), round(y1),
|
|
18
|
+
round(x2), round(y2),
|
|
19
|
+
))
|
|
20
|
+
image, dx, dy, scale = _adjust_image(image)
|
|
21
|
+
image_np = np.array(image)
|
|
22
|
+
ocr_fragments = list(ocr.search_fragments(image_np, lang))
|
|
23
|
+
corrected_fragments: list[OCRFragment] = []
|
|
24
|
+
|
|
25
|
+
for fragment in ocr_fragments:
|
|
26
|
+
_apply_fragment(fragment.rect, layout, dx, dy, scale)
|
|
27
|
+
|
|
28
|
+
matched_fragments, not_matched_fragments = _match_fragments(
|
|
29
|
+
zone_rect=layout.rect,
|
|
30
|
+
fragments1=layout.fragments,
|
|
31
|
+
fragments2=ocr_fragments,
|
|
32
|
+
)
|
|
33
|
+
for fragment1, fragment2 in matched_fragments:
|
|
34
|
+
if fragment1.rank > fragment2.rank:
|
|
35
|
+
corrected_fragments.append(fragment1)
|
|
36
|
+
else:
|
|
37
|
+
corrected_fragments.append(fragment2)
|
|
38
|
+
|
|
39
|
+
corrected_fragments.extend(not_matched_fragments)
|
|
40
|
+
layout.fragments = corrected_fragments
|
|
41
|
+
|
|
42
|
+
def _adjust_image(image: Image) -> tuple[Image, int, int, float]:
|
|
43
|
+
# after testing, adding white borders to images can reduce
|
|
44
|
+
# the possibility of some text not being recognized
|
|
45
|
+
border_size: int = 50
|
|
46
|
+
adjusted_size: int = 1024 - 2 * border_size
|
|
47
|
+
width, height = image.size
|
|
48
|
+
core_width = float(max(adjusted_size, width))
|
|
49
|
+
core_height = float(max(adjusted_size, height))
|
|
50
|
+
|
|
51
|
+
scale_x = core_width / width
|
|
52
|
+
scale_y = core_height / height
|
|
53
|
+
scale = min(scale_x, scale_y)
|
|
54
|
+
adjusted_width = width * scale
|
|
55
|
+
adjusted_height = height * scale
|
|
56
|
+
|
|
57
|
+
dx = (core_width - adjusted_width) / 2.0
|
|
58
|
+
dy = (core_height - adjusted_height) / 2.0
|
|
59
|
+
dx = round(dx) + border_size
|
|
60
|
+
dy = round(dy) + border_size
|
|
61
|
+
|
|
62
|
+
if scale != 1.0:
|
|
63
|
+
width = round(width * scale)
|
|
64
|
+
height = round(height * scale)
|
|
65
|
+
image = image.resize((width, height), Resampling.BICUBIC)
|
|
66
|
+
|
|
67
|
+
width = round(core_width) + 2 * border_size
|
|
68
|
+
height = round(core_height) + 2 * border_size
|
|
69
|
+
new_image = new("RGB", (width, height), (255, 255, 255))
|
|
70
|
+
new_image.paste(image, (dx, dy))
|
|
71
|
+
|
|
72
|
+
return new_image, dx, dy, scale
|
|
73
|
+
|
|
74
|
+
def _apply_fragment(rect: Rectangle, layout: Layout, dx: int, dy: int, scale: float):
|
|
75
|
+
rect.lt = _apply_point(rect.lt, layout, dx, dy, scale)
|
|
76
|
+
rect.lb = _apply_point(rect.lb, layout, dx, dy, scale)
|
|
77
|
+
rect.rb = _apply_point(rect.rb, layout, dx, dy, scale)
|
|
78
|
+
rect.rt = _apply_point(rect.rt, layout, dx, dy, scale)
|
|
79
|
+
|
|
80
|
+
def _apply_point(point: Point, layout: Layout, dx: int, dy: int, scale: float) -> Point:
|
|
81
|
+
x, y = point
|
|
82
|
+
x = (x - dx) / scale + layout.rect.lt[0]
|
|
83
|
+
y = (y - dy) / scale + layout.rect.lt[1]
|
|
84
|
+
return x, y
|
|
85
|
+
|
|
86
|
+
def _match_fragments(
|
|
87
|
+
zone_rect: Rectangle,
|
|
88
|
+
fragments1: Iterable[OCRFragment],
|
|
89
|
+
fragments2: Iterable[OCRFragment],
|
|
90
|
+
) -> tuple[list[tuple[OCRFragment, OCRFragment]], list[OCRFragment]]:
|
|
91
|
+
|
|
92
|
+
zone_polygon = Polygon(zone_rect)
|
|
93
|
+
fragments2: list[OCRFragment] = list(fragments2)
|
|
94
|
+
matched_fragments: list[tuple[OCRFragment, OCRFragment]] = []
|
|
95
|
+
not_matched_fragments: list[OCRFragment] = []
|
|
96
|
+
|
|
97
|
+
for fragment1 in fragments1:
|
|
98
|
+
polygon1 = Polygon(fragment1.rect)
|
|
99
|
+
polygon1 = zone_polygon.intersection(polygon1)
|
|
100
|
+
if polygon1.is_empty:
|
|
101
|
+
continue
|
|
102
|
+
|
|
103
|
+
beast_j = -1
|
|
104
|
+
beast_rate = 0.0
|
|
105
|
+
|
|
106
|
+
for j, fragment2 in enumerate(fragments2):
|
|
107
|
+
polygon2 = Polygon(fragment2.rect)
|
|
108
|
+
rate = overlap_rate(polygon1, polygon2)
|
|
109
|
+
if rate < _MIN_RATE:
|
|
110
|
+
continue
|
|
111
|
+
|
|
112
|
+
if rate > beast_rate:
|
|
113
|
+
beast_j = j
|
|
114
|
+
beast_rate = rate
|
|
115
|
+
|
|
116
|
+
if beast_j != -1:
|
|
117
|
+
matched_fragments.append((
|
|
118
|
+
fragment1,
|
|
119
|
+
fragments2[beast_j],
|
|
120
|
+
))
|
|
121
|
+
del fragments2[beast_j]
|
|
122
|
+
else:
|
|
123
|
+
not_matched_fragments.append(fragment1)
|
|
124
|
+
|
|
125
|
+
not_matched_fragments.extend(fragments2)
|
|
126
|
+
return matched_fragments, not_matched_fragments
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
from typing import Generator
|
|
2
|
+
from shapely.geometry import Polygon
|
|
3
|
+
from .types import Layout, OCRFragment
|
|
4
|
+
from .rectangle import Rectangle
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
_INCLUDES_MIN_RATE = 0.99
|
|
8
|
+
|
|
9
|
+
def remove_overlap_layouts(layouts: list[Layout]) -> list[Layout]:
|
|
10
|
+
ctx = _OverlapMatrixContext(layouts)
|
|
11
|
+
# the reason for repeating this multiple times is that deleting a layout
|
|
12
|
+
# may cause its parent layout to change from an originally non-deletable
|
|
13
|
+
# state to a deletable state.
|
|
14
|
+
while True:
|
|
15
|
+
removed_count = len(ctx.removed_indexes)
|
|
16
|
+
for i, layout in enumerate(layouts):
|
|
17
|
+
if i in ctx.removed_indexes or \
|
|
18
|
+
any(0.0 < rate < _INCLUDES_MIN_RATE for rate in ctx.rates_with_other(i)) or \
|
|
19
|
+
all(0.0 == rate for rate in ctx.rates_with_other(i)):
|
|
20
|
+
continue
|
|
21
|
+
|
|
22
|
+
if len(layout.fragments) == 0:
|
|
23
|
+
ctx.removed_indexes.add(i)
|
|
24
|
+
else:
|
|
25
|
+
for j in ctx.search_includes_indexes(i):
|
|
26
|
+
ctx.removed_indexes.add(j)
|
|
27
|
+
layout.fragments.extend(layouts[j].fragments)
|
|
28
|
+
|
|
29
|
+
if len(ctx.removed_indexes) == removed_count:
|
|
30
|
+
break
|
|
31
|
+
|
|
32
|
+
return [
|
|
33
|
+
layout for i, layout in enumerate(layouts)
|
|
34
|
+
if i not in ctx.removed_indexes
|
|
35
|
+
]
|
|
36
|
+
|
|
37
|
+
class _OverlapMatrixContext:
|
|
38
|
+
def __init__(self, layouts: list[Layout]):
|
|
39
|
+
length: int = len(layouts)
|
|
40
|
+
polygons: list[Polygon] = [Polygon(layout.rect) for layout in layouts]
|
|
41
|
+
self.rate_matrix: list[list[float]] = [[1.0 for _ in range(length)] for _ in range(length)]
|
|
42
|
+
self.removed_indexes: set[int] = set()
|
|
43
|
+
for i in range(length):
|
|
44
|
+
polygon1 = polygons[i]
|
|
45
|
+
rates = self.rate_matrix[i]
|
|
46
|
+
for j in range(length):
|
|
47
|
+
if i != j:
|
|
48
|
+
polygon2 = polygons[j]
|
|
49
|
+
rates[j] = overlap_rate(polygon1, polygon2)
|
|
50
|
+
|
|
51
|
+
def rates_with_other(self, index: int):
|
|
52
|
+
for i, rate in enumerate(self.rate_matrix[index]):
|
|
53
|
+
if i != index and i not in self.removed_indexes:
|
|
54
|
+
yield rate
|
|
55
|
+
|
|
56
|
+
def search_includes_indexes(self, index: int):
|
|
57
|
+
for i, rate in enumerate(self.rate_matrix[index]):
|
|
58
|
+
if i != index and \
|
|
59
|
+
i not in self.removed_indexes and \
|
|
60
|
+
rate >= _INCLUDES_MIN_RATE:
|
|
61
|
+
yield i
|
|
62
|
+
|
|
63
|
+
def regroup_lines(origin_fragments: list[OCRFragment]) -> list[OCRFragment]:
|
|
64
|
+
fragments: list[OCRFragment] = []
|
|
65
|
+
for group in _split_fragments_into_groups(origin_fragments):
|
|
66
|
+
if len(group) == 1:
|
|
67
|
+
fragments.append(group[0])
|
|
68
|
+
continue
|
|
69
|
+
|
|
70
|
+
min_order: float = float("inf")
|
|
71
|
+
texts: list[str] = []
|
|
72
|
+
text_rate_weights: float = 0.0
|
|
73
|
+
proto_texts_len: int = 0
|
|
74
|
+
|
|
75
|
+
x1: float = float("inf")
|
|
76
|
+
y1: float = float("inf")
|
|
77
|
+
x2: float = float("-inf")
|
|
78
|
+
y2: float = float("-inf")
|
|
79
|
+
|
|
80
|
+
for fragment in sorted(group, key=lambda x: x.rect.lt[0] + x.rect.lb[0]):
|
|
81
|
+
proto_texts_len += len(fragment.text)
|
|
82
|
+
text_rate_weights += fragment.rank * len(fragment.text)
|
|
83
|
+
texts.append(fragment.text)
|
|
84
|
+
min_order = min(min_order, fragment.order)
|
|
85
|
+
for x, y in fragment.rect:
|
|
86
|
+
x1 = min(x1, x)
|
|
87
|
+
y1 = min(y1, y)
|
|
88
|
+
x2 = max(x2, x)
|
|
89
|
+
y2 = max(y2, y)
|
|
90
|
+
|
|
91
|
+
fragments.append(OCRFragment(
|
|
92
|
+
order=min_order,
|
|
93
|
+
text=" ".join(texts),
|
|
94
|
+
rank=text_rate_weights / proto_texts_len,
|
|
95
|
+
rect=Rectangle(
|
|
96
|
+
lt=(x1, y1),
|
|
97
|
+
rt=(x2, y1),
|
|
98
|
+
lb=(x1, y2),
|
|
99
|
+
rb=(x2, y2),
|
|
100
|
+
),
|
|
101
|
+
))
|
|
102
|
+
return fragments
|
|
103
|
+
|
|
104
|
+
def _split_fragments_into_groups(fragments: list[OCRFragment]) -> Generator[list[OCRFragment], None, None]:
|
|
105
|
+
group: list[OCRFragment] = []
|
|
106
|
+
sum_height: float = 0.0
|
|
107
|
+
sum_median: float = 0.0
|
|
108
|
+
max_deviation_rate = 0.35
|
|
109
|
+
|
|
110
|
+
for fragment in sorted(fragments, key=lambda x: x.rect.lt[1] + x.rect.rt[1]):
|
|
111
|
+
_, y1, _, y2 = fragment.rect.wrapper
|
|
112
|
+
height = y2 - y1
|
|
113
|
+
median = (y1 + y2) / 2.0
|
|
114
|
+
|
|
115
|
+
if len(group) > 0:
|
|
116
|
+
next_mean_median = (sum_median + median) / (len(group) + 1)
|
|
117
|
+
next_mean_height = (sum_height + height) / (len(group) + 1)
|
|
118
|
+
deviation_rate = abs(median - next_mean_median) / next_mean_height
|
|
119
|
+
|
|
120
|
+
if deviation_rate > max_deviation_rate:
|
|
121
|
+
yield group
|
|
122
|
+
group = []
|
|
123
|
+
sum_height = 0.0
|
|
124
|
+
sum_median = 0.0
|
|
125
|
+
|
|
126
|
+
group.append(fragment)
|
|
127
|
+
sum_height += height
|
|
128
|
+
sum_median += median
|
|
129
|
+
|
|
130
|
+
if len(group) > 0:
|
|
131
|
+
yield group
|
|
132
|
+
|
|
133
|
+
# calculating overlap ratio: The reason why area is not used is
|
|
134
|
+
# that most of the measurements are of rectangles representing text lines.
|
|
135
|
+
# they are very sensitive to changes in height because they are very thin and long.
|
|
136
|
+
# In order to make it equally sensitive to length and width, the ratio of area is not used.
|
|
137
|
+
def overlap_rate(polygon1: Polygon, polygon2: Polygon) -> float:
|
|
138
|
+
intersection: Polygon = polygon1.intersection(polygon2)
|
|
139
|
+
if intersection.is_empty:
|
|
140
|
+
return 0.0
|
|
141
|
+
else:
|
|
142
|
+
overlay_width, overlay_height = _polygon_size(intersection)
|
|
143
|
+
polygon2_width, polygon2_height = _polygon_size(polygon2)
|
|
144
|
+
return (overlay_width / polygon2_width + overlay_height / polygon2_height) / 2.0
|
|
145
|
+
|
|
146
|
+
def _polygon_size(polygon: Polygon) -> tuple[float, float]:
|
|
147
|
+
x1: float = float("inf")
|
|
148
|
+
y1: float = float("inf")
|
|
149
|
+
x2: float = float("-inf")
|
|
150
|
+
y2: float = float("-inf")
|
|
151
|
+
for x, y in polygon.exterior.coords:
|
|
152
|
+
x1 = min(x1, x)
|
|
153
|
+
y1 = min(y1, y)
|
|
154
|
+
x2 = max(x2, x)
|
|
155
|
+
y2 = max(y2, y)
|
|
156
|
+
return x2 - x1, y2 - y1
|
|
@@ -8,11 +8,11 @@ _FRAGMENT_COLOR = (0x49, 0xCF, 0xCB) # Light Green
|
|
|
8
8
|
def plot(image: Image, layouts: Iterable[Layout]):
|
|
9
9
|
draw = ImageDraw.Draw(image, mode="RGBA")
|
|
10
10
|
for layout in layouts:
|
|
11
|
-
draw.polygon([p for p in layout.rect], outline=_layout_color(layout), width=
|
|
11
|
+
draw.polygon([p for p in layout.rect], outline=_layout_color(layout), width=5)
|
|
12
12
|
|
|
13
13
|
for layout in layouts:
|
|
14
14
|
for fragments in layout.fragments:
|
|
15
|
-
draw.polygon([p for p in fragments.rect], outline=_FRAGMENT_COLOR, width=
|
|
15
|
+
draw.polygon([p for p in fragments.rect], outline=_FRAGMENT_COLOR, width=3)
|
|
16
16
|
|
|
17
17
|
def _layout_color(layout: Layout) -> tuple[int, int, int]:
|
|
18
18
|
cls = layout.cls
|
|
@@ -44,6 +44,19 @@ class Rectangle:
|
|
|
44
44
|
width += distance
|
|
45
45
|
return width / 2, height / 2
|
|
46
46
|
|
|
47
|
+
@property
|
|
48
|
+
def wrapper(self) -> tuple[float, float, float, float]:
|
|
49
|
+
x1: float = float("inf")
|
|
50
|
+
y1: float = float("inf")
|
|
51
|
+
x2: float = float("-inf")
|
|
52
|
+
y2: float = float("-inf")
|
|
53
|
+
for x, y in self:
|
|
54
|
+
x1 = min(x1, x)
|
|
55
|
+
y1 = min(y1, y)
|
|
56
|
+
x2 = max(x2, x)
|
|
57
|
+
y2 = max(y2, y)
|
|
58
|
+
return x1, y1, x2, y2
|
|
59
|
+
|
|
47
60
|
def intersection_area(rect1: Rectangle, rect2: Rectangle) -> float:
|
|
48
61
|
poly1 = Polygon(rect1)
|
|
49
62
|
poly2 = Polygon(rect2)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: doc-page-extractor
|
|
3
|
-
Version: 0.0.2
|
|
3
|
+
Version: 0.0.4
|
|
4
4
|
Summary: doc page extractor can identify text and format in images and return structured data.
|
|
5
5
|
Home-page: https://github.com/Moskize91/doc-page-extractor
|
|
6
6
|
Author: Tao Zeyu
|
|
@@ -12,7 +12,6 @@ Requires-Dist: pillow<11.0,>=10.3
|
|
|
12
12
|
Requires-Dist: shapely<3.0,>=2.0.0
|
|
13
13
|
Requires-Dist: transformers<5.0,>=4.48.0
|
|
14
14
|
Requires-Dist: doclayout_yolo>=0.0.3
|
|
15
|
-
Requires-Dist: paddlepaddle<3.0,>=2.6.0
|
|
16
15
|
Requires-Dist: paddleocr==2.9.0
|
|
17
16
|
Dynamic: author
|
|
18
17
|
Dynamic: author-email
|
{doc_page_extractor-0.0.2 → doc_page_extractor-0.0.4}/doc_page_extractor.egg-info/SOURCES.txt
RENAMED
|
@@ -7,6 +7,8 @@ doc_page_extractor/downloader.py
|
|
|
7
7
|
doc_page_extractor/extractor.py
|
|
8
8
|
doc_page_extractor/layoutreader.py
|
|
9
9
|
doc_page_extractor/ocr.py
|
|
10
|
+
doc_page_extractor/ocr_corrector.py
|
|
11
|
+
doc_page_extractor/overlap.py
|
|
10
12
|
doc_page_extractor/plot.py
|
|
11
13
|
doc_page_extractor/raw_optimizer.py
|
|
12
14
|
doc_page_extractor/rectangle.py
|
|
@@ -17,4 +19,6 @@ doc_page_extractor.egg-info/PKG-INFO
|
|
|
17
19
|
doc_page_extractor.egg-info/SOURCES.txt
|
|
18
20
|
doc_page_extractor.egg-info/dependency_links.txt
|
|
19
21
|
doc_page_extractor.egg-info/requires.txt
|
|
20
|
-
doc_page_extractor.egg-info/top_level.txt
|
|
22
|
+
doc_page_extractor.egg-info/top_level.txt
|
|
23
|
+
tests/__init__.py
|
|
24
|
+
tests/test_history_bus.py
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="doc-page-extractor",
|
|
5
|
-
version="0.0.2",
|
|
5
|
+
version="0.0.4",
|
|
6
6
|
author="Tao Zeyu",
|
|
7
7
|
author_email="i@taozeyu.com",
|
|
8
8
|
url="https://github.com/Moskize91/doc-page-extractor",
|
|
@@ -16,7 +16,6 @@ setup(
|
|
|
16
16
|
"shapely>=2.0.0,<3.0",
|
|
17
17
|
"transformers>=4.48.0,<5.0",
|
|
18
18
|
"doclayout_yolo>=0.0.3",
|
|
19
|
-
"paddlepaddle>=2.6.0,<3.0",
|
|
20
19
|
"paddleocr==2.9.0", # https://github.com/Moskize91/doc-page-extractor/issues/3
|
|
21
20
|
],
|
|
22
21
|
)
|
|
File without changes
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import unittest
|
|
3
|
+
|
|
4
|
+
from PIL import Image
|
|
5
|
+
from doc_page_extractor import DocExtractor, Layout, LayoutClass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TestGroup(unittest.TestCase):
|
|
9
|
+
def test_history_bugs(self):
|
|
10
|
+
model_path = os.path.join(self._project_path(), "model")
|
|
11
|
+
image_path = os.path.join(self._project_path(), "tests", "images", "figure.png")
|
|
12
|
+
os.makedirs(model_path, exist_ok=True)
|
|
13
|
+
|
|
14
|
+
extractor = DocExtractor(model_path, "cpu")
|
|
15
|
+
layouts: list[tuple[LayoutClass, list[str]]]
|
|
16
|
+
|
|
17
|
+
with Image.open(image_path) as image:
|
|
18
|
+
result = extractor.extract(image, "ch")
|
|
19
|
+
layouts = [self._format_Layout(layout) for layout in result.layouts]
|
|
20
|
+
|
|
21
|
+
self.assertEqual(layouts, [
|
|
22
|
+
(LayoutClass.PLAIN_TEXT, [
|
|
23
|
+
"口的11.8%①。这既是江南农业落后的反映,又是它的原因。当战国以",
|
|
24
|
+
"后黄河流域因铁器牛耕的普及获得基本的开发,农区联结成一大片的",
|
|
25
|
+
"时候,南方农业开发始终没有突破星点状或斑块状分布的格局。由于",
|
|
26
|
+
"地旷人稀,耕作相当粗放,许多水田采取火耕水瓣的方式,旱田则多",
|
|
27
|
+
"行刀耕火种②。司马迁在《史记·货殖列传》中说:“总之,楚越之",
|
|
28
|
+
"地,地厂人希,饭稻囊鱼,或火耕而水瓣,果隋(蕨)赢(螺)蛤,",
|
|
29
|
+
"不待贾而足,地势饶食,无饥谨之患,以故皆偷生,无积聚而多",
|
|
30
|
+
"贫。”这种概括虽然未免太突出了南方经济的落后面,有一定片面性,",
|
|
31
|
+
"但大体还是反映了实际情形的。战国秦汉时期,南方与黄河流域农业",
|
|
32
|
+
"的差距显然拉大了。",
|
|
33
|
+
]),
|
|
34
|
+
(LayoutClass.FIGURE, []),
|
|
35
|
+
(LayoutClass.FIGURE_CAPTION, [
|
|
36
|
+
"西晋陶水田犁耙模型(广东连县出土)"
|
|
37
|
+
]),
|
|
38
|
+
(LayoutClass.FIGURE, []),
|
|
39
|
+
(LayoutClass.FIGURE_CAPTION, [
|
|
40
|
+
"南朝陶耙田模型 (广西苍梧倒水出土)"
|
|
41
|
+
]),
|
|
42
|
+
(LayoutClass.PLAIN_TEXT, [
|
|
43
|
+
"①据赵文林、谢淑君:《中国人口史》(人民出版社1988年)有关资料统计。",
|
|
44
|
+
"②《盐铁论·通有》:“荆扬…………伐木而树谷,焚莱而播粟,火耕而水。”"
|
|
45
|
+
]),
|
|
46
|
+
(LayoutClass.ABANDON, [
|
|
47
|
+
"136"
|
|
48
|
+
]),
|
|
49
|
+
])
|
|
50
|
+
|
|
51
|
+
def _format_Layout(self, layout: Layout) -> tuple[LayoutClass, list[str]]:
|
|
52
|
+
return layout.cls, [f.text.strip() for f in layout.fragments]
|
|
53
|
+
|
|
54
|
+
def _project_path(self) -> str:
|
|
55
|
+
return os.path.abspath(os.path.join(__file__, "..", ".."))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|