deepdoctection 0.37.3__py3-none-any.whl → 0.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +5 -1
- deepdoctection/analyzer/_config.py +2 -1
- deepdoctection/analyzer/dd.py +6 -5
- deepdoctection/analyzer/factory.py +16 -6
- deepdoctection/configs/conf_dd_one.yaml +126 -85
- deepdoctection/datapoint/box.py +2 -4
- deepdoctection/datapoint/convert.py +14 -8
- deepdoctection/datapoint/image.py +12 -5
- deepdoctection/datapoint/view.py +151 -53
- deepdoctection/extern/hfdetr.py +4 -3
- deepdoctection/extern/model.py +6 -97
- deepdoctection/mapper/cats.py +21 -10
- deepdoctection/mapper/match.py +0 -22
- deepdoctection/mapper/misc.py +12 -2
- deepdoctection/mapper/pubstruct.py +1 -1
- deepdoctection/pipe/doctectionpipe.py +20 -3
- deepdoctection/pipe/lm.py +20 -5
- deepdoctection/pipe/refine.py +6 -13
- deepdoctection/pipe/segment.py +225 -46
- deepdoctection/pipe/sub_layout.py +40 -22
- deepdoctection/train/hf_layoutlm_train.py +3 -1
- deepdoctection/utils/pdf_utils.py +17 -9
- {deepdoctection-0.37.3.dist-info → deepdoctection-0.39.dist-info}/METADATA +15 -5
- {deepdoctection-0.37.3.dist-info → deepdoctection-0.39.dist-info}/RECORD +27 -27
- {deepdoctection-0.37.3.dist-info → deepdoctection-0.39.dist-info}/WHEEL +1 -1
- {deepdoctection-0.37.3.dist-info → deepdoctection-0.39.dist-info}/LICENSE +0 -0
- {deepdoctection-0.37.3.dist-info → deepdoctection-0.39.dist-info}/top_level.txt +0 -0
|
@@ -49,27 +49,27 @@ class DetectResultGenerator:
|
|
|
49
49
|
|
|
50
50
|
def __init__(
|
|
51
51
|
self,
|
|
52
|
-
|
|
53
|
-
group_categories: Optional[list[list[
|
|
54
|
-
|
|
52
|
+
categories_name_as_key: Mapping[ObjectTypes, int],
|
|
53
|
+
group_categories: Optional[list[list[ObjectTypes]]] = None,
|
|
54
|
+
exclude_category_names: Optional[Sequence[ObjectTypes]] = None,
|
|
55
55
|
absolute_coords: bool = True,
|
|
56
56
|
) -> None:
|
|
57
57
|
"""
|
|
58
|
-
:param
|
|
58
|
+
:param categories_name_as_key: The dict of all possible detection categories
|
|
59
59
|
:param group_categories: If you only want to generate only one DetectResult for a group of categories, provided
|
|
60
60
|
that the sum of the group is less than one, then you can pass a list of list for
|
|
61
61
|
grouping category ids.
|
|
62
62
|
:param absolute_coords: 'absolute_coords' value to be set in 'DetectionResult'
|
|
63
63
|
"""
|
|
64
|
-
self.
|
|
64
|
+
self.categories_name_as_key = MappingProxyType(dict(categories_name_as_key.items()))
|
|
65
65
|
self.width: Optional[int] = None
|
|
66
66
|
self.height: Optional[int] = None
|
|
67
67
|
if group_categories is None:
|
|
68
|
-
group_categories = [[
|
|
68
|
+
group_categories = [[cat_name] for cat_name in self.categories_name_as_key]
|
|
69
69
|
self.group_categories = group_categories
|
|
70
|
-
if
|
|
71
|
-
|
|
72
|
-
self.
|
|
70
|
+
if exclude_category_names is None:
|
|
71
|
+
exclude_category_names = []
|
|
72
|
+
self.exclude_category_names = exclude_category_names
|
|
73
73
|
self.dummy_for_group_generated = [False for _ in self.group_categories]
|
|
74
74
|
self.absolute_coords = absolute_coords
|
|
75
75
|
|
|
@@ -83,17 +83,17 @@ class DetectResultGenerator:
|
|
|
83
83
|
|
|
84
84
|
if self.width is None and self.height is None:
|
|
85
85
|
raise ValueError("Initialize height and width first")
|
|
86
|
-
|
|
86
|
+
detect_result_list = self._detection_result_sanity_check(detect_result_list)
|
|
87
87
|
count = self._create_condition(detect_result_list)
|
|
88
|
-
for
|
|
89
|
-
if
|
|
90
|
-
if count[
|
|
91
|
-
if not self._dummy_for_group_generated(
|
|
88
|
+
for category_name in self.categories_name_as_key:
|
|
89
|
+
if category_name not in self.exclude_category_names:
|
|
90
|
+
if count[category_name] < 1:
|
|
91
|
+
if not self._dummy_for_group_generated(category_name):
|
|
92
92
|
detect_result_list.append(
|
|
93
93
|
DetectionResult(
|
|
94
94
|
box=[0.0, 0.0, float(self.width), float(self.height)], # type: ignore
|
|
95
|
-
class_id=
|
|
96
|
-
class_name=
|
|
95
|
+
class_id=self.categories_name_as_key[category_name],
|
|
96
|
+
class_name=category_name,
|
|
97
97
|
score=0.0,
|
|
98
98
|
absolute_coords=self.absolute_coords,
|
|
99
99
|
)
|
|
@@ -102,8 +102,8 @@ class DetectResultGenerator:
|
|
|
102
102
|
self.dummy_for_group_generated = self._initialize_dummy_for_group_generated()
|
|
103
103
|
return detect_result_list
|
|
104
104
|
|
|
105
|
-
def _create_condition(self, detect_result_list: list[DetectionResult]) -> dict[
|
|
106
|
-
count = Counter([ann.
|
|
105
|
+
def _create_condition(self, detect_result_list: list[DetectionResult]) -> dict[ObjectTypes, int]:
|
|
106
|
+
count = Counter([ann.class_name for ann in detect_result_list])
|
|
107
107
|
cat_to_group_sum = {}
|
|
108
108
|
for group in self.group_categories:
|
|
109
109
|
group_sum = 0
|
|
@@ -113,9 +113,25 @@ class DetectResultGenerator:
|
|
|
113
113
|
cat_to_group_sum[el] = group_sum
|
|
114
114
|
return cat_to_group_sum
|
|
115
115
|
|
|
116
|
-
|
|
116
|
+
@staticmethod
|
|
117
|
+
def _detection_result_sanity_check(detect_result_list: list[DetectionResult]) -> list[DetectionResult]:
|
|
118
|
+
"""
|
|
119
|
+
Go through each detect_result in the list and check if the box argument has sensible coordinates:
|
|
120
|
+
ulx >= 0 and lrx - ulx >= 0 (same for y coordinate). Remove the detection result if this condition is not
|
|
121
|
+
satisfied. We need this check because if some detection results are not sane, we might end up with some
|
|
122
|
+
none existing categories.
|
|
123
|
+
"""
|
|
124
|
+
sane_detect_results = []
|
|
125
|
+
for detect_result in detect_result_list:
|
|
126
|
+
if detect_result.box:
|
|
127
|
+
ulx, uly, lrx, lry = detect_result.box
|
|
128
|
+
if ulx >= 0 and lrx - ulx >= 0 and uly >= 0 and lry - uly >= 0:
|
|
129
|
+
sane_detect_results.append(detect_result)
|
|
130
|
+
return sane_detect_results
|
|
131
|
+
|
|
132
|
+
def _dummy_for_group_generated(self, category_name: ObjectTypes) -> bool:
|
|
117
133
|
for idx, group in enumerate(self.group_categories):
|
|
118
|
-
if
|
|
134
|
+
if category_name in group:
|
|
119
135
|
is_generated = self.dummy_for_group_generated[idx]
|
|
120
136
|
self.dummy_for_group_generated[idx] = True
|
|
121
137
|
return is_generated
|
|
@@ -176,10 +192,12 @@ class SubImageLayoutService(PipelineComponent):
|
|
|
176
192
|
self.predictor = sub_image_detector
|
|
177
193
|
super().__init__(self._get_name(sub_image_detector.name), self.predictor.model_id)
|
|
178
194
|
if self.detect_result_generator is not None:
|
|
179
|
-
if self.detect_result_generator.
|
|
195
|
+
if self.detect_result_generator.categories_name_as_key != self.predictor.categories.get_categories(
|
|
196
|
+
as_dict=True, name_as_key=True
|
|
197
|
+
):
|
|
180
198
|
raise ValueError(
|
|
181
199
|
f"The categories of the 'detect_result_generator' must be the same as the categories of the "
|
|
182
|
-
f"'sub_image_detector'. Got {self.detect_result_generator.
|
|
200
|
+
f"'sub_image_detector'. Got {self.detect_result_generator.categories_name_as_key} #"
|
|
183
201
|
f"and {self.predictor.categories.get_categories()}."
|
|
184
202
|
)
|
|
185
203
|
|
|
@@ -499,7 +499,9 @@ def train_hf_layoutlm(
|
|
|
499
499
|
)
|
|
500
500
|
pipeline_component_cls = pipeline_component_registry.get(pipeline_component_name)
|
|
501
501
|
if dataset_type == DatasetType.SEQUENCE_CLASSIFICATION:
|
|
502
|
-
pipeline_component = pipeline_component_cls(tokenizer_fast,
|
|
502
|
+
pipeline_component = pipeline_component_cls(tokenizer_fast,
|
|
503
|
+
dd_model,
|
|
504
|
+
use_other_as_default_category=True)
|
|
503
505
|
else:
|
|
504
506
|
pipeline_component = pipeline_component_cls(
|
|
505
507
|
tokenizer_fast,
|
|
@@ -181,8 +181,6 @@ class PDFStreamer:
|
|
|
181
181
|
|
|
182
182
|
streamer.close() # Do not forget to close the streamer, otherwise the file will never be closed and might
|
|
183
183
|
# cause memory leaks if you open many files.
|
|
184
|
-
|
|
185
|
-
|
|
186
184
|
"""
|
|
187
185
|
|
|
188
186
|
def __init__(self, path_or_bytes: Union[PathLikeOrStr, bytes]) -> None:
|
|
@@ -223,7 +221,10 @@ class PDFStreamer:
|
|
|
223
221
|
|
|
224
222
|
|
|
225
223
|
def _input_to_cli_str(
|
|
226
|
-
input_file_name: PathLikeOrStr,
|
|
224
|
+
input_file_name: PathLikeOrStr,
|
|
225
|
+
output_file_name: PathLikeOrStr,
|
|
226
|
+
dpi: Optional[int] = None,
|
|
227
|
+
size: Optional[tuple[int, int]] = None,
|
|
227
228
|
) -> list[str]:
|
|
228
229
|
cmd_args: list[str] = []
|
|
229
230
|
|
|
@@ -237,7 +238,10 @@ def _input_to_cli_str(
|
|
|
237
238
|
if platform.system() == "Windows":
|
|
238
239
|
command = command + ".exe"
|
|
239
240
|
cmd_args.append(command)
|
|
240
|
-
|
|
241
|
+
|
|
242
|
+
if dpi:
|
|
243
|
+
cmd_args.extend(["-r", str(dpi)])
|
|
244
|
+
cmd_args.append(str(input_file_name))
|
|
241
245
|
cmd_args.append("-png")
|
|
242
246
|
cmd_args.append(str(output_file_name))
|
|
243
247
|
|
|
@@ -275,7 +279,9 @@ def _run_poppler(poppler_args: list[str]) -> None:
|
|
|
275
279
|
raise PopplerError(status=proc.returncode, message="Syntax Error: PDF cannot be read with Poppler")
|
|
276
280
|
|
|
277
281
|
|
|
278
|
-
def pdf_to_np_array_poppler(
|
|
282
|
+
def pdf_to_np_array_poppler(
|
|
283
|
+
pdf_bytes: bytes, size: Optional[tuple[int, int]] = None, dpi: Optional[int] = None
|
|
284
|
+
) -> PixelValues:
|
|
279
285
|
"""
|
|
280
286
|
Convert a single pdf page from its byte representation to a numpy array. This function will save the pdf as to a tmp
|
|
281
287
|
file and then call poppler via `pdftoppm` resp. `pdftocairo` if the former is not available.
|
|
@@ -285,7 +291,8 @@ def pdf_to_np_array_poppler(pdf_bytes: bytes, size: Optional[tuple[int, int]] =
|
|
|
285
291
|
:param dpi: Image quality in DPI/dots-per-inch (default 200)
|
|
286
292
|
:return: numpy array
|
|
287
293
|
"""
|
|
288
|
-
|
|
294
|
+
if dpi is None and size is None:
|
|
295
|
+
raise ValueError("Either dpi or size must be provided.")
|
|
289
296
|
with save_tmp_file(pdf_bytes, "pdf_") as (tmp_name, input_file_name):
|
|
290
297
|
_run_poppler(_input_to_cli_str(input_file_name, tmp_name, dpi, size))
|
|
291
298
|
image = viz_handler.read_image(tmp_name + "-1.png")
|
|
@@ -293,7 +300,7 @@ def pdf_to_np_array_poppler(pdf_bytes: bytes, size: Optional[tuple[int, int]] =
|
|
|
293
300
|
return image.astype(uint8)
|
|
294
301
|
|
|
295
302
|
|
|
296
|
-
def pdf_to_np_array_pdfmium(pdf_bytes: bytes, dpi: int =
|
|
303
|
+
def pdf_to_np_array_pdfmium(pdf_bytes: bytes, dpi: Optional[int] = None) -> PixelValues:
|
|
297
304
|
"""
|
|
298
305
|
Convert a single pdf page from its byte representation to a numpy array using pdfium.
|
|
299
306
|
|
|
@@ -301,12 +308,13 @@ def pdf_to_np_array_pdfmium(pdf_bytes: bytes, dpi: int = 200) -> PixelValues:
|
|
|
301
308
|
:param dpi: Image quality in DPI/dots-per-inch (default 200)
|
|
302
309
|
:return: numpy array
|
|
303
310
|
"""
|
|
304
|
-
|
|
311
|
+
if dpi is None:
|
|
312
|
+
raise ValueError("dpi must be provided.")
|
|
305
313
|
page = pypdfium2.PdfDocument(pdf_bytes)[0]
|
|
306
314
|
return page.render(scale=dpi * 1 / 72).to_numpy().astype(uint8)
|
|
307
315
|
|
|
308
316
|
|
|
309
|
-
def pdf_to_np_array(pdf_bytes: bytes, size: Optional[tuple[int, int]] = None, dpi: int =
|
|
317
|
+
def pdf_to_np_array(pdf_bytes: bytes, size: Optional[tuple[int, int]] = None, dpi: Optional[int] = None) -> PixelValues:
|
|
310
318
|
"""
|
|
311
319
|
Convert a single pdf page from its byte representation to a numpy array. This function will either use Poppler or
|
|
312
320
|
pdfium to render the pdf.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.39
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -17,7 +17,7 @@ Requires-Python: >=3.9
|
|
|
17
17
|
Description-Content-Type: text/markdown
|
|
18
18
|
License-File: LICENSE
|
|
19
19
|
Requires-Dist: catalogue==2.0.10
|
|
20
|
-
Requires-Dist: huggingface_hub
|
|
20
|
+
Requires-Dist: huggingface_hub>=0.26.0
|
|
21
21
|
Requires-Dist: importlib-metadata>=5.0.0
|
|
22
22
|
Requires-Dist: jsonlines==3.1.0
|
|
23
23
|
Requires-Dist: lazy-imports==0.3.1
|
|
@@ -36,7 +36,7 @@ Requires-Dist: tabulate>=0.7.7
|
|
|
36
36
|
Requires-Dist: tqdm==4.64.0
|
|
37
37
|
Provides-Extra: tf
|
|
38
38
|
Requires-Dist: catalogue==2.0.10; extra == "tf"
|
|
39
|
-
Requires-Dist: huggingface_hub
|
|
39
|
+
Requires-Dist: huggingface_hub>=0.26.0; extra == "tf"
|
|
40
40
|
Requires-Dist: importlib-metadata>=5.0.0; extra == "tf"
|
|
41
41
|
Requires-Dist: jsonlines==3.1.0; extra == "tf"
|
|
42
42
|
Requires-Dist: lazy-imports==0.3.1; extra == "tf"
|
|
@@ -68,7 +68,7 @@ Requires-Dist: distance==0.1.3; extra == "tf"
|
|
|
68
68
|
Requires-Dist: lxml>=4.9.1; extra == "tf"
|
|
69
69
|
Provides-Extra: pt
|
|
70
70
|
Requires-Dist: catalogue==2.0.10; extra == "pt"
|
|
71
|
-
Requires-Dist: huggingface_hub
|
|
71
|
+
Requires-Dist: huggingface_hub>=0.26.0; extra == "pt"
|
|
72
72
|
Requires-Dist: importlib-metadata>=5.0.0; extra == "pt"
|
|
73
73
|
Requires-Dist: jsonlines==3.1.0; extra == "pt"
|
|
74
74
|
Requires-Dist: lazy-imports==0.3.1; extra == "pt"
|
|
@@ -127,6 +127,16 @@ Requires-Dist: types-urllib3>=1.26.25.14; extra == "dev"
|
|
|
127
127
|
Provides-Extra: test
|
|
128
128
|
Requires-Dist: pytest==8.0.2; extra == "test"
|
|
129
129
|
Requires-Dist: pytest-cov; extra == "test"
|
|
130
|
+
Dynamic: author
|
|
131
|
+
Dynamic: classifier
|
|
132
|
+
Dynamic: description
|
|
133
|
+
Dynamic: description-content-type
|
|
134
|
+
Dynamic: home-page
|
|
135
|
+
Dynamic: license
|
|
136
|
+
Dynamic: provides-extra
|
|
137
|
+
Dynamic: requires-dist
|
|
138
|
+
Dynamic: requires-python
|
|
139
|
+
Dynamic: summary
|
|
130
140
|
|
|
131
141
|
|
|
132
142
|
<p align="center">
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
deepdoctection/__init__.py,sha256=
|
|
1
|
+
deepdoctection/__init__.py,sha256=0nxfBTu-aeg3DYu9g2kEAnt3Y-lCnHSgP31qvCnsLOs,12752
|
|
2
2
|
deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
deepdoctection/analyzer/__init__.py,sha256=icClxrd20XutD6LxLgEPIWceSs4j_QfI3szCE-9BL2w,729
|
|
4
|
-
deepdoctection/analyzer/_config.py,sha256=
|
|
5
|
-
deepdoctection/analyzer/dd.py,sha256=
|
|
6
|
-
deepdoctection/analyzer/factory.py,sha256=
|
|
4
|
+
deepdoctection/analyzer/_config.py,sha256=OZMOPlyFv4gcyabPG6KO08EYx-0tUH82Ehs9YDv2B1Q,5027
|
|
5
|
+
deepdoctection/analyzer/dd.py,sha256=bfR7e1JV7BwUNDRLu0jYZU7qQXnyA_vbRAJl2Ylrq5o,5905
|
|
6
|
+
deepdoctection/analyzer/factory.py,sha256=Kf3Ztv5FEcF5yJf6i4I557aOIUHybuxIP0moHryguTQ,32344
|
|
7
7
|
deepdoctection/configs/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
|
|
8
|
-
deepdoctection/configs/conf_dd_one.yaml,sha256=
|
|
8
|
+
deepdoctection/configs/conf_dd_one.yaml,sha256=qnrDAST1PHBtdIKE_hdkZexW22FqVvNTI-PEo9wvinM,3025
|
|
9
9
|
deepdoctection/configs/conf_tesseract.yaml,sha256=oF6szDyoi15FHvq7yFUNIEjfA_jNLhGxoowiRsz_zY4,35
|
|
10
10
|
deepdoctection/dataflow/__init__.py,sha256=CWRHMpmJaPk4xY_oIIFubCt-z11SguWrMWxHZ7rdrvY,845
|
|
11
11
|
deepdoctection/dataflow/base.py,sha256=z4DCComSj5wStEPjtk0093cNNGfUMiDqx8dqz36nS_o,6221
|
|
@@ -17,10 +17,10 @@ deepdoctection/dataflow/serialize.py,sha256=4pYC7m9h53JCu99waVeKpHDpsCDDdYCrSZpP
|
|
|
17
17
|
deepdoctection/dataflow/stats.py,sha256=Bsr6v7lcesKXUYtO9wjqlzx_Yq_uyIF3Lel-tQ0i4wI,9619
|
|
18
18
|
deepdoctection/datapoint/__init__.py,sha256=3K406GbOPhoEp8koVaSbMocmSsmWifnSZ1SPb7C1lOY,1643
|
|
19
19
|
deepdoctection/datapoint/annotation.py,sha256=FEgz4COxVDfjic0gG7kS6iHnWLBIgFnquQ63Cbj2a4Y,22531
|
|
20
|
-
deepdoctection/datapoint/box.py,sha256=
|
|
21
|
-
deepdoctection/datapoint/convert.py,sha256=
|
|
22
|
-
deepdoctection/datapoint/image.py,sha256=
|
|
23
|
-
deepdoctection/datapoint/view.py,sha256=
|
|
20
|
+
deepdoctection/datapoint/box.py,sha256=UAdSnLexvFyg4KK1u9kXdJxhaWTwRxTU-cnQcvl37Q8,23410
|
|
21
|
+
deepdoctection/datapoint/convert.py,sha256=gJbHY2V8nlMht1N5VdxTmWSsOeydpFPTJsaJHp6XGgE,7516
|
|
22
|
+
deepdoctection/datapoint/image.py,sha256=S6yfsIRQgMCl6HYAcHYJSBcbfdYKKtebtkEkkkrXsMQ,33619
|
|
23
|
+
deepdoctection/datapoint/view.py,sha256=srMyPQGsK4OSiorxkyG6UAIgpViM6Ks1CI3b5k97cjY,49452
|
|
24
24
|
deepdoctection/datasets/__init__.py,sha256=-A3aR90aDsHPmVM35JavfnQ2itYSCn3ujl4krRni1QU,1076
|
|
25
25
|
deepdoctection/datasets/adapter.py,sha256=Ly_vbOAgVI73V41FUccnSX1ECTOyesW_qsuvQuvOZbw,7796
|
|
26
26
|
deepdoctection/datasets/base.py,sha256=DT4i-d74sIEiUNC6UspIHNJuHSK0t1dBv7qwadg4rLw,22341
|
|
@@ -55,10 +55,10 @@ deepdoctection/extern/d2detect.py,sha256=zrKv1yurApnjD7QZIZk_8LYCahjmN82MQUjHjv8
|
|
|
55
55
|
deepdoctection/extern/deskew.py,sha256=sPoixu8S9he-0wbs-jgxtPE2V9BiP4-3uZlb6F5Y1SA,3077
|
|
56
56
|
deepdoctection/extern/doctrocr.py,sha256=T3_tvlih22_dVCBZypS1Y8tjQQB1fkAxIbGdUGHIapQ,24473
|
|
57
57
|
deepdoctection/extern/fastlang.py,sha256=F4gK-SEwcCujjxH327ZDzMGWToJ49xS_dCKcePQ9IlY,4780
|
|
58
|
-
deepdoctection/extern/hfdetr.py,sha256=
|
|
58
|
+
deepdoctection/extern/hfdetr.py,sha256=JzHrrTyzS9qh6T2TsvKboAGZkIhno2txmSoLQ5Vd-lo,12077
|
|
59
59
|
deepdoctection/extern/hflayoutlm.py,sha256=tFaf90FRbZzhSycdp8rGkeiPywQa6UcTEEwbayIXkr0,57023
|
|
60
60
|
deepdoctection/extern/hflm.py,sha256=kwS6kcSlY_2m9u0RzBLTRq-UMM7c1PhyUaDTvSdejus,9217
|
|
61
|
-
deepdoctection/extern/model.py,sha256=
|
|
61
|
+
deepdoctection/extern/model.py,sha256=lbVwDa3vD6VwCD_dsozcI8b4xDZs4KJ1628SxaDdtHQ,55378
|
|
62
62
|
deepdoctection/extern/pdftext.py,sha256=KS_t27SUiYn_IOS_J2lF9lSSo22vLagxmxvYCY3CqXA,7228
|
|
63
63
|
deepdoctection/extern/tessocr.py,sha256=tG7etMvZ-jHFdq-jJAHYMJii3ujDjMfAFYUsjBp3nKI,17444
|
|
64
64
|
deepdoctection/extern/texocr.py,sha256=yMt5ZzKtsjd7ogrcNXba7zccGGGF9LXK194EtER6YNQ,5804
|
|
@@ -88,17 +88,17 @@ deepdoctection/extern/tp/tpfrcnn/utils/__init__.py,sha256=kiPlXxHlTGN9eI7YE9Bgwt
|
|
|
88
88
|
deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py,sha256=aBLqPg_ApaiimtBRaOsLKTZZFIBh87vVtqjLPMaX9fQ,2379
|
|
89
89
|
deepdoctection/extern/tp/tpfrcnn/utils/np_box_ops.py,sha256=O-q1GQiOEd1lN1MQDsJvHwD2OmBO-qHNeqJ1Qnec93g,3539
|
|
90
90
|
deepdoctection/mapper/__init__.py,sha256=Xqb34aCjslZDQnqQgCSvnloL5DbdT9eHhn-StpVPbzE,1130
|
|
91
|
-
deepdoctection/mapper/cats.py,sha256=
|
|
91
|
+
deepdoctection/mapper/cats.py,sha256=s73JzONV2UQ71szfljurk7H1-UjDBWsW4oNLs5xePUk,16474
|
|
92
92
|
deepdoctection/mapper/cocostruct.py,sha256=GcbUpPFUg67pcOHQluWBFOFcGaYnlZcTmwBDERBVgCA,5978
|
|
93
93
|
deepdoctection/mapper/d2struct.py,sha256=Dx-YnycsIQH4a5-9Gn_yMhiQ-gOFgMueNeH3rhXjuCU,8555
|
|
94
94
|
deepdoctection/mapper/hfstruct.py,sha256=2PjGKsYturVJBimLT1CahYh09KSRAFEHz_QNtC162kQ,5551
|
|
95
95
|
deepdoctection/mapper/laylmstruct.py,sha256=abMZkYU2W0e_VcCm_c0ZXNFuv-lfMFWcTedcZS5EYvE,42935
|
|
96
96
|
deepdoctection/mapper/maputils.py,sha256=eI6ZcDg9W5uB6xQNBZpMIdEd86HlCxTtkJuyROdTqiw,8146
|
|
97
|
-
deepdoctection/mapper/match.py,sha256=
|
|
98
|
-
deepdoctection/mapper/misc.py,sha256=
|
|
97
|
+
deepdoctection/mapper/match.py,sha256=Ed9FsuVPNp_faaW5PKnvUHZoEXcRcrO-muduTMzjp1s,8937
|
|
98
|
+
deepdoctection/mapper/misc.py,sha256=vX-fV420Te00eD-cqTiWBV2twHqdBcBV2_7rAFRgPRg,7164
|
|
99
99
|
deepdoctection/mapper/pascalstruct.py,sha256=TzVU1p0oiw0nOuxTFFbEB9vXJxH1v6VUvTJ7MD0manU,3828
|
|
100
100
|
deepdoctection/mapper/prodigystruct.py,sha256=Re4Sd_zAp6qOvbXZLmMJeG0IGEfMQxebuyDeZgMcTa8,6827
|
|
101
|
-
deepdoctection/mapper/pubstruct.py,sha256=
|
|
101
|
+
deepdoctection/mapper/pubstruct.py,sha256=PAJ2N1HSPNS6F2ZrIwlD7PiBhIM-rJscK_Ti8OR_IGs,23370
|
|
102
102
|
deepdoctection/mapper/tpstruct.py,sha256=YNABRibvcISD5Lavg3jouoE4FMdqXEJoM-hNoB_rnww,4481
|
|
103
103
|
deepdoctection/mapper/xfundstruct.py,sha256=_3r3c0K82fnF2h1HxA85h-9ETYrHwcERa6MNc6Ko6Z8,8807
|
|
104
104
|
deepdoctection/pipe/__init__.py,sha256=ywTVoetftdL6plXg2YlBzMfmqBZupq7yXblSVyvvkcQ,1127
|
|
@@ -106,21 +106,21 @@ deepdoctection/pipe/anngen.py,sha256=3319l4aaXzcY4w6ItVBNPX8LGS5fHFDVtyVY9KMefac
|
|
|
106
106
|
deepdoctection/pipe/base.py,sha256=ynNg5SSRuUVxN69VWOO3Oi7WSeGrYwn3A56NQMBJDvw,14222
|
|
107
107
|
deepdoctection/pipe/common.py,sha256=haOb4v0jLX3r41BSC8cVseX2E320_HkSrGlZsQiKE2g,17728
|
|
108
108
|
deepdoctection/pipe/concurrency.py,sha256=AAKRsVgaBEYNluntbDa46SBF1JZ_XqnWLDSWrNvAzEo,9657
|
|
109
|
-
deepdoctection/pipe/doctectionpipe.py,sha256=
|
|
109
|
+
deepdoctection/pipe/doctectionpipe.py,sha256=bGW3ugky-fb-nEe-3bvO6Oc_4_6w82cQboGM_6p2eIo,12530
|
|
110
110
|
deepdoctection/pipe/language.py,sha256=5zI0UQC6Fh12_r2pfVL42HoCGz2hpHrOhpXAn5m-rYw,5451
|
|
111
111
|
deepdoctection/pipe/layout.py,sha256=xIhnJpyUSbvLbhTXyAKXY1hmG9352jihGYFSclTH_1g,5567
|
|
112
|
-
deepdoctection/pipe/lm.py,sha256=
|
|
112
|
+
deepdoctection/pipe/lm.py,sha256=Ygj6MmBvBZ7l4RGCwBuhmMsOM0Ep3LWteNg7bzh-UmI,17703
|
|
113
113
|
deepdoctection/pipe/order.py,sha256=PnJZiCnxFluJiECXLTZT0c1Rr66vIRBFraa_G41UA2k,40121
|
|
114
|
-
deepdoctection/pipe/refine.py,sha256=
|
|
114
|
+
deepdoctection/pipe/refine.py,sha256=dTfI396xydPdbzpfo4yqFcuxl3UAB1y-WbSQn1o76ec,22367
|
|
115
115
|
deepdoctection/pipe/registry.py,sha256=aFx-Tn0xhVA5l5H18duNW5QoTNKQltybsEUEzsMgUfg,902
|
|
116
|
-
deepdoctection/pipe/segment.py,sha256=
|
|
117
|
-
deepdoctection/pipe/sub_layout.py,sha256=
|
|
116
|
+
deepdoctection/pipe/segment.py,sha256=CR83HQMW0hrRG8W6pFuB0YibxQMWpqI7_LaUIcJcQwo,59116
|
|
117
|
+
deepdoctection/pipe/sub_layout.py,sha256=N1RcID-boORcwsW_j0l64HpUu3rff0ge5qEanudLYgk,13838
|
|
118
118
|
deepdoctection/pipe/text.py,sha256=h9q6d3HFOs7LOg-iwdLUPiQxrPqgunBVNmtYMBrfRQE,11180
|
|
119
119
|
deepdoctection/pipe/transform.py,sha256=9Om7X7hJeL4jgUwHM1CHa4sb5v7Qo1PtVG0ls_3nI7w,3798
|
|
120
120
|
deepdoctection/train/__init__.py,sha256=YFTRAZF1F7cEAKTdAIi1BLyYb6rSRcwq09Ui5Lu8d6E,1071
|
|
121
121
|
deepdoctection/train/d2_frcnn_train.py,sha256=sFc_G-mEpaM8d1CCE0_6Gl4nBh11X2RYRBA3p_ylFJQ,16000
|
|
122
122
|
deepdoctection/train/hf_detr_train.py,sha256=8ydysxzOPE_IPoNFGaHb7PbKr9Nbl41rcY4lbylQavU,10783
|
|
123
|
-
deepdoctection/train/hf_layoutlm_train.py,sha256=
|
|
123
|
+
deepdoctection/train/hf_layoutlm_train.py,sha256=irSg-IpbVoSlaw1-vZCej2mCZcctONtXr5Z2NQAc_a4,22680
|
|
124
124
|
deepdoctection/train/tp_frcnn_train.py,sha256=pEpXokSVGveqo82pRnhnAmHPmjQ_8wQWpqM4ZyNHJgs,13049
|
|
125
125
|
deepdoctection/utils/__init__.py,sha256=brBceRWeov9WXMiJTjyJOF2rHMP8trGGRRjhMdZ61nI,2371
|
|
126
126
|
deepdoctection/utils/concurrency.py,sha256=nIhpkSncmv0LBB8PtcOLY-BsRGlfcDpz7foVdgzZd20,4598
|
|
@@ -134,15 +134,15 @@ deepdoctection/utils/identifier.py,sha256=QkNaGGqPynHwDPnd3_m8iur4Cv64rcQa7qolCE
|
|
|
134
134
|
deepdoctection/utils/logger.py,sha256=J0OVKiXP_2A82MWbbJoOeMEJ-75aZu5npgaS_yI6mVA,10003
|
|
135
135
|
deepdoctection/utils/metacfg.py,sha256=hD76KQ_RnD_5B02qLI2Zxf3WfnsnXhEI_KUTKpw91RI,5711
|
|
136
136
|
deepdoctection/utils/mocks.py,sha256=IkN3-IzAl4eX0ibgKIHg8IY7ykVw6BnpF6XnxKnKaZI,2389
|
|
137
|
-
deepdoctection/utils/pdf_utils.py,sha256=
|
|
137
|
+
deepdoctection/utils/pdf_utils.py,sha256=Fi0eZ2GbnO7N61Rd8b8YRKRff4dalHAzkcn3zpGPoic,13119
|
|
138
138
|
deepdoctection/utils/settings.py,sha256=k6OyuWbj-IPeaO9zT9RZ-5Yad1wNhWGYqGLZdtgXAZY,12464
|
|
139
139
|
deepdoctection/utils/tqdm.py,sha256=cBUtR0L1x0KMeYrLP2rrzyzCamCjpQAKroHXLv81_pk,1820
|
|
140
140
|
deepdoctection/utils/transform.py,sha256=3kCgsEeRkG1efCdkfvj7tUFMs-e2jbjbflq826F2GPU,8502
|
|
141
141
|
deepdoctection/utils/types.py,sha256=_3dmPdCIZNLbgU5QP5k_c5phDf18xLe1kYL6t2nM45s,2953
|
|
142
142
|
deepdoctection/utils/utils.py,sha256=csVs_VvCq4QBETPoE2JdTTL4MFYnD4xh-Js5vRb612g,6492
|
|
143
143
|
deepdoctection/utils/viz.py,sha256=Jf8ePNYWlpuyaS6SeTYQ4OyA3eNhtgjvAQZnGNdgHC0,27051
|
|
144
|
-
deepdoctection-0.
|
|
145
|
-
deepdoctection-0.
|
|
146
|
-
deepdoctection-0.
|
|
147
|
-
deepdoctection-0.
|
|
148
|
-
deepdoctection-0.
|
|
144
|
+
deepdoctection-0.39.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
|
|
145
|
+
deepdoctection-0.39.dist-info/METADATA,sha256=0OcWmWb8bssiwC2_Xnb6hUKyY0ISv6Bc5qVGosbrn3c,19741
|
|
146
|
+
deepdoctection-0.39.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
147
|
+
deepdoctection-0.39.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
|
|
148
|
+
deepdoctection-0.39.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|