deepdoctection 0.43.1__py3-none-any.whl → 0.43.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +1 -1
- deepdoctection/datapoint/image.py +2 -2
- deepdoctection/datapoint/view.py +19 -5
- deepdoctection/extern/base.py +2 -0
- deepdoctection/mapper/xfundstruct.py +1 -1
- deepdoctection/pipe/anngen.py +14 -1
- deepdoctection/utils/settings.py +3 -1
- {deepdoctection-0.43.1.dist-info → deepdoctection-0.43.3.dist-info}/METADATA +3 -4
- {deepdoctection-0.43.1.dist-info → deepdoctection-0.43.3.dist-info}/RECORD +12 -12
- {deepdoctection-0.43.1.dist-info → deepdoctection-0.43.3.dist-info}/WHEEL +0 -0
- {deepdoctection-0.43.1.dist-info → deepdoctection-0.43.3.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.43.1.dist-info → deepdoctection-0.43.3.dist-info}/top_level.txt +0 -0
deepdoctection/__init__.py
CHANGED
|
@@ -479,8 +479,8 @@ class Image:
|
|
|
479
479
|
|
|
480
480
|
def remove(
|
|
481
481
|
self,
|
|
482
|
-
annotation_ids: Optional[Union[str,
|
|
483
|
-
service_ids: Optional[Union[str,
|
|
482
|
+
annotation_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
483
|
+
service_ids: Optional[Union[str, Sequence[str]]] = None,
|
|
484
484
|
) -> None:
|
|
485
485
|
"""
|
|
486
486
|
Instead of removing consider deactivating annotations.
|
deepdoctection/datapoint/view.py
CHANGED
|
@@ -183,7 +183,7 @@ class Word(ImageAnnotationBaseView):
|
|
|
183
183
|
attr_names = (
|
|
184
184
|
set(WordType)
|
|
185
185
|
.union(super().get_attribute_names())
|
|
186
|
-
.union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK})
|
|
186
|
+
.union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK, Relationships.LINK})
|
|
187
187
|
)
|
|
188
188
|
return {attr_name.value if isinstance(attr_name, ObjectTypes) else attr_name for attr_name in attr_names}
|
|
189
189
|
|
|
@@ -773,6 +773,7 @@ class Page(Image):
|
|
|
773
773
|
"figures",
|
|
774
774
|
"residual_layouts",
|
|
775
775
|
"document_summary",
|
|
776
|
+
"document_mapping",
|
|
776
777
|
}
|
|
777
778
|
include_residual_text_container: bool = True
|
|
778
779
|
|
|
@@ -1389,19 +1390,32 @@ class Page(Image):
|
|
|
1389
1390
|
include_residual_text_container=include_residual_text_container,
|
|
1390
1391
|
)
|
|
1391
1392
|
|
|
1392
|
-
def
|
|
1393
|
+
def get_entities(self) -> list[Mapping[str, str]]:
|
|
1393
1394
|
"""
|
|
1394
1395
|
Returns:
|
|
1395
|
-
A list of
|
|
1396
|
+
A list of dicts with the following structure:
|
|
1397
|
+
|
|
1398
|
+
```python
|
|
1399
|
+
{
|
|
1400
|
+
"word": str, # word characters
|
|
1401
|
+
"entity": str, # token tag
|
|
1402
|
+
"annotation_id": str, # annotation id of the word
|
|
1403
|
+
"successor_annotation_id": Optional[str] # annotation_id of the successor word, if any
|
|
1404
|
+
}
|
|
1405
|
+
```
|
|
1406
|
+
|
|
1396
1407
|
"""
|
|
1397
1408
|
block_with_order = self._order("layouts")
|
|
1398
1409
|
all_words = []
|
|
1399
1410
|
for block in block_with_order:
|
|
1400
1411
|
all_words.extend(block.get_ordered_words()) # type: ignore
|
|
1401
1412
|
return [
|
|
1402
|
-
{"word": word.
|
|
1413
|
+
{"word": word.characters,
|
|
1414
|
+
"entity": word.token_tag.value,
|
|
1415
|
+
"annotation_id": word.annotation_id,
|
|
1416
|
+
"successor_annotation_id": word.successor[0].annotation_id if word.successor else None}
|
|
1403
1417
|
for word in all_words
|
|
1404
|
-
if word.
|
|
1418
|
+
if word.token_tag not in (TokenClasses.OTHER, None)
|
|
1405
1419
|
]
|
|
1406
1420
|
|
|
1407
1421
|
def __copy__(self) -> Page:
|
deepdoctection/extern/base.py
CHANGED
|
@@ -502,6 +502,7 @@ class TokenClassResult:
|
|
|
502
502
|
semantic_name: semantic name
|
|
503
503
|
bio_tag: bio tag
|
|
504
504
|
score: prediction score
|
|
505
|
+
successor_uuid: uuid of the next token in the sequence
|
|
505
506
|
"""
|
|
506
507
|
|
|
507
508
|
uuid: str
|
|
@@ -512,6 +513,7 @@ class TokenClassResult:
|
|
|
512
513
|
bio_tag: ObjectTypes = DefaultType.DEFAULT_TYPE
|
|
513
514
|
score: Optional[float] = None
|
|
514
515
|
token_id: Optional[int] = None
|
|
516
|
+
successor_uuid: Optional[str] = None
|
|
515
517
|
|
|
516
518
|
|
|
517
519
|
@dataclass
|
|
@@ -200,7 +200,7 @@ def xfund_to_image(
|
|
|
200
200
|
ann_ids.extend(entity_id_to_ann_id[linked_entity])
|
|
201
201
|
for ann_id in ann_ids:
|
|
202
202
|
if ann_id != word.annotation_id:
|
|
203
|
-
word.dump_relationship(Relationships.
|
|
203
|
+
word.dump_relationship(Relationships.LINK, ann_id)
|
|
204
204
|
|
|
205
205
|
if mapping_context.context_error:
|
|
206
206
|
return None
|
deepdoctection/pipe/anngen.py
CHANGED
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
Datapoint manager
|
|
20
20
|
"""
|
|
21
21
|
from dataclasses import asdict
|
|
22
|
-
from typing import Optional, Union
|
|
22
|
+
from typing import Optional, Union, Sequence
|
|
23
23
|
|
|
24
24
|
import numpy as np
|
|
25
25
|
|
|
@@ -372,6 +372,19 @@ class DatapointManager:
|
|
|
372
372
|
return None
|
|
373
373
|
return ann.annotation_id
|
|
374
374
|
|
|
375
|
+
def remove_annotations(self, annotation_ids: Sequence[str]) -> None:
|
|
376
|
+
"""
|
|
377
|
+
Removes the annotation by the given `annotation_id`.
|
|
378
|
+
|
|
379
|
+
Args:
|
|
380
|
+
annotation_ids: The `annotation_id` to remove.
|
|
381
|
+
"""
|
|
382
|
+
self.assert_datapoint_passed()
|
|
383
|
+
self.datapoint.remove(annotation_ids)
|
|
384
|
+
for ann_id in annotation_ids:
|
|
385
|
+
if ann_id in self._cache_anns:
|
|
386
|
+
self._cache_anns.pop(ann_id)
|
|
387
|
+
|
|
375
388
|
def deactivate_annotation(self, annotation_id: str) -> None:
|
|
376
389
|
"""
|
|
377
390
|
Deactivates the annotation by the given `annotation_id`.
|
deepdoctection/utils/settings.py
CHANGED
|
@@ -80,6 +80,7 @@ class SummaryType(ObjectTypes):
|
|
|
80
80
|
|
|
81
81
|
SUMMARY = "summary"
|
|
82
82
|
DOCUMENT_SUMMARY = "document_summary"
|
|
83
|
+
DOCUMENT_MAPPING = "document_mapping"
|
|
83
84
|
|
|
84
85
|
|
|
85
86
|
@object_types_registry.register("DocumentType")
|
|
@@ -228,8 +229,9 @@ class Relationships(ObjectTypes):
|
|
|
228
229
|
|
|
229
230
|
CHILD = "child"
|
|
230
231
|
READING_ORDER = "reading_order"
|
|
231
|
-
|
|
232
|
+
LINK = "link"
|
|
232
233
|
LAYOUT_LINK = "layout_link"
|
|
234
|
+
SUCCESSOR = "successor"
|
|
233
235
|
|
|
234
236
|
|
|
235
237
|
@object_types_registry.register("Languages")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.43.
|
|
3
|
+
Version: 0.43.3
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -192,7 +192,7 @@ Check the demo of a document layout analysis pipeline with OCR on 🤗
|
|
|
192
192
|
- Fine-tuning and evaluation tools.
|
|
193
193
|
- Lot's of [tutorials](https://github.com/deepdoctection/notebooks)
|
|
194
194
|
|
|
195
|
-
Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/
|
|
195
|
+
Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Get_Started.ipynb)
|
|
196
196
|
for an easy start.
|
|
197
197
|
|
|
198
198
|
Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
|
|
@@ -344,8 +344,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
|
|
|
344
344
|
|
|
345
345
|
### Running a Docker container from Docker hub
|
|
346
346
|
|
|
347
|
-
Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.
|
|
348
|
-
com/r/deepdoctection/deepdoctection).
|
|
347
|
+
Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
|
|
349
348
|
|
|
350
349
|
```
|
|
351
350
|
docker pull deepdoctection/deepdoctection:<release_tag>
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
deepdoctection/__init__.py,sha256=
|
|
1
|
+
deepdoctection/__init__.py,sha256=nJ6cTH9fWxNIGSDjMS7AoU9hbJwfbb-SvTwOw6eiPAg,12964
|
|
2
2
|
deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
deepdoctection/analyzer/__init__.py,sha256=wg0BcFwdCeREwzZfa--Yx8HUJ9LPv5z5PmLwtkZdPH8,772
|
|
4
4
|
deepdoctection/analyzer/config.py,sha256=Uwi9MYOym0QXzFcip7bRXoy732P4tZVMFBUnUZk9c1w,41761
|
|
@@ -20,8 +20,8 @@ deepdoctection/datapoint/__init__.py,sha256=DOhcN81MhyPUuFw9F4pyEDyZseeD9qxb8NdB
|
|
|
20
20
|
deepdoctection/datapoint/annotation.py,sha256=f32BNmzUGJoNMeGst2RGC2jmjJpzzjxyBRKFG8FCubY,23092
|
|
21
21
|
deepdoctection/datapoint/box.py,sha256=QAS8sK2Ge4_ysW6zOYkLlzNwhSyw_mhYcYsxscClEno,31453
|
|
22
22
|
deepdoctection/datapoint/convert.py,sha256=6ENXX3tBdY8ogb2NBPxsOsQMGnQux8ol5nrUfWS5tYE,7352
|
|
23
|
-
deepdoctection/datapoint/image.py,sha256=
|
|
24
|
-
deepdoctection/datapoint/view.py,sha256=
|
|
23
|
+
deepdoctection/datapoint/image.py,sha256=kqwCz8DSc19hQpkl_4L1_Ek7_2KrH5KsV9e0S-R4n5w,35147
|
|
24
|
+
deepdoctection/datapoint/view.py,sha256=lVUTi2vfh6w7k92kcwQNpKmfOGgZmM12bYJUFrO5Pno,57163
|
|
25
25
|
deepdoctection/datasets/__init__.py,sha256=4ifjIwWCPYiS31GzUlVDScrkNOrb1eo5xHlRXNyg_58,994
|
|
26
26
|
deepdoctection/datasets/adapter.py,sha256=VSLM_980aHi4TpgOxfxiBHiF_fUXyh348PXet6zTo-4,7779
|
|
27
27
|
deepdoctection/datasets/base.py,sha256=HTIquJir2BZRTLl1HSQM0ICfvjIaWAjJeyz3BEHgdb0,23175
|
|
@@ -51,7 +51,7 @@ deepdoctection/eval/registry.py,sha256=us6EGN_tAia1Mk1mwWQwDeE-xqxcuopztdi8n-ieG
|
|
|
51
51
|
deepdoctection/eval/tedsmetric.py,sha256=EcNeJynsmxyl5bOH3bjy2wE647ONf0SF5OZyGbVu35Q,9963
|
|
52
52
|
deepdoctection/eval/tp_eval_callback.py,sha256=lqrOn2tdaRiF_Vr_9CwBr2ryatcWu3mQKya8YZ2pA9A,5261
|
|
53
53
|
deepdoctection/extern/__init__.py,sha256=jG2qe5_X7fJFnJlx04Lf1KUTXKKKYtCkKR7WQ7looUk,991
|
|
54
|
-
deepdoctection/extern/base.py,sha256=
|
|
54
|
+
deepdoctection/extern/base.py,sha256=LomTR9HXcBU55MPDIA8D1rIamk7DUmToJmgcRXzCoeU,31650
|
|
55
55
|
deepdoctection/extern/d2detect.py,sha256=I0oEkprr5iVpKpM3a3nknAU-sXwNoDQdp_B1gzzODsk,22374
|
|
56
56
|
deepdoctection/extern/deskew.py,sha256=L_jU0rXh03qzwaT79EIqE_zYMUVeFwWDbsGbtahuL2k,3124
|
|
57
57
|
deepdoctection/extern/doctrocr.py,sha256=vOI67NCeEznHzdY4zu1tggwg3_PfwV1eigsr2Z8EPzc,25543
|
|
@@ -101,9 +101,9 @@ deepdoctection/mapper/pascalstruct.py,sha256=PviZjhTk4p5HDUTlF8qhWPyraKD0uh51f2h
|
|
|
101
101
|
deepdoctection/mapper/prodigystruct.py,sha256=OWzPUbNDrqwFipH8YWI5eSxwMdA7qYczaFdsHNrE_4c,7001
|
|
102
102
|
deepdoctection/mapper/pubstruct.py,sha256=UTyfUmzMSuf2BXtdYwHjK7ngsIwAxSZjwTxDtz6DySg,23416
|
|
103
103
|
deepdoctection/mapper/tpstruct.py,sha256=dxtEVHYVnkH-zjjbHzkFrPgS9eheys6E-CMlsjaOnxo,5468
|
|
104
|
-
deepdoctection/mapper/xfundstruct.py,sha256=
|
|
104
|
+
deepdoctection/mapper/xfundstruct.py,sha256=JVs92T5l3HedQvdc6ZMACj2mq5Co4WCS9B-CnQ01opA,9092
|
|
105
105
|
deepdoctection/pipe/__init__.py,sha256=E3cYAVWOvMzIN7jbKFyqLjFXahcFGcAGkb1uChM_XCY,1034
|
|
106
|
-
deepdoctection/pipe/anngen.py,sha256=
|
|
106
|
+
deepdoctection/pipe/anngen.py,sha256=I7VFoTTa-sJCBwFJNaZeLD7pcrLjRPTX2I6-ZDCDHPI,16819
|
|
107
107
|
deepdoctection/pipe/base.py,sha256=oszB_DepcFtORvDdGTZZPWMhk01C68RUWXHjeX7SF3M,18163
|
|
108
108
|
deepdoctection/pipe/common.py,sha256=OcsqHr_c66Yqt98hFeKwaa0mciWMCauw0HZ3YnHx8MU,24586
|
|
109
109
|
deepdoctection/pipe/concurrency.py,sha256=_EKZi4eCeF3mVHytZL_fMwyqa25C2aR9g8vrIFB8iR4,9780
|
|
@@ -136,14 +136,14 @@ deepdoctection/utils/logger.py,sha256=ddQ0xBStluf8OvoRlEB8YkqyRR-ZYgyJYLClTmJJMA
|
|
|
136
136
|
deepdoctection/utils/metacfg.py,sha256=5M390--ZMoyJEt5oZOwFMGt2i8OF_ayeb0NVmUO_3OQ,7235
|
|
137
137
|
deepdoctection/utils/mocks.py,sha256=IkN3-IzAl4eX0ibgKIHg8IY7ykVw6BnpF6XnxKnKaZI,2389
|
|
138
138
|
deepdoctection/utils/pdf_utils.py,sha256=BrxTuY9j0COyIRkJchJ0tt2h6ZsA2an6z-H8E8QwgUQ,13490
|
|
139
|
-
deepdoctection/utils/settings.py,sha256=
|
|
139
|
+
deepdoctection/utils/settings.py,sha256=0P6nh9-84wWyMm9J7w9I7gI1xo8mN4M4xZ0IXzcqDbE,12862
|
|
140
140
|
deepdoctection/utils/tqdm.py,sha256=kx3Ivf0x85S0ZmEaN5mImu0V6isOgygOU8iyr2U99XU,1850
|
|
141
141
|
deepdoctection/utils/transform.py,sha256=jgeCyQWLN9q79jCGW7jysyKUKcJ1AVMk8OslF-3fbag,16095
|
|
142
142
|
deepdoctection/utils/types.py,sha256=ti4WdtIJSg3TGK_YPkkoY9PYGMnR2tTX6Xfik8U1pNk,2986
|
|
143
143
|
deepdoctection/utils/utils.py,sha256=NBUb1qbx8Jm-AvYN1Sdbk0huXhbAKxZ-ZtOcMespsMM,7064
|
|
144
144
|
deepdoctection/utils/viz.py,sha256=bujRIujvX317rPz4jBrj0yd3WP8wPjDUiI5GUrw9MzQ,27339
|
|
145
|
-
deepdoctection-0.43.
|
|
146
|
-
deepdoctection-0.43.
|
|
147
|
-
deepdoctection-0.43.
|
|
148
|
-
deepdoctection-0.43.
|
|
149
|
-
deepdoctection-0.43.
|
|
145
|
+
deepdoctection-0.43.3.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
|
|
146
|
+
deepdoctection-0.43.3.dist-info/METADATA,sha256=tFoZH2VB3ZkiTwzVmd1OBlg1f6eFLrKwwcLyYo6BX1g,13389
|
|
147
|
+
deepdoctection-0.43.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
148
|
+
deepdoctection-0.43.3.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
|
|
149
|
+
deepdoctection-0.43.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|