deepdoctection 0.43.1__py3-none-any.whl → 0.43.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
25
25
 
26
26
  # pylint: enable=wrong-import-position
27
27
 
28
- __version__ = "0.43.1"
28
+ __version__ = "0.43.3"
29
29
 
30
30
  _IMPORT_STRUCTURE = {
31
31
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -479,8 +479,8 @@ class Image:
479
479
 
480
480
  def remove(
481
481
  self,
482
- annotation_ids: Optional[Union[str, list[str]]] = None,
483
- service_ids: Optional[Union[str, list[str]]] = None,
482
+ annotation_ids: Optional[Union[str, Sequence[str]]] = None,
483
+ service_ids: Optional[Union[str, Sequence[str]]] = None,
484
484
  ) -> None:
485
485
  """
486
486
  Instead of removing consider deactivating annotations.
@@ -183,7 +183,7 @@ class Word(ImageAnnotationBaseView):
183
183
  attr_names = (
184
184
  set(WordType)
185
185
  .union(super().get_attribute_names())
186
- .union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK})
186
+ .union({Relationships.READING_ORDER, Relationships.LAYOUT_LINK, Relationships.LINK})
187
187
  )
188
188
  return {attr_name.value if isinstance(attr_name, ObjectTypes) else attr_name for attr_name in attr_names}
189
189
 
@@ -773,6 +773,7 @@ class Page(Image):
773
773
  "figures",
774
774
  "residual_layouts",
775
775
  "document_summary",
776
+ "document_mapping",
776
777
  }
777
778
  include_residual_text_container: bool = True
778
779
 
@@ -1389,19 +1390,32 @@ class Page(Image):
1389
1390
  include_residual_text_container=include_residual_text_container,
1390
1391
  )
1391
1392
 
1392
- def get_token(self) -> list[Mapping[str, str]]:
1393
+ def get_entities(self) -> list[Mapping[str, str]]:
1393
1394
  """
1394
1395
  Returns:
1395
- A list of tuples with word and non default token tags
1396
+ A list of dicts with the following structure:
1397
+
1398
+ ```python
1399
+ {
1400
+ "word": str, # word characters
1401
+ "entity": str, # token tag
1402
+ "annotation_id": str, # annotation id of the word
1403
+ "successor_annotation_id": Optional[str] # annotation_id of the successor word, if any
1404
+ }
1405
+ ```
1406
+
1396
1407
  """
1397
1408
  block_with_order = self._order("layouts")
1398
1409
  all_words = []
1399
1410
  for block in block_with_order:
1400
1411
  all_words.extend(block.get_ordered_words()) # type: ignore
1401
1412
  return [
1402
- {"word": word.CHARACTERS, "entity": word.TOKEN_TAG}
1413
+ {"word": word.characters,
1414
+ "entity": word.token_tag.value,
1415
+ "annotation_id": word.annotation_id,
1416
+ "successor_annotation_id": word.successor[0].annotation_id if word.successor else None}
1403
1417
  for word in all_words
1404
- if word.TOKEN_TAG not in (TokenClasses.OTHER, None)
1418
+ if word.token_tag not in (TokenClasses.OTHER, None)
1405
1419
  ]
1406
1420
 
1407
1421
  def __copy__(self) -> Page:
@@ -502,6 +502,7 @@ class TokenClassResult:
502
502
  semantic_name: semantic name
503
503
  bio_tag: bio tag
504
504
  score: prediction score
505
+ successor_uuid: uuid of the next token in the sequence
505
506
  """
506
507
 
507
508
  uuid: str
@@ -512,6 +513,7 @@ class TokenClassResult:
512
513
  bio_tag: ObjectTypes = DefaultType.DEFAULT_TYPE
513
514
  score: Optional[float] = None
514
515
  token_id: Optional[int] = None
516
+ successor_uuid: Optional[str] = None
515
517
 
516
518
 
517
519
  @dataclass
@@ -200,7 +200,7 @@ def xfund_to_image(
200
200
  ann_ids.extend(entity_id_to_ann_id[linked_entity])
201
201
  for ann_id in ann_ids:
202
202
  if ann_id != word.annotation_id:
203
- word.dump_relationship(Relationships.SEMANTIC_ENTITY_LINK, ann_id)
203
+ word.dump_relationship(Relationships.LINK, ann_id)
204
204
 
205
205
  if mapping_context.context_error:
206
206
  return None
@@ -19,7 +19,7 @@
19
19
  Datapoint manager
20
20
  """
21
21
  from dataclasses import asdict
22
- from typing import Optional, Union
22
+ from typing import Optional, Union, Sequence
23
23
 
24
24
  import numpy as np
25
25
 
@@ -372,6 +372,19 @@ class DatapointManager:
372
372
  return None
373
373
  return ann.annotation_id
374
374
 
375
+ def remove_annotations(self, annotation_ids: Sequence[str]) -> None:
376
+ """
377
+ Removes the annotation by the given `annotation_id`.
378
+
379
+ Args:
380
+ annotation_ids: The `annotation_id` to remove.
381
+ """
382
+ self.assert_datapoint_passed()
383
+ self.datapoint.remove(annotation_ids)
384
+ for ann_id in annotation_ids:
385
+ if ann_id in self._cache_anns:
386
+ self._cache_anns.pop(ann_id)
387
+
375
388
  def deactivate_annotation(self, annotation_id: str) -> None:
376
389
  """
377
390
  Deactivates the annotation by the given `annotation_id`.
@@ -80,6 +80,7 @@ class SummaryType(ObjectTypes):
80
80
 
81
81
  SUMMARY = "summary"
82
82
  DOCUMENT_SUMMARY = "document_summary"
83
+ DOCUMENT_MAPPING = "document_mapping"
83
84
 
84
85
 
85
86
  @object_types_registry.register("DocumentType")
@@ -228,8 +229,9 @@ class Relationships(ObjectTypes):
228
229
 
229
230
  CHILD = "child"
230
231
  READING_ORDER = "reading_order"
231
- SEMANTIC_ENTITY_LINK = "semantic_entity_link"
232
+ LINK = "link"
232
233
  LAYOUT_LINK = "layout_link"
234
+ SUCCESSOR = "successor"
233
235
 
234
236
 
235
237
  @object_types_registry.register("Languages")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: deepdoctection
3
- Version: 0.43.1
3
+ Version: 0.43.3
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -192,7 +192,7 @@ Check the demo of a document layout analysis pipeline with OCR on 🤗
192
192
  - Fine-tuning and evaluation tools.
193
193
  - Lot's of [tutorials](https://github.com/deepdoctection/notebooks)
194
194
 
195
- Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb)
195
+ Have a look at the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Analyzer_Get_Started.ipynb)
196
196
  for an easy start.
197
197
 
198
198
  Check the [**release notes**](https://github.com/deepdoctection/deepdoctection/releases) for recent updates.
@@ -344,8 +344,7 @@ pip install ".[tf]" # or "pip install -e .[tf]"
344
344
 
345
345
  ### Running a Docker container from Docker hub
346
346
 
347
- Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.
348
- com/r/deepdoctection/deepdoctection).
347
+ Pre-existing Docker images can be downloaded from the [Docker hub](https://hub.docker.com/r/deepdoctection/deepdoctection).
349
348
 
350
349
  ```
351
350
  docker pull deepdoctection/deepdoctection:<release_tag>
@@ -1,4 +1,4 @@
1
- deepdoctection/__init__.py,sha256=QYGjP3fSt1deLMEKIb7LUDVlzZgi1Q7phZQADkOjlGk,12964
1
+ deepdoctection/__init__.py,sha256=nJ6cTH9fWxNIGSDjMS7AoU9hbJwfbb-SvTwOw6eiPAg,12964
2
2
  deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  deepdoctection/analyzer/__init__.py,sha256=wg0BcFwdCeREwzZfa--Yx8HUJ9LPv5z5PmLwtkZdPH8,772
4
4
  deepdoctection/analyzer/config.py,sha256=Uwi9MYOym0QXzFcip7bRXoy732P4tZVMFBUnUZk9c1w,41761
@@ -20,8 +20,8 @@ deepdoctection/datapoint/__init__.py,sha256=DOhcN81MhyPUuFw9F4pyEDyZseeD9qxb8NdB
20
20
  deepdoctection/datapoint/annotation.py,sha256=f32BNmzUGJoNMeGst2RGC2jmjJpzzjxyBRKFG8FCubY,23092
21
21
  deepdoctection/datapoint/box.py,sha256=QAS8sK2Ge4_ysW6zOYkLlzNwhSyw_mhYcYsxscClEno,31453
22
22
  deepdoctection/datapoint/convert.py,sha256=6ENXX3tBdY8ogb2NBPxsOsQMGnQux8ol5nrUfWS5tYE,7352
23
- deepdoctection/datapoint/image.py,sha256=nDaWUtdD5j6l_iXW9d2PoIyXBC8M3_idoEIXm7JWGyQ,35139
24
- deepdoctection/datapoint/view.py,sha256=5TYmKpNNZwJb-NrUXv08H3_zSfHKDHhg6LnEZjBqVns,56622
23
+ deepdoctection/datapoint/image.py,sha256=kqwCz8DSc19hQpkl_4L1_Ek7_2KrH5KsV9e0S-R4n5w,35147
24
+ deepdoctection/datapoint/view.py,sha256=lVUTi2vfh6w7k92kcwQNpKmfOGgZmM12bYJUFrO5Pno,57163
25
25
  deepdoctection/datasets/__init__.py,sha256=4ifjIwWCPYiS31GzUlVDScrkNOrb1eo5xHlRXNyg_58,994
26
26
  deepdoctection/datasets/adapter.py,sha256=VSLM_980aHi4TpgOxfxiBHiF_fUXyh348PXet6zTo-4,7779
27
27
  deepdoctection/datasets/base.py,sha256=HTIquJir2BZRTLl1HSQM0ICfvjIaWAjJeyz3BEHgdb0,23175
@@ -51,7 +51,7 @@ deepdoctection/eval/registry.py,sha256=us6EGN_tAia1Mk1mwWQwDeE-xqxcuopztdi8n-ieG
51
51
  deepdoctection/eval/tedsmetric.py,sha256=EcNeJynsmxyl5bOH3bjy2wE647ONf0SF5OZyGbVu35Q,9963
52
52
  deepdoctection/eval/tp_eval_callback.py,sha256=lqrOn2tdaRiF_Vr_9CwBr2ryatcWu3mQKya8YZ2pA9A,5261
53
53
  deepdoctection/extern/__init__.py,sha256=jG2qe5_X7fJFnJlx04Lf1KUTXKKKYtCkKR7WQ7looUk,991
54
- deepdoctection/extern/base.py,sha256=vs4EO9vkfyTW-nVM0dnlqsmnPPeHIXI2wW5b4Wpiz-Y,31547
54
+ deepdoctection/extern/base.py,sha256=LomTR9HXcBU55MPDIA8D1rIamk7DUmToJmgcRXzCoeU,31650
55
55
  deepdoctection/extern/d2detect.py,sha256=I0oEkprr5iVpKpM3a3nknAU-sXwNoDQdp_B1gzzODsk,22374
56
56
  deepdoctection/extern/deskew.py,sha256=L_jU0rXh03qzwaT79EIqE_zYMUVeFwWDbsGbtahuL2k,3124
57
57
  deepdoctection/extern/doctrocr.py,sha256=vOI67NCeEznHzdY4zu1tggwg3_PfwV1eigsr2Z8EPzc,25543
@@ -101,9 +101,9 @@ deepdoctection/mapper/pascalstruct.py,sha256=PviZjhTk4p5HDUTlF8qhWPyraKD0uh51f2h
101
101
  deepdoctection/mapper/prodigystruct.py,sha256=OWzPUbNDrqwFipH8YWI5eSxwMdA7qYczaFdsHNrE_4c,7001
102
102
  deepdoctection/mapper/pubstruct.py,sha256=UTyfUmzMSuf2BXtdYwHjK7ngsIwAxSZjwTxDtz6DySg,23416
103
103
  deepdoctection/mapper/tpstruct.py,sha256=dxtEVHYVnkH-zjjbHzkFrPgS9eheys6E-CMlsjaOnxo,5468
104
- deepdoctection/mapper/xfundstruct.py,sha256=XLUZ-yBMWtKFQ40vxHl6p8EZZvl68JdwJlV00A93Zy8,9108
104
+ deepdoctection/mapper/xfundstruct.py,sha256=JVs92T5l3HedQvdc6ZMACj2mq5Co4WCS9B-CnQ01opA,9092
105
105
  deepdoctection/pipe/__init__.py,sha256=E3cYAVWOvMzIN7jbKFyqLjFXahcFGcAGkb1uChM_XCY,1034
106
- deepdoctection/pipe/anngen.py,sha256=Hfi7C6-iOv7t8tjFoz4FuIhcz6yMZx52f5SG9bsVnLg,16365
106
+ deepdoctection/pipe/anngen.py,sha256=I7VFoTTa-sJCBwFJNaZeLD7pcrLjRPTX2I6-ZDCDHPI,16819
107
107
  deepdoctection/pipe/base.py,sha256=oszB_DepcFtORvDdGTZZPWMhk01C68RUWXHjeX7SF3M,18163
108
108
  deepdoctection/pipe/common.py,sha256=OcsqHr_c66Yqt98hFeKwaa0mciWMCauw0HZ3YnHx8MU,24586
109
109
  deepdoctection/pipe/concurrency.py,sha256=_EKZi4eCeF3mVHytZL_fMwyqa25C2aR9g8vrIFB8iR4,9780
@@ -136,14 +136,14 @@ deepdoctection/utils/logger.py,sha256=ddQ0xBStluf8OvoRlEB8YkqyRR-ZYgyJYLClTmJJMA
136
136
  deepdoctection/utils/metacfg.py,sha256=5M390--ZMoyJEt5oZOwFMGt2i8OF_ayeb0NVmUO_3OQ,7235
137
137
  deepdoctection/utils/mocks.py,sha256=IkN3-IzAl4eX0ibgKIHg8IY7ykVw6BnpF6XnxKnKaZI,2389
138
138
  deepdoctection/utils/pdf_utils.py,sha256=BrxTuY9j0COyIRkJchJ0tt2h6ZsA2an6z-H8E8QwgUQ,13490
139
- deepdoctection/utils/settings.py,sha256=OrFEe9Mll3UuDhjyS-cTCv_q1ZSr30Jpl9nQxk__t2I,12824
139
+ deepdoctection/utils/settings.py,sha256=0P6nh9-84wWyMm9J7w9I7gI1xo8mN4M4xZ0IXzcqDbE,12862
140
140
  deepdoctection/utils/tqdm.py,sha256=kx3Ivf0x85S0ZmEaN5mImu0V6isOgygOU8iyr2U99XU,1850
141
141
  deepdoctection/utils/transform.py,sha256=jgeCyQWLN9q79jCGW7jysyKUKcJ1AVMk8OslF-3fbag,16095
142
142
  deepdoctection/utils/types.py,sha256=ti4WdtIJSg3TGK_YPkkoY9PYGMnR2tTX6Xfik8U1pNk,2986
143
143
  deepdoctection/utils/utils.py,sha256=NBUb1qbx8Jm-AvYN1Sdbk0huXhbAKxZ-ZtOcMespsMM,7064
144
144
  deepdoctection/utils/viz.py,sha256=bujRIujvX317rPz4jBrj0yd3WP8wPjDUiI5GUrw9MzQ,27339
145
- deepdoctection-0.43.1.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
146
- deepdoctection-0.43.1.dist-info/METADATA,sha256=jD_6fJFeK-4XpVsjUL0BgFooiuhFPNE9rK0RR6-_2gY,13381
147
- deepdoctection-0.43.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
148
- deepdoctection-0.43.1.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
149
- deepdoctection-0.43.1.dist-info/RECORD,,
145
+ deepdoctection-0.43.3.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
146
+ deepdoctection-0.43.3.dist-info/METADATA,sha256=tFoZH2VB3ZkiTwzVmd1OBlg1f6eFLrKwwcLyYo6BX1g,13389
147
+ deepdoctection-0.43.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
148
+ deepdoctection-0.43.3.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
149
+ deepdoctection-0.43.3.dist-info/RECORD,,