deepdoctection 0.44.0__py3-none-any.whl → 0.44.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of deepdoctection has been flagged as potentially problematic.

deepdoctection/__init__.py

```diff
@@ -25,7 +25,7 @@ from .utils.logger import LoggingRecord, logger
 
 # pylint: enable=wrong-import-position
 
-__version__ = "0.44.0"
+__version__ = "0.44.1"
 
 _IMPORT_STRUCTURE = {
     "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -315,6 +315,8 @@ _IMPORT_STRUCTURE = {
         "get_apted_requirement",
         "distance_available",
         "get_distance_requirement",
+        "numpy_v1_available",
+        "get_numpy_v1_requirement",
         "transformers_available",
         "get_transformers_requirement",
         "detectron2_available",
```
deepdoctection/datapoint/image.py

```diff
@@ -42,6 +42,7 @@ from .convert import as_dict, convert_b64_to_np_array, convert_np_array_to_b64,
 
 class MetaAnnotationDict(TypedDict):
     """MetaAnnotationDict"""
+
     image_annotations: list[str]
     sub_categories: dict[str, dict[str, list[str]]]
     relationships: dict[str, list[str]]
```
deepdoctection/datapoint/view.py

```diff
@@ -42,13 +42,60 @@ from ..utils.settings import (
     get_type,
 )
 from ..utils.transform import ResizeTransform, box_to_point4, point4_to_box
-from ..utils.types import HTML, AnnotationDict, Chunks, ImageDict, PathLikeOrStr, PixelValues, Text_, csv
+from ..utils.types import HTML, AnnotationDict, Chunks, ImageDict, PathLikeOrStr, PixelValues, csv
 from ..utils.viz import draw_boxes, interactive_imshow, viz_handler
 from .annotation import CategoryAnnotation, ContainerAnnotation, ImageAnnotation, ann_from_dict
 from .box import BoundingBox, crop_box_from_image
 from .image import Image
 
 
+@dataclass(frozen=True)
+class Text_:
+    """
+    Immutable dataclass for storing structured text extraction results.
+
+    Attributes:
+        text: The concatenated text string.
+        words: List of word strings.
+        ann_ids: List of annotation IDs for each word.
+        token_classes: List of token class names for each word.
+        token_class_ann_ids: List of annotation IDs for each token class.
+        token_tags: List of token tag names for each word.
+        token_tag_ann_ids: List of annotation IDs for each token tag.
+        token_class_ids: List of token class IDs.
+        token_tag_ids: List of token tag IDs.
+    """
+
+    text: str = ""
+    words: list[str] = field(default_factory=list)
+    ann_ids: list[str] = field(default_factory=list)
+    token_classes: list[str] = field(default_factory=list)
+    token_class_ann_ids: list[str] = field(default_factory=list)
+    token_tags: list[str] = field(default_factory=list)
+    token_tag_ann_ids: list[str] = field(default_factory=list)
+    token_class_ids: list[str] = field(default_factory=list)
+    token_tag_ids: list[str] = field(default_factory=list)
+
+    def as_dict(self) -> dict[str, Union[list[str], str]]:
+        """
+        Returns the Text_ as a dictionary.
+
+        Returns:
+            A dictionary representation of the Text_ dataclass.
+        """
+        return {
+            "text": self.text,
+            "words": self.words,
+            "ann_ids": self.ann_ids,
+            "token_classes": self.token_classes,
+            "token_class_ann_ids": self.token_class_ann_ids,
+            "token_tags": self.token_tags,
+            "token_tag_ann_ids": self.token_tag_ann_ids,
+            "token_class_ids": self.token_class_ids,
+            "token_tag_ids": self.token_tag_ids,
+        }
+
+
 class ImageAnnotationBaseView(ImageAnnotation):
     """
     Consumption class for having easier access to categories added to an `ImageAnnotation`.
```
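
The new frozen `Text_` dataclass replaces the loose `Text_: TypeAlias = dict[str, Any]` removed from `utils/types.py` further down, trading free-form keys for a fixed, type-checked schema. A minimal sketch of how it behaves, assuming the `Text_` class above is in scope (values are made up):

```python
# Sketch: exercising the frozen Text_ container defined above.
# The field values here are illustrative, not taken from the package.
t = Text_(
    text="Invoice 2024",
    words=["Invoice", "2024"],
    ann_ids=["ann-1", "ann-2"],
)
assert t.as_dict()["words"] == ["Invoice", "2024"]
# frozen=True: assigning `t.text = "x"` raises dataclasses.FrozenInstanceError.
```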
deepdoctection/datapoint/view.py (continued)

```diff
@@ -263,13 +310,28 @@ class Layout(ImageAnnotationBaseView):
         """
         words = self.get_ordered_words()
         if words:
-            characters, ann_ids, token_classes, token_tags, token_classes_ids, token_tag_ids = zip(
+            (
+                characters,
+                ann_ids,
+                token_classes,
+                token_class_ann_ids,
+                token_tags,
+                token_tag_ann_ids,
+                token_classes_ids,
+                token_tag_ids,
+            ) = map(list, zip(
                 *[
                     (
                         word.characters,
                         word.annotation_id,
                         word.token_class,
+                        word.get_sub_category(WordType.TOKEN_CLASS).annotation_id
+                        if WordType.TOKEN_CLASS in word.sub_categories
+                        else None,
                         word.token_tag,
+                        word.get_sub_category(WordType.TOKEN_TAG).annotation_id
+                        if WordType.TOKEN_TAG in word.sub_categories
+                        else None,
                         word.get_sub_category(WordType.TOKEN_CLASS).category_id
                         if WordType.TOKEN_CLASS in word.sub_categories
                         else None,
@@ -279,25 +341,40 @@
                     )
                     for word in words
                 ]
-            )
+            ))
         else:
-            characters, ann_ids, token_classes, token_tags, token_classes_ids, token_tag_ids = (
-                [],  # type: ignore
-                [],  # type: ignore
-                [],  # type: ignore
-                [],  # type: ignore
-                [],  # type: ignore
-                [],  # type: ignore
+            (
+                characters,
+                ann_ids,
+                token_classes,
+                token_class_ann_ids,
+                token_tags,
+                token_tag_ann_ids,
+                token_classes_ids,
+                token_tag_ids,
+            ) = (
+                [],
+                [],
+                [],
+                [],
+                [],
+                [],
+                [],
+                [],
             )
-        return {
-            "text": " ".join(characters),
-            "words": characters,
-            "ann_ids": ann_ids,
-            "token_classes": token_classes,
-            "token_tags": token_tags,
-            "token_class_ids": token_classes_ids,
-            "token_tag_ids": token_tag_ids,
-        }
+
+        return Text_(
+            text=" ".join(characters),  # type: ignore
+            words=characters,  # type: ignore
+            ann_ids=ann_ids,  # type: ignore
+            token_classes=token_classes,  # type: ignore
+            token_class_ann_ids=token_class_ann_ids,  # type: ignore
+            token_tags=token_tags,  # type: ignore
+            token_tag_ann_ids=token_tag_ann_ids,  # type: ignore
+            token_class_ids=token_classes_ids,  # type: ignore
+            token_tag_ids=token_tag_ids,  # type: ignore
+        )
+
 
     def get_attribute_names(self) -> set[str]:
         attr_names = (
```
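
The `map(list, zip(*rows))` idiom above transposes one tuple per word into one list per field; plain `zip` would yield tuples, while the `Text_` fields are typed as lists. A self-contained illustration with dummy rows:

```python
# Transposition idiom used in Layout.text_ above (dummy data):
rows = [
    # (characters, annotation_id, token_class)
    ("Invoice", "ann-1", None),
    ("2024", "ann-2", None),
]
characters, ann_ids, token_classes = map(list, zip(*rows))
assert characters == ["Invoice", "2024"]
assert ann_ids == ["ann-1", "ann-2"]
assert token_classes == [None, None]
```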
deepdoctection/datapoint/view.py (continued)

```diff
@@ -626,26 +703,34 @@ class Table(Layout):
         words: list[str] = []
         ann_ids: list[str] = []
         token_classes: list[str] = []
+        token_class_ann_ids: list[str] = []
         token_tags: list[str] = []
+        token_tag_ann_ids: list[str] = []
         token_class_ids: list[str] = []
         token_tag_ids: list[str] = []
         for cell in cells:
-            text.append(cell.text_["text"])
-            words.extend(cell.text_["words"])
-            ann_ids.extend(cell.text_["ann_ids"])
-            token_classes.extend(cell.text_["token_classes"])
-            token_tags.extend(cell.text_["token_tags"])
-            token_class_ids.extend(cell.text_["token_class_ids"])
-            token_tag_ids.extend(cell.text_["token_tag_ids"])
-        return {
-            "text": " ".join(text),
-            "words": words,
-            "ann_ids": ann_ids,
-            "token_classes": token_classes,
-            "token_tags": token_tags,
-            "token_class_ids": token_class_ids,
-            "token_tag_ids": token_tag_ids,
-        }
+            text_ = cell.text_
+            text.append(text_.text)
+            words.extend(text_.words)
+            ann_ids.extend(text_.ann_ids)
+            token_classes.extend(text_.token_classes)
+            token_class_ann_ids.extend(text_.token_class_ann_ids)
+            token_tags.extend(text_.token_tags)
+            token_tag_ann_ids.extend(text_.token_tag_ann_ids)
+            token_class_ids.extend(text_.token_class_ids)
+            token_tag_ids.extend(text_.token_tag_ids)
+        return Text_(
+            text=" ".join(text),
+            words=words,
+            ann_ids=ann_ids,
+            token_classes=token_classes,
+            token_class_ann_ids=token_class_ann_ids,
+            token_tags=token_tags,
+            token_tag_ann_ids=token_tag_ann_ids,
+            token_class_ids=token_class_ids,
+            token_tag_ids=token_tag_ids,
+        )
+
 
     @property
     def words(self) -> list[ImageAnnotationBaseView]:
```
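
The cell loop above is a field-wise fold: each cell contributes one `Text_`, and the table concatenates field by field. Schematically, with two hand-made parts and assuming `Text_` is in scope:

```python
# Field-wise aggregation, mirroring the cell loop above (dummy data):
parts = [
    Text_(text="Net 100", words=["Net", "100"]),
    Text_(text="Gross 119", words=["Gross", "119"]),
]
merged = Text_(
    text=" ".join(p.text for p in parts),
    words=[w for p in parts for w in p.words],
)
assert merged.text == "Net 100 Gross 119"
assert merged.words == ["Net", "100", "Gross", "119"]
```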
deepdoctection/datapoint/view.py (continued)

````diff
@@ -1053,7 +1138,7 @@ class Page(Image):
 
         ```python
         {"text": text string,
-         "text_list": list of single words,
+         "words": list of single words,
          "annotation_ids": word annotation ids}
         ```
         """
@@ -1062,26 +1147,34 @@ class Page(Image):
         words: list[str] = []
         ann_ids: list[str] = []
         token_classes: list[str] = []
+        token_class_ann_ids: list[str] = []
         token_tags: list[str] = []
+        token_tag_ann_ids: list[str] = []
         token_class_ids: list[str] = []
         token_tag_ids: list[str] = []
         for block in block_with_order:
-            text.append(block.text_["text"])  # type: ignore
-            words.extend(block.text_["words"])  # type: ignore
-            ann_ids.extend(block.text_["ann_ids"])  # type: ignore
-            token_classes.extend(block.text_["token_classes"])  # type: ignore
-            token_tags.extend(block.text_["token_tags"])  # type: ignore
-            token_class_ids.extend(block.text_["token_class_ids"])  # type: ignore
-            token_tag_ids.extend(block.text_["token_tag_ids"])  # type: ignore
-        return {
-            "text": " ".join(text),
-            "words": words,
-            "ann_ids": ann_ids,
-            "token_classes": token_classes,
-            "token_tags": token_tags,
-            "token_class_ids": token_class_ids,
-            "token_tag_ids": token_tag_ids,
-        }
+            text_ = block.text_
+            text.append(text_.text)  # type: ignore
+            words.extend(text_.words)  # type: ignore
+            ann_ids.extend(text_.ann_ids)  # type: ignore
+            token_classes.extend(text_.token_classes)  # type: ignore
+            token_class_ann_ids.extend(text_.token_class_ann_ids)  # type: ignore
+            token_tags.extend(text_.token_tags)  # type: ignore
+            token_tag_ann_ids.extend(text_.token_tag_ann_ids)  # type: ignore
+            token_class_ids.extend(text_.token_class_ids)  # type: ignore
+            token_tag_ids.extend(text_.token_tag_ids)  # type: ignore
+        return Text_(
+            text=" ".join(text),
+            words=words,
+            ann_ids=ann_ids,
+            token_classes=token_classes,
+            token_class_ann_ids=token_class_ann_ids,
+            token_tags=token_tags,
+            token_tag_ann_ids=token_tag_ann_ids,
+            token_class_ids=token_class_ids,
+            token_tag_ids=token_tag_ann_ids,
+        )
+
 
     def get_layout_context(self, annotation_id: str, context_size: int = 3) -> list[ImageAnnotationBaseView]:
         """
````
deepdoctection/datasets/base.py

```diff
@@ -408,6 +408,7 @@ class MergeDataset(DatasetBase):
 
 class DatasetCardDict(TypedDict):
     """DatasetCardDict"""
+
     name: str
     dataset_type: Union[str, Any]
     location: str
```
deepdoctection/extern/fastlang.py

```diff
@@ -29,7 +29,8 @@ from typing import Any, Mapping, Union
 
 from lazy_imports import try_import
 
-from ..utils.file_utils import Requirement, get_fasttext_requirement
+from ..utils.develop import deprecated
+from ..utils.file_utils import Requirement, get_fasttext_requirement, get_numpy_v1_requirement
 from ..utils.settings import TypeOrStr, get_type
 from ..utils.types import PathLikeOrStr
 from .base import DetectionResult, LanguageDetector, ModelCategories
@@ -69,6 +70,7 @@ class FasttextLangDetectorMixin(LanguageDetector, ABC):
         return "fasttext_" + "_".join(Path(path_weights).parts[-2:])
 
 
+@deprecated("As FastText archived, it will be deprecated in the near future.", "2025-08-17")
 class FasttextLangDetector(FasttextLangDetectorMixin):
     """
     Fasttext language detector wrapper. Two models provided in the fasttext library can be used to identify languages.
@@ -114,7 +116,7 @@ class FasttextLangDetector(FasttextLangDetectorMixin):
 
     @classmethod
     def get_requirements(cls) -> list[Requirement]:
-        return [get_fasttext_requirement()]
+        return [get_numpy_v1_requirement(), get_fasttext_requirement()]
 
     def clone(self) -> FasttextLangDetector:
         return self.__class__(self.path_weights, self.categories.get_categories(), self.categories_orig)
```
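
Each `Requirement` here is a `(name, available, err_msg)` triple, as the `get_numpy_v1_requirement` definition further down confirms. A sketch of how a caller might gate on the combined list; how deepdoctection's base classes enforce these internally may differ:

```python
# Sketch: consuming the (name, available, err_msg) Requirement triples
# returned by get_requirements(); the actual enforcement mechanism inside
# deepdoctection may differ.
for name, available, err_msg in FasttextLangDetector.get_requirements():
    if not available:
        raise ImportError(f"requirement '{name}' not satisfied: {err_msg}")
```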
deepdoctection/mapper/laylmstruct.py

```diff
@@ -806,17 +806,17 @@ def image_to_raw_lm_features(
     raw_features["image_id"] = page.image_id
     raw_features["width"] = page.width
     raw_features["height"] = page.height
-    raw_features["ann_ids"] = text_["ann_ids"]
-    raw_features["words"] = text_["words"]
+    raw_features["ann_ids"] = text_.ann_ids
+    raw_features["words"] = text_.words
     # We use a dummy bounding box for all bounding boxes so that we can pass the raw features to
     # raw_features_to_layoutlm_features
-    raw_features["bbox"] = [_CLS_BOX] * len(text_["words"])
+    raw_features["bbox"] = [_CLS_BOX] * len(text_.words)
     raw_features["dataset_type"] = dataset_type
 
-    if use_token_tag and text_["token_tags"]:
-        raw_features["labels"] = text_["token_tags"]
-    elif text_["token_classes"]:
-        raw_features["labels"] = text_["token_classes"]
+    if use_token_tag and text_.token_tags:
+        raw_features["labels"] = text_.token_tags
+    elif text_.token_classes:
+        raw_features["labels"] = text_.token_classes
     elif page.document_type is not None:
         document_type_id = page.image_orig.summary.get_sub_category(PageType.DOCUMENT_TYPE).category_id - 1
         raw_features["labels"] = [document_type_id]
```
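
Because `text_` is now a `Text_` instance instead of a dict, subscript access becomes attribute access throughout the mapper. A migration sketch for downstream callers (hypothetical data, `Text_` assumed in scope); dict-shaped output stays available via `as_dict()`:

```python
# Migration sketch (hypothetical caller code).
text_ = Text_(text="hello world", words=["hello", "world"])

# 0.44.0 style, when text_ was a dict: words = text_["words"]
words = text_.words  # 0.44.1 style: attribute access
assert text_.as_dict()["words"] == words  # dict view still available
```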
deepdoctection/utils/file_utils.py

```diff
@@ -18,6 +18,7 @@ from types import ModuleType
 from typing import Any, Union, no_type_check
 
 import importlib_metadata
+import numpy as np
 from packaging import version
 
 from .error import DependencyError
@@ -249,6 +250,39 @@ def get_distance_requirement() -> Requirement:
     return "distance", distance_available(), _DISTANCE_ERR_MSG
 
 
+_NUMPY_V1_ERR_MSG = "numpy v1 must be installed."
+
+
+def numpy_v1_available() -> bool:
+    """
+    Check if the installed NumPy version is version 1.
+
+    This helper function determines whether the currently installed version
+    of NumPy is version 1 by inspecting its major version number.
+
+    Returns:
+        True if the installed NumPy version is 1, otherwise False
+    """
+    major_version = np.__version__.split('.', maxsplit=1)[0]
+    print(f"major version: {major_version}")
+    if major_version in (1, "1"):
+        return True
+    return False
+
+
+def get_numpy_v1_requirement() -> Requirement:
+    """
+    Retrieves the requirement details for numpy version 1.
+
+    Returns:
+        A tuple containing three elements:
+        - The requirement name for numpy version 1.
+        - A Boolean value indicating whether numpy version 1 is available.
+        - An error message in case numpy version 1 is not available.
+    """
+    return "numpy v1", numpy_v1_available(), _NUMPY_V1_ERR_MSG
+
+
 # Transformers
 _TRANSFORMERS_AVAILABLE = importlib.util.find_spec("transformers") is not None
 _TRANSFORMERS_ERR_MSG = f"transformers must be installed. {_GENERIC_ERR_MSG}"
```
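
The shipped `numpy_v1_available` compares the string major version. An equivalent, arguably stricter variant could lean on `packaging.version`, which this module already imports; a sketch with a hypothetical helper name, not the shipped implementation:

```python
# Alternative sketch, not the shipped code: parse the version properly.
import numpy as np
from packaging import version


def numpy_major_is_v1() -> bool:
    """Return True iff the installed NumPy has major version 1."""
    return version.parse(np.__version__).major == 1
```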
deepdoctection/utils/types.py

```diff
@@ -70,7 +70,6 @@ AnnotationDict: TypeAlias = dict[str, Any]
 ImageDict: TypeAlias = dict[str, Any]
 
 # We use these types for output types of the Page object
-Text_: TypeAlias = dict[str, Any]
 HTML: TypeAlias = str
 csv: TypeAlias = list[list[str]]
 Chunks: TypeAlias = list[tuple[str, str, int, str, str, str, str]]
```
deepdoctection-0.44.1.dist-info/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: deepdoctection
-Version: 0.44.0
+Version: 0.44.1
 Summary: Repository for Document AI
 Home-page: https://github.com/deepdoctection/deepdoctection
 Author: Dr. Janis Meyer
@@ -27,7 +27,7 @@ Requires-Dist: networkx>=2.7.1
 Requires-Dist: numpy<2.0,>=1.21
 Requires-Dist: packaging>=20.0
 Requires-Dist: Pillow>=10.0.0
-Requires-Dist: pypdf>=3.16.0
+Requires-Dist: pypdf>=6.0.0
 Requires-Dist: pypdfium2>=4.30.0
 Requires-Dist: pyyaml>=6.0.1
 Requires-Dist: pyzmq>=16
@@ -46,7 +46,7 @@ Requires-Dist: networkx>=2.7.1; extra == "tf"
 Requires-Dist: numpy<2.0,>=1.21; extra == "tf"
 Requires-Dist: packaging>=20.0; extra == "tf"
 Requires-Dist: Pillow>=10.0.0; extra == "tf"
-Requires-Dist: pypdf>=3.16.0; extra == "tf"
+Requires-Dist: pypdf>=6.0.0; extra == "tf"
 Requires-Dist: pypdfium2>=4.30.0; extra == "tf"
 Requires-Dist: pyyaml>=6.0.1; extra == "tf"
 Requires-Dist: pyzmq>=16; extra == "tf"
@@ -78,7 +78,7 @@ Requires-Dist: networkx>=2.7.1; extra == "pt"
 Requires-Dist: numpy<2.0,>=1.21; extra == "pt"
 Requires-Dist: packaging>=20.0; extra == "pt"
 Requires-Dist: Pillow>=10.0.0; extra == "pt"
-Requires-Dist: pypdf>=3.16.0; extra == "pt"
+Requires-Dist: pypdf>=6.0.0; extra == "pt"
 Requires-Dist: pypdfium2>=4.30.0; extra == "pt"
 Requires-Dist: pyyaml>=6.0.1; extra == "pt"
 Requires-Dist: pyzmq>=16; extra == "pt"
```
deepdoctection-0.44.1.dist-info/RECORD

```diff
@@ -1,4 +1,4 @@
-deepdoctection/__init__.py,sha256=1LqCXNB-9wXMO5dvw_6i-4tpEeTm507uPaWLgO4zFaQ,13013
+deepdoctection/__init__.py,sha256=fVndKpw3xg78BcWFoq5ic79tY42QpsL9L5GLcfgG1CU,13079
 deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deepdoctection/analyzer/__init__.py,sha256=wg0BcFwdCeREwzZfa--Yx8HUJ9LPv5z5PmLwtkZdPH8,772
 deepdoctection/analyzer/config.py,sha256=lTfBKwzm9iVKCZoq7-FcoYUfrcgWmRknwYUzD5Jx-0U,41762
@@ -20,11 +20,11 @@ deepdoctection/datapoint/__init__.py,sha256=ruyV4DTOkUFWhkJ5VO_eJmrAxrzgygzTtr4W
 deepdoctection/datapoint/annotation.py,sha256=f32BNmzUGJoNMeGst2RGC2jmjJpzzjxyBRKFG8FCubY,23092
 deepdoctection/datapoint/box.py,sha256=QAS8sK2Ge4_ysW6zOYkLlzNwhSyw_mhYcYsxscClEno,31453
 deepdoctection/datapoint/convert.py,sha256=6ENXX3tBdY8ogb2NBPxsOsQMGnQux8ol5nrUfWS5tYE,7352
-deepdoctection/datapoint/image.py,sha256=ZydLd_JLubQJ3v84CzX2zwM7f-3VF2lgVb4-zZPt-xk,37186
-deepdoctection/datapoint/view.py,sha256=5xwT0wjySUAw8FXpGMiZAccAorlMy9eRKmsoaLWafbo,58460
+deepdoctection/datapoint/image.py,sha256=N5VH2oeKQWIt5FQvFaeu-FL8eckv7LQS0ZJsHSuVwjI,37187
+deepdoctection/datapoint/view.py,sha256=R-xv9SzK-7sZ6-kbok9K64OldkRj6n_SeVdRRZqoZI4,61730
 deepdoctection/datasets/__init__.py,sha256=4ifjIwWCPYiS31GzUlVDScrkNOrb1eo5xHlRXNyg_58,994
 deepdoctection/datasets/adapter.py,sha256=VSLM_980aHi4TpgOxfxiBHiF_fUXyh348PXet6zTo-4,7779
-deepdoctection/datasets/base.py,sha256=fGXMB7zD-_W9BXdmOwBrMft3BvnH9exKkNoHNpQgGfc,30669
+deepdoctection/datasets/base.py,sha256=oLv2o9QiKVN44kO7Llj-z00_TQRYBsVlvBL3ZQoscUQ,30670
 deepdoctection/datasets/dataflow_builder.py,sha256=0vwkItr0wVbKPtTXoS6uJLO9QQNWbS0Ri7CySuywWxU,4186
 deepdoctection/datasets/info.py,sha256=DLRYq3cHp3L34CcSXPUJ8j8wguJp2aVdoH-AhODNLBA,20814
 deepdoctection/datasets/registry.py,sha256=qYRVycNYFeAzWB7jENGYzokgyzIEvTRb49he2UmPUe8,3451
@@ -55,7 +55,7 @@ deepdoctection/extern/base.py,sha256=LomTR9HXcBU55MPDIA8D1rIamk7DUmToJmgcRXzCoeU
 deepdoctection/extern/d2detect.py,sha256=I0oEkprr5iVpKpM3a3nknAU-sXwNoDQdp_B1gzzODsk,22374
 deepdoctection/extern/deskew.py,sha256=L_jU0rXh03qzwaT79EIqE_zYMUVeFwWDbsGbtahuL2k,3124
 deepdoctection/extern/doctrocr.py,sha256=jB0mnvGmmygoUu9e9zw2_HtAgQUdCJHbxMSt1cfK5bA,25381
-deepdoctection/extern/fastlang.py,sha256=4D9A-_hTXUcvXG6IJJknX34LrD71v08XtNdWgvXD7fE,4736
+deepdoctection/extern/fastlang.py,sha256=raLC2CgHiKmkSA8wZkuvsb7Ob3w3UbLNZCUd0b09hWA,4922
 deepdoctection/extern/hfdetr.py,sha256=N3eLNI5BsQS9_7YZyBeWndSgUydJij7ugZA9p4V1xaQ,14316
 deepdoctection/extern/hflayoutlm.py,sha256=3mZZ3byn00jSrLWO2vZFas9j4VrhbYQNmF1mwPG2ElQ,59642
 deepdoctection/extern/hflm.py,sha256=y-9brzmT2NYtFoNcWHABNg2ZZQXSOP9CyqtT1OoeV9U,9754
@@ -93,7 +93,7 @@ deepdoctection/mapper/cats.py,sha256=YEnf5uOvyf_UFcEtN5ddJxF7LGwkwdPWjBE14QvSPV4
 deepdoctection/mapper/cocostruct.py,sha256=BbykSMXklsr6YJ4HRDYEABL1NUxndZvhKPr683aIG_A,6287
 deepdoctection/mapper/d2struct.py,sha256=XiIuQAcC-ekn97RHz2hALcD02Mpdze7Lrfm4vPEB9Iw,11481
 deepdoctection/mapper/hfstruct.py,sha256=15eOUwQ_f3rflZJdnQzIaN7tpj8dhDKDRlAykOtiDsk,5727
-deepdoctection/mapper/laylmstruct.py,sha256=_10260AtRcF2xdkALz2JatiAKMcNIwNMbJgO__hOPN8,42094
+deepdoctection/mapper/laylmstruct.py,sha256=ILlNzlMAmf6MtEy3bUUD9fv2DZV1XLBTE-vB1i9Aa9c,42073
 deepdoctection/mapper/maputils.py,sha256=21Oyt4I8IV5jSgtplBP-opPTKk3idgJnA3s8ICPvMvc,8977
 deepdoctection/mapper/match.py,sha256=Q_Dq95IpO9o0gRKk-Jg7ua0eiZ2rMHUhIhwXygT2aGU,10202
 deepdoctection/mapper/misc.py,sha256=LYSORlUR7sn0Qf-wgpTyVNwGgnpuKN9ln7TAiFrbBrQ,7366
@@ -129,7 +129,7 @@ deepdoctection/utils/context.py,sha256=5QfdzxsiSPnNs1qtJdgjguIoD8srLQ2W8oeDzwp9F
 deepdoctection/utils/develop.py,sha256=4myrqBDypM6tQ2a2Jo3Q20RuE_W2czykpXBwgXPrxNw,3568
 deepdoctection/utils/env_info.py,sha256=b1WohrfQuoL-BPN0_s8Rjtwzx-WKvCyaX2I4qYl1Emc,19878
 deepdoctection/utils/error.py,sha256=sIry8F5MZ0yLvKfAwVz90IorKWVvjoRqcC0L8qq8mLk,2480
-deepdoctection/utils/file_utils.py,sha256=EepfAZVADaqpBdVq2LOJXLFLsMd_oZF_FAKUHOAhiZ0,25246
+deepdoctection/utils/file_utils.py,sha256=b-Be3qMhBsrQxpEvYM-IKBQdWqBX2HGhq_Gtk12BL3U,26276
 deepdoctection/utils/fs.py,sha256=KTS9FJzZk9le_vmIPr9IisJw0AyTfjkyX1KoWQy4DNs,12729
 deepdoctection/utils/identifier.py,sha256=Jt12MeZf7eC1qciY5Fp_AYUGxYVcjsy7xNBUvJil7dU,2270
 deepdoctection/utils/logger.py,sha256=ddQ0xBStluf8OvoRlEB8YkqyRR-ZYgyJYLClTmJJMAU,10290
@@ -139,11 +139,11 @@ deepdoctection/utils/pdf_utils.py,sha256=BrxTuY9j0COyIRkJchJ0tt2h6ZsA2an6z-H8E8Q
 deepdoctection/utils/settings.py,sha256=0P6nh9-84wWyMm9J7w9I7gI1xo8mN4M4xZ0IXzcqDbE,12862
 deepdoctection/utils/tqdm.py,sha256=kx3Ivf0x85S0ZmEaN5mImu0V6isOgygOU8iyr2U99XU,1850
 deepdoctection/utils/transform.py,sha256=jgeCyQWLN9q79jCGW7jysyKUKcJ1AVMk8OslF-3fbag,16095
-deepdoctection/utils/types.py,sha256=ti4WdtIJSg3TGK_YPkkoY9PYGMnR2tTX6Xfik8U1pNk,2986
+deepdoctection/utils/types.py,sha256=Nsr2J7XSZazXho94y0oc01LBQxh0ve67c4Yx2gMlSXU,2952
 deepdoctection/utils/utils.py,sha256=NBUb1qbx8Jm-AvYN1Sdbk0huXhbAKxZ-ZtOcMespsMM,7064
 deepdoctection/utils/viz.py,sha256=bujRIujvX317rPz4jBrj0yd3WP8wPjDUiI5GUrw9MzQ,27339
-deepdoctection-0.44.0.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
-deepdoctection-0.44.0.dist-info/METADATA,sha256=7cCCLg-Z2eTzBxJxXlN79K1WlBmFYtMlK8_UloBYqo4,14796
-deepdoctection-0.44.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-deepdoctection-0.44.0.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
-deepdoctection-0.44.0.dist-info/RECORD,,
+deepdoctection-0.44.1.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
+deepdoctection-0.44.1.dist-info/METADATA,sha256=6Kcja098MH-8gLa-icffL8NdVOMcQkSzTznp8aIHir8,14793
+deepdoctection-0.44.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+deepdoctection-0.44.1.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
+deepdoctection-0.44.1.dist-info/RECORD,,
```