deepdoctection-0.37.1-py3-none-any.whl → deepdoctection-0.37.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

@@ -24,7 +24,7 @@ from .utils.logger import LoggingRecord, logger
24
24
 
25
25
  # pylint: enable=wrong-import-position
26
26
 
27
- __version__ = "0.37.1"
27
+ __version__ = "0.37.2"
28
28
 
29
29
  _IMPORT_STRUCTURE = {
30
30
  "analyzer": ["config_sanity_checks", "get_dd_analyzer", "ServiceFactory"],
@@ -32,9 +32,9 @@ from numpy import uint8
32
32
 
33
33
  from ..utils.error import AnnotationError, BoundingBoxError, ImageError, UUIDError
34
34
  from ..utils.identifier import get_uuid, is_uuid_like
35
+ from ..utils.logger import LoggingRecord, logger
35
36
  from ..utils.settings import ObjectTypes, SummaryType, get_type
36
37
  from ..utils.types import ImageDict, PathLikeOrStr, PixelValues
37
- from ..utils.logger import LoggingRecord, logger
38
38
  from .annotation import Annotation, AnnotationMap, BoundingBox, CategoryAnnotation, ImageAnnotation
39
39
  from .box import crop_box_from_image, global_to_local_coords, intersection_box
40
40
  from .convert import as_dict, convert_b64_to_np_array, convert_np_array_to_b64, convert_pdf_bytes_to_np_array_v2
@@ -477,7 +477,9 @@ class Image:
477
477
  if service_id not in service_id_to_annotation_id:
478
478
  logger.info(
479
479
  LoggingRecord(
480
- f"Service_id {service_id} for image_id: {self.image_id} not found. Skipping removal."))
480
+ f"Service_id {service_id} for image_id: {self.image_id} not found. Skipping removal."
481
+ )
482
+ )
481
483
 
482
484
  annotation_ids = service_id_to_annotation_id.get(service_id, [])
483
485
 
@@ -48,7 +48,7 @@ with try_import() as pt_import_guard:
48
48
  import torch.nn.functional as F
49
49
 
50
50
  with try_import() as tr_import_guard:
51
- from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
51
+ from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD # type:ignore
52
52
  from transformers import (
53
53
  LayoutLMForSequenceClassification,
54
54
  LayoutLMForTokenClassification,
@@ -161,16 +161,17 @@ class DoctectionPipe(Pipeline):
161
161
 
162
162
  super().__init__(pipeline_component_list)
163
163
 
164
- def _entry(self, **kwargs: Union[str, bytes, DataFlow, bool, int, PathLikeOrStr, Union[str, List[str]]]) \
165
- -> DataFlow:
164
+ def _entry(
165
+ self, **kwargs: Union[str, bytes, DataFlow, bool, int, PathLikeOrStr, Union[str, List[str]]]
166
+ ) -> DataFlow:
166
167
  path, file_type, shuffle, max_datapoints, doc_path, dataset_dataflow, b_bytes = _collect_from_kwargs(**kwargs)
167
168
 
168
169
  df: DataFlow
169
170
 
170
171
  if isinstance(b_bytes, bytes):
171
- df = DoctectionPipe.bytes_to_dataflow(path=doc_path if path is None else path,
172
- b_bytes=b_bytes,
173
- file_type=file_type)
172
+ df = DoctectionPipe.bytes_to_dataflow(
173
+ path=doc_path if path is None else path, b_bytes=b_bytes, file_type=file_type
174
+ )
174
175
 
175
176
  elif isinstance(path, (str, Path)):
176
177
  if not isinstance(file_type, (str, list)):
@@ -161,11 +161,12 @@ class LayoutLMTrainer(Trainer):
161
161
  model: Union[PreTrainedModel, nn.Module],
162
162
  args: TrainingArguments,
163
163
  data_collator: LayoutLMDataCollator,
164
- train_dataset: Dataset[Any],
164
+ train_dataset: DatasetAdapter,
165
+ eval_dataset: Optional[DatasetBase] = None,
165
166
  ):
166
167
  self.evaluator: Optional[Evaluator] = None
167
168
  self.build_eval_kwargs: Optional[dict[str, Any]] = None
168
- super().__init__(model, args, data_collator, train_dataset)
169
+ super().__init__(model, args, data_collator, train_dataset, eval_dataset=eval_dataset)
169
170
 
170
171
  def setup_evaluator(
171
172
  self,
@@ -472,7 +473,7 @@ def train_hf_layoutlm(
472
473
  max_batch_size=max_batch_size, # type: ignore
473
474
  remove_bounding_box_features=remove_box_features,
474
475
  )
475
- trainer = LayoutLMTrainer(model, arguments, data_collator, dataset)
476
+ trainer = LayoutLMTrainer(model, arguments, data_collator, dataset, eval_dataset=dataset_val)
476
477
 
477
478
  if arguments.evaluation_strategy in (IntervalStrategy.STEPS,):
478
479
  assert metric is not None # silence mypy
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deepdoctection
3
- Version: 0.37.1
3
+ Version: 0.37.2
4
4
  Summary: Repository for Document AI
5
5
  Home-page: https://github.com/deepdoctection/deepdoctection
6
6
  Author: Dr. Janis Meyer
@@ -1,4 +1,4 @@
1
- deepdoctection/__init__.py,sha256=i23UZBqMlkcvUILJxvUQAdj-3d2yV9edzxFsC5RoMHA,12655
1
+ deepdoctection/__init__.py,sha256=T2sHOc6ZPpx44hWbarp0i_QlAqm0dEmzs7HVg2mL_nM,12655
2
2
  deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  deepdoctection/analyzer/__init__.py,sha256=icClxrd20XutD6LxLgEPIWceSs4j_QfI3szCE-9BL2w,729
4
4
  deepdoctection/analyzer/_config.py,sha256=NZl_REM8Ge2xfxvHN-mZR5KURcHfZii3xfMlKQwckbA,4864
@@ -19,7 +19,7 @@ deepdoctection/datapoint/__init__.py,sha256=3K406GbOPhoEp8koVaSbMocmSsmWifnSZ1SP
19
19
  deepdoctection/datapoint/annotation.py,sha256=FEgz4COxVDfjic0gG7kS6iHnWLBIgFnquQ63Cbj2a4Y,22531
20
20
  deepdoctection/datapoint/box.py,sha256=tkFuVM6xfx2jL7W4UED4qHXV572LSRdIsVJbrEiyIxI,23524
21
21
  deepdoctection/datapoint/convert.py,sha256=O7920pIomyEkzXwxpFsrzfhn7Pl6UzVGhNzv90VcuKU,7099
22
- deepdoctection/datapoint/image.py,sha256=AM34br9eM1syTIUXcJIrAaP7pEnejbUl-w-CK5pr9z8,33233
22
+ deepdoctection/datapoint/image.py,sha256=UDBKXJJpuKAUx0J-DGjvLGqrMV4N3kLpksJYxoVkong,33279
23
23
  deepdoctection/datapoint/view.py,sha256=1rVMuqucCrI5zlwyXMADJQBV38V_zSNFqFyBi3cMA1E,44914
24
24
  deepdoctection/datasets/__init__.py,sha256=-A3aR90aDsHPmVM35JavfnQ2itYSCn3ujl4krRni1QU,1076
25
25
  deepdoctection/datasets/adapter.py,sha256=Ly_vbOAgVI73V41FUccnSX1ECTOyesW_qsuvQuvOZbw,7796
@@ -56,7 +56,7 @@ deepdoctection/extern/deskew.py,sha256=sPoixu8S9he-0wbs-jgxtPE2V9BiP4-3uZlb6F5Y1
56
56
  deepdoctection/extern/doctrocr.py,sha256=T3_tvlih22_dVCBZypS1Y8tjQQB1fkAxIbGdUGHIapQ,24473
57
57
  deepdoctection/extern/fastlang.py,sha256=F4gK-SEwcCujjxH327ZDzMGWToJ49xS_dCKcePQ9IlY,4780
58
58
  deepdoctection/extern/hfdetr.py,sha256=1NPW_u5eH2tP3ixZ91l4WR-O-wLVcrFsLWA7BqID0oM,12055
59
- deepdoctection/extern/hflayoutlm.py,sha256=_OUeQsbNgfjbV7TPYBjkqc4HoTBQqkOINnwpewPJpl8,56494
59
+ deepdoctection/extern/hflayoutlm.py,sha256=T1IBm3C8CtG97-tauo03YqhUac6xdFc2y345BWVMajQ,56509
60
60
  deepdoctection/extern/hflm.py,sha256=kwS6kcSlY_2m9u0RzBLTRq-UMM7c1PhyUaDTvSdejus,9217
61
61
  deepdoctection/extern/model.py,sha256=ViHHKPvbGmLCPw7ZESv_rmjlkA90UiBU6oZiHOMqNSw,59869
62
62
  deepdoctection/extern/pdftext.py,sha256=KS_t27SUiYn_IOS_J2lF9lSSo22vLagxmxvYCY3CqXA,7228
@@ -106,7 +106,7 @@ deepdoctection/pipe/anngen.py,sha256=3319l4aaXzcY4w6ItVBNPX8LGS5fHFDVtyVY9KMefac
106
106
  deepdoctection/pipe/base.py,sha256=ynNg5SSRuUVxN69VWOO3Oi7WSeGrYwn3A56NQMBJDvw,14222
107
107
  deepdoctection/pipe/common.py,sha256=haOb4v0jLX3r41BSC8cVseX2E320_HkSrGlZsQiKE2g,17728
108
108
  deepdoctection/pipe/concurrency.py,sha256=AAKRsVgaBEYNluntbDa46SBF1JZ_XqnWLDSWrNvAzEo,9657
109
- deepdoctection/pipe/doctectionpipe.py,sha256=uhsrSuwaHcOMj8b8i6wCpPaZlSxCTaeHVhMokJ8vRSI,11835
109
+ deepdoctection/pipe/doctectionpipe.py,sha256=wCg96P9Pb54i5AVgG02b4FljobM64_qEML_GxiULy-4,11765
110
110
  deepdoctection/pipe/language.py,sha256=5zI0UQC6Fh12_r2pfVL42HoCGz2hpHrOhpXAn5m-rYw,5451
111
111
  deepdoctection/pipe/layout.py,sha256=xIhnJpyUSbvLbhTXyAKXY1hmG9352jihGYFSclTH_1g,5567
112
112
  deepdoctection/pipe/lm.py,sha256=Sp-b7smeslNDyioEfNjuNBUxAuFKn3-OKpCZkGXri_c,16643
@@ -120,7 +120,7 @@ deepdoctection/pipe/transform.py,sha256=9Om7X7hJeL4jgUwHM1CHa4sb5v7Qo1PtVG0ls_3n
120
120
  deepdoctection/train/__init__.py,sha256=YFTRAZF1F7cEAKTdAIi1BLyYb6rSRcwq09Ui5Lu8d6E,1071
121
121
  deepdoctection/train/d2_frcnn_train.py,sha256=sFc_G-mEpaM8d1CCE0_6Gl4nBh11X2RYRBA3p_ylFJQ,16000
122
122
  deepdoctection/train/hf_detr_train.py,sha256=8ydysxzOPE_IPoNFGaHb7PbKr9Nbl41rcY4lbylQavU,10783
123
- deepdoctection/train/hf_layoutlm_train.py,sha256=e3pekLfe2KeYAI04COiTTL3KKiLDaXxTj0A2vwTvYZo,22425
123
+ deepdoctection/train/hf_layoutlm_train.py,sha256=BNjPgPAvxm4beHULqzo58u-gW7GcTGiZAk2rF6TootM,22532
124
124
  deepdoctection/train/tp_frcnn_train.py,sha256=pEpXokSVGveqo82pRnhnAmHPmjQ_8wQWpqM4ZyNHJgs,13049
125
125
  deepdoctection/utils/__init__.py,sha256=brBceRWeov9WXMiJTjyJOF2rHMP8trGGRRjhMdZ61nI,2371
126
126
  deepdoctection/utils/concurrency.py,sha256=nIhpkSncmv0LBB8PtcOLY-BsRGlfcDpz7foVdgzZd20,4598
@@ -141,8 +141,8 @@ deepdoctection/utils/transform.py,sha256=3kCgsEeRkG1efCdkfvj7tUFMs-e2jbjbflq826F
141
141
  deepdoctection/utils/types.py,sha256=_3dmPdCIZNLbgU5QP5k_c5phDf18xLe1kYL6t2nM45s,2953
142
142
  deepdoctection/utils/utils.py,sha256=csVs_VvCq4QBETPoE2JdTTL4MFYnD4xh-Js5vRb612g,6492
143
143
  deepdoctection/utils/viz.py,sha256=Jf8ePNYWlpuyaS6SeTYQ4OyA3eNhtgjvAQZnGNdgHC0,27051
144
- deepdoctection-0.37.1.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
145
- deepdoctection-0.37.1.dist-info/METADATA,sha256=M-HjpJpxuM4tHN0ld8DscsZPgKRUoNmsbx9slFkj6tg,19545
146
- deepdoctection-0.37.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
147
- deepdoctection-0.37.1.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
148
- deepdoctection-0.37.1.dist-info/RECORD,,
144
+ deepdoctection-0.37.2.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
145
+ deepdoctection-0.37.2.dist-info/METADATA,sha256=XLOCkFBWynZhyZmKpDDRaomDIxPnVpy07WdUkodRF3Y,19545
146
+ deepdoctection-0.37.2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
147
+ deepdoctection-0.37.2.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
148
+ deepdoctection-0.37.2.dist-info/RECORD,,