deepdoctection 0.37.1__py3-none-any.whl → 0.37.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +1 -1
- deepdoctection/datapoint/image.py +4 -2
- deepdoctection/extern/hflayoutlm.py +1 -1
- deepdoctection/pipe/doctectionpipe.py +6 -5
- deepdoctection/train/hf_layoutlm_train.py +4 -3
- {deepdoctection-0.37.1.dist-info → deepdoctection-0.37.2.dist-info}/METADATA +1 -1
- {deepdoctection-0.37.1.dist-info → deepdoctection-0.37.2.dist-info}/RECORD +10 -10
- {deepdoctection-0.37.1.dist-info → deepdoctection-0.37.2.dist-info}/LICENSE +0 -0
- {deepdoctection-0.37.1.dist-info → deepdoctection-0.37.2.dist-info}/WHEEL +0 -0
- {deepdoctection-0.37.1.dist-info → deepdoctection-0.37.2.dist-info}/top_level.txt +0 -0
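deepdoctection/__init__.py
CHANGED
|
(+1 -1; the hunk for this file was not captured in this extract)
|
deepdoctection/datapoint/image.py
CHANGED
|
@@ -32,9 +32,9 @@ from numpy import uint8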
|
|
|
32
32
|
|
|
33
33
|
from ..utils.error import AnnotationError, BoundingBoxError, ImageError, UUIDError
|
|
34
34
|
from ..utils.identifier import get_uuid, is_uuid_like
|
|
35
|
+
from ..utils.logger import LoggingRecord, logger
|
|
35
36
|
from ..utils.settings import ObjectTypes, SummaryType, get_type
|
|
36
37
|
from ..utils.types import ImageDict, PathLikeOrStr, PixelValues
|
|
37
|
-
from ..utils.logger import LoggingRecord, logger
|
|
38
38
|
from .annotation import Annotation, AnnotationMap, BoundingBox, CategoryAnnotation, ImageAnnotation
|
|
39
39
|
from .box import crop_box_from_image, global_to_local_coords, intersection_box
|
|
40
40
|
from .convert import as_dict, convert_b64_to_np_array, convert_np_array_to_b64, convert_pdf_bytes_to_np_array_v2
|
|
@@ -477,7 +477,9 @@ class Image:
|
|
|
477
477
|
if service_id not in service_id_to_annotation_id:
|
|
478
478
|
logger.info(
|
|
479
479
|
LoggingRecord(
|
|
480
|
-
f"Service_id {service_id} for image_id: {self.image_id} not found. Skipping removal."
|
|
480
|
+
f"Service_id {service_id} for image_id: {self.image_id} not found. Skipping removal."
|
|
481
|
+
)
|
|
482
|
+
)
|
|
481
483
|
|
|
482
484
|
annotation_ids = service_id_to_annotation_id.get(service_id, [])
|
|
483
485
|
|
|
deepdoctection/extern/hflayoutlm.py
CHANGED
|
@@ -48,7 +48,7 @@ with try_import() as pt_import_guard:
|
|
|
48
48
|
import torch.nn.functional as F
|
|
49
49
|
|
|
50
50
|
with try_import() as tr_import_guard:
|
|
51
|
-
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
|
|
51
|
+
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD # type:ignore
|
|
52
52
|
from transformers import (
|
|
53
53
|
LayoutLMForSequenceClassification,
|
|
54
54
|
LayoutLMForTokenClassification,
|
|
deepdoctection/pipe/doctectionpipe.py
CHANGED
|
@@ -161,16 +161,17 @@ class DoctectionPipe(Pipeline):
|
|
|
161
161
|
|
|
162
162
|
super().__init__(pipeline_component_list)
|
|
163
163
|
|
|
164
|
-
def _entry(
|
|
165
|
-
|
|
164
|
+
def _entry(
|
|
165
|
+
self, **kwargs: Union[str, bytes, DataFlow, bool, int, PathLikeOrStr, Union[str, List[str]]]
|
|
166
|
+
) -> DataFlow:
|
|
166
167
|
path, file_type, shuffle, max_datapoints, doc_path, dataset_dataflow, b_bytes = _collect_from_kwargs(**kwargs)
|
|
167
168
|
|
|
168
169
|
df: DataFlow
|
|
169
170
|
|
|
170
171
|
if isinstance(b_bytes, bytes):
|
|
171
|
-
df = DoctectionPipe.bytes_to_dataflow(
|
|
172
|
-
|
|
173
|
-
|
|
172
|
+
df = DoctectionPipe.bytes_to_dataflow(
|
|
173
|
+
path=doc_path if path is None else path, b_bytes=b_bytes, file_type=file_type
|
|
174
|
+
)
|
|
174
175
|
|
|
175
176
|
elif isinstance(path, (str, Path)):
|
|
176
177
|
if not isinstance(file_type, (str, list)):
|
|
deepdoctection/train/hf_layoutlm_train.py
CHANGED
|
@@ -161,11 +161,12 @@ class LayoutLMTrainer(Trainer):
|
|
|
161
161
|
model: Union[PreTrainedModel, nn.Module],
|
|
162
162
|
args: TrainingArguments,
|
|
163
163
|
data_collator: LayoutLMDataCollator,
|
|
164
|
-
train_dataset:
|
|
164
|
+
train_dataset: DatasetAdapter,
|
|
165
|
+
eval_dataset: Optional[DatasetBase] = None,
|
|
165
166
|
):
|
|
166
167
|
self.evaluator: Optional[Evaluator] = None
|
|
167
168
|
self.build_eval_kwargs: Optional[dict[str, Any]] = None
|
|
168
|
-
super().__init__(model, args, data_collator, train_dataset)
|
|
169
|
+
super().__init__(model, args, data_collator, train_dataset, eval_dataset=eval_dataset)
|
|
169
170
|
|
|
170
171
|
def setup_evaluator(
|
|
171
172
|
self,
|
|
@@ -472,7 +473,7 @@ def train_hf_layoutlm(
|
|
|
472
473
|
max_batch_size=max_batch_size, # type: ignore
|
|
473
474
|
remove_bounding_box_features=remove_box_features,
|
|
474
475
|
)
|
|
475
|
-
trainer = LayoutLMTrainer(model, arguments, data_collator, dataset)
|
|
476
|
+
trainer = LayoutLMTrainer(model, arguments, data_collator, dataset, eval_dataset=dataset_val)
|
|
476
477
|
|
|
477
478
|
if arguments.evaluation_strategy in (IntervalStrategy.STEPS,):
|
|
478
479
|
assert metric is not None # silence mypy
|
|
{deepdoctection-0.37.1.dist-info → deepdoctection-0.37.2.dist-info}/RECORD
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
deepdoctection/__init__.py,sha256=
|
|
1
|
+
deepdoctection/__init__.py,sha256=T2sHOc6ZPpx44hWbarp0i_QlAqm0dEmzs7HVg2mL_nM,12655
|
|
2
2
|
deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
deepdoctection/analyzer/__init__.py,sha256=icClxrd20XutD6LxLgEPIWceSs4j_QfI3szCE-9BL2w,729
|
|
4
4
|
deepdoctection/analyzer/_config.py,sha256=NZl_REM8Ge2xfxvHN-mZR5KURcHfZii3xfMlKQwckbA,4864
|
|
@@ -19,7 +19,7 @@ deepdoctection/datapoint/__init__.py,sha256=3K406GbOPhoEp8koVaSbMocmSsmWifnSZ1SP
|
|
|
19
19
|
deepdoctection/datapoint/annotation.py,sha256=FEgz4COxVDfjic0gG7kS6iHnWLBIgFnquQ63Cbj2a4Y,22531
|
|
20
20
|
deepdoctection/datapoint/box.py,sha256=tkFuVM6xfx2jL7W4UED4qHXV572LSRdIsVJbrEiyIxI,23524
|
|
21
21
|
deepdoctection/datapoint/convert.py,sha256=O7920pIomyEkzXwxpFsrzfhn7Pl6UzVGhNzv90VcuKU,7099
|
|
22
|
-
deepdoctection/datapoint/image.py,sha256=
|
|
22
|
+
deepdoctection/datapoint/image.py,sha256=UDBKXJJpuKAUx0J-DGjvLGqrMV4N3kLpksJYxoVkong,33279
|
|
23
23
|
deepdoctection/datapoint/view.py,sha256=1rVMuqucCrI5zlwyXMADJQBV38V_zSNFqFyBi3cMA1E,44914
|
|
24
24
|
deepdoctection/datasets/__init__.py,sha256=-A3aR90aDsHPmVM35JavfnQ2itYSCn3ujl4krRni1QU,1076
|
|
25
25
|
deepdoctection/datasets/adapter.py,sha256=Ly_vbOAgVI73V41FUccnSX1ECTOyesW_qsuvQuvOZbw,7796
|
|
@@ -56,7 +56,7 @@ deepdoctection/extern/deskew.py,sha256=sPoixu8S9he-0wbs-jgxtPE2V9BiP4-3uZlb6F5Y1
|
|
|
56
56
|
deepdoctection/extern/doctrocr.py,sha256=T3_tvlih22_dVCBZypS1Y8tjQQB1fkAxIbGdUGHIapQ,24473
|
|
57
57
|
deepdoctection/extern/fastlang.py,sha256=F4gK-SEwcCujjxH327ZDzMGWToJ49xS_dCKcePQ9IlY,4780
|
|
58
58
|
deepdoctection/extern/hfdetr.py,sha256=1NPW_u5eH2tP3ixZ91l4WR-O-wLVcrFsLWA7BqID0oM,12055
|
|
59
|
-
deepdoctection/extern/hflayoutlm.py,sha256=
|
|
59
|
+
deepdoctection/extern/hflayoutlm.py,sha256=T1IBm3C8CtG97-tauo03YqhUac6xdFc2y345BWVMajQ,56509
|
|
60
60
|
deepdoctection/extern/hflm.py,sha256=kwS6kcSlY_2m9u0RzBLTRq-UMM7c1PhyUaDTvSdejus,9217
|
|
61
61
|
deepdoctection/extern/model.py,sha256=ViHHKPvbGmLCPw7ZESv_rmjlkA90UiBU6oZiHOMqNSw,59869
|
|
62
62
|
deepdoctection/extern/pdftext.py,sha256=KS_t27SUiYn_IOS_J2lF9lSSo22vLagxmxvYCY3CqXA,7228
|
|
@@ -106,7 +106,7 @@ deepdoctection/pipe/anngen.py,sha256=3319l4aaXzcY4w6ItVBNPX8LGS5fHFDVtyVY9KMefac
|
|
|
106
106
|
deepdoctection/pipe/base.py,sha256=ynNg5SSRuUVxN69VWOO3Oi7WSeGrYwn3A56NQMBJDvw,14222
|
|
107
107
|
deepdoctection/pipe/common.py,sha256=haOb4v0jLX3r41BSC8cVseX2E320_HkSrGlZsQiKE2g,17728
|
|
108
108
|
deepdoctection/pipe/concurrency.py,sha256=AAKRsVgaBEYNluntbDa46SBF1JZ_XqnWLDSWrNvAzEo,9657
|
|
109
|
-
deepdoctection/pipe/doctectionpipe.py,sha256=
|
|
109
|
+
deepdoctection/pipe/doctectionpipe.py,sha256=wCg96P9Pb54i5AVgG02b4FljobM64_qEML_GxiULy-4,11765
|
|
110
110
|
deepdoctection/pipe/language.py,sha256=5zI0UQC6Fh12_r2pfVL42HoCGz2hpHrOhpXAn5m-rYw,5451
|
|
111
111
|
deepdoctection/pipe/layout.py,sha256=xIhnJpyUSbvLbhTXyAKXY1hmG9352jihGYFSclTH_1g,5567
|
|
112
112
|
deepdoctection/pipe/lm.py,sha256=Sp-b7smeslNDyioEfNjuNBUxAuFKn3-OKpCZkGXri_c,16643
|
|
@@ -120,7 +120,7 @@ deepdoctection/pipe/transform.py,sha256=9Om7X7hJeL4jgUwHM1CHa4sb5v7Qo1PtVG0ls_3n
|
|
|
120
120
|
deepdoctection/train/__init__.py,sha256=YFTRAZF1F7cEAKTdAIi1BLyYb6rSRcwq09Ui5Lu8d6E,1071
|
|
121
121
|
deepdoctection/train/d2_frcnn_train.py,sha256=sFc_G-mEpaM8d1CCE0_6Gl4nBh11X2RYRBA3p_ylFJQ,16000
|
|
122
122
|
deepdoctection/train/hf_detr_train.py,sha256=8ydysxzOPE_IPoNFGaHb7PbKr9Nbl41rcY4lbylQavU,10783
|
|
123
|
-
deepdoctection/train/hf_layoutlm_train.py,sha256=
|
|
123
|
+
deepdoctection/train/hf_layoutlm_train.py,sha256=BNjPgPAvxm4beHULqzo58u-gW7GcTGiZAk2rF6TootM,22532
|
|
124
124
|
deepdoctection/train/tp_frcnn_train.py,sha256=pEpXokSVGveqo82pRnhnAmHPmjQ_8wQWpqM4ZyNHJgs,13049
|
|
125
125
|
deepdoctection/utils/__init__.py,sha256=brBceRWeov9WXMiJTjyJOF2rHMP8trGGRRjhMdZ61nI,2371
|
|
126
126
|
deepdoctection/utils/concurrency.py,sha256=nIhpkSncmv0LBB8PtcOLY-BsRGlfcDpz7foVdgzZd20,4598
|
|
@@ -141,8 +141,8 @@ deepdoctection/utils/transform.py,sha256=3kCgsEeRkG1efCdkfvj7tUFMs-e2jbjbflq826F
|
|
|
141
141
|
deepdoctection/utils/types.py,sha256=_3dmPdCIZNLbgU5QP5k_c5phDf18xLe1kYL6t2nM45s,2953
|
|
142
142
|
deepdoctection/utils/utils.py,sha256=csVs_VvCq4QBETPoE2JdTTL4MFYnD4xh-Js5vRb612g,6492
|
|
143
143
|
deepdoctection/utils/viz.py,sha256=Jf8ePNYWlpuyaS6SeTYQ4OyA3eNhtgjvAQZnGNdgHC0,27051
|
|
144
|
-
deepdoctection-0.37.
|
|
145
|
-
deepdoctection-0.37.
|
|
146
|
-
deepdoctection-0.37.
|
|
147
|
-
deepdoctection-0.37.
|
|
148
|
-
deepdoctection-0.37.
|
|
144
|
+
deepdoctection-0.37.2.dist-info/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
|
|
145
|
+
deepdoctection-0.37.2.dist-info/METADATA,sha256=XLOCkFBWynZhyZmKpDDRaomDIxPnVpy07WdUkodRF3Y,19545
|
|
146
|
+
deepdoctection-0.37.2.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
147
|
+
deepdoctection-0.37.2.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
|
|
148
|
+
deepdoctection-0.37.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|