deepdoctection 0.31-py3-none-any.whl → 0.33-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of deepdoctection might be problematic.
- deepdoctection/__init__.py +16 -29
- deepdoctection/analyzer/dd.py +70 -59
- deepdoctection/configs/conf_dd_one.yaml +34 -31
- deepdoctection/dataflow/common.py +9 -5
- deepdoctection/dataflow/custom.py +5 -5
- deepdoctection/dataflow/custom_serialize.py +75 -18
- deepdoctection/dataflow/parallel_map.py +3 -3
- deepdoctection/dataflow/serialize.py +4 -4
- deepdoctection/dataflow/stats.py +3 -3
- deepdoctection/datapoint/annotation.py +41 -56
- deepdoctection/datapoint/box.py +9 -8
- deepdoctection/datapoint/convert.py +6 -6
- deepdoctection/datapoint/image.py +56 -44
- deepdoctection/datapoint/view.py +245 -150
- deepdoctection/datasets/__init__.py +1 -4
- deepdoctection/datasets/adapter.py +35 -26
- deepdoctection/datasets/base.py +14 -12
- deepdoctection/datasets/dataflow_builder.py +3 -3
- deepdoctection/datasets/info.py +24 -26
- deepdoctection/datasets/instances/doclaynet.py +51 -51
- deepdoctection/datasets/instances/fintabnet.py +46 -46
- deepdoctection/datasets/instances/funsd.py +25 -24
- deepdoctection/datasets/instances/iiitar13k.py +13 -10
- deepdoctection/datasets/instances/layouttest.py +4 -3
- deepdoctection/datasets/instances/publaynet.py +5 -5
- deepdoctection/datasets/instances/pubtables1m.py +24 -21
- deepdoctection/datasets/instances/pubtabnet.py +32 -30
- deepdoctection/datasets/instances/rvlcdip.py +30 -30
- deepdoctection/datasets/instances/xfund.py +26 -26
- deepdoctection/datasets/save.py +6 -6
- deepdoctection/eval/__init__.py +1 -4
- deepdoctection/eval/accmetric.py +32 -33
- deepdoctection/eval/base.py +8 -9
- deepdoctection/eval/cocometric.py +15 -13
- deepdoctection/eval/eval.py +41 -37
- deepdoctection/eval/tedsmetric.py +30 -23
- deepdoctection/eval/tp_eval_callback.py +16 -19
- deepdoctection/extern/__init__.py +2 -7
- deepdoctection/extern/base.py +339 -134
- deepdoctection/extern/d2detect.py +85 -113
- deepdoctection/extern/deskew.py +14 -11
- deepdoctection/extern/doctrocr.py +141 -130
- deepdoctection/extern/fastlang.py +27 -18
- deepdoctection/extern/hfdetr.py +71 -62
- deepdoctection/extern/hflayoutlm.py +504 -211
- deepdoctection/extern/hflm.py +230 -0
- deepdoctection/extern/model.py +488 -302
- deepdoctection/extern/pdftext.py +23 -19
- deepdoctection/extern/pt/__init__.py +1 -3
- deepdoctection/extern/pt/nms.py +6 -2
- deepdoctection/extern/pt/ptutils.py +29 -19
- deepdoctection/extern/tessocr.py +39 -38
- deepdoctection/extern/texocr.py +18 -18
- deepdoctection/extern/tp/tfutils.py +57 -9
- deepdoctection/extern/tp/tpcompat.py +21 -14
- deepdoctection/extern/tp/tpfrcnn/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/common.py +7 -3
- deepdoctection/extern/tp/tpfrcnn/config/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/config/config.py +13 -10
- deepdoctection/extern/tp/tpfrcnn/modeling/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/modeling/backbone.py +18 -8
- deepdoctection/extern/tp/tpfrcnn/modeling/generalized_rcnn.py +12 -6
- deepdoctection/extern/tp/tpfrcnn/modeling/model_box.py +14 -9
- deepdoctection/extern/tp/tpfrcnn/modeling/model_cascade.py +8 -5
- deepdoctection/extern/tp/tpfrcnn/modeling/model_fpn.py +22 -17
- deepdoctection/extern/tp/tpfrcnn/modeling/model_frcnn.py +21 -14
- deepdoctection/extern/tp/tpfrcnn/modeling/model_mrcnn.py +19 -11
- deepdoctection/extern/tp/tpfrcnn/modeling/model_rpn.py +15 -10
- deepdoctection/extern/tp/tpfrcnn/predict.py +9 -4
- deepdoctection/extern/tp/tpfrcnn/preproc.py +12 -8
- deepdoctection/extern/tp/tpfrcnn/utils/__init__.py +20 -0
- deepdoctection/extern/tp/tpfrcnn/utils/box_ops.py +10 -2
- deepdoctection/extern/tpdetect.py +45 -53
- deepdoctection/mapper/__init__.py +3 -8
- deepdoctection/mapper/cats.py +27 -29
- deepdoctection/mapper/cocostruct.py +10 -10
- deepdoctection/mapper/d2struct.py +27 -26
- deepdoctection/mapper/hfstruct.py +13 -8
- deepdoctection/mapper/laylmstruct.py +178 -37
- deepdoctection/mapper/maputils.py +12 -11
- deepdoctection/mapper/match.py +2 -2
- deepdoctection/mapper/misc.py +11 -9
- deepdoctection/mapper/pascalstruct.py +4 -4
- deepdoctection/mapper/prodigystruct.py +5 -5
- deepdoctection/mapper/pubstruct.py +84 -92
- deepdoctection/mapper/tpstruct.py +5 -5
- deepdoctection/mapper/xfundstruct.py +33 -33
- deepdoctection/pipe/__init__.py +1 -1
- deepdoctection/pipe/anngen.py +12 -14
- deepdoctection/pipe/base.py +52 -106
- deepdoctection/pipe/common.py +72 -59
- deepdoctection/pipe/concurrency.py +16 -11
- deepdoctection/pipe/doctectionpipe.py +24 -21
- deepdoctection/pipe/language.py +20 -25
- deepdoctection/pipe/layout.py +20 -16
- deepdoctection/pipe/lm.py +75 -105
- deepdoctection/pipe/order.py +194 -89
- deepdoctection/pipe/refine.py +111 -124
- deepdoctection/pipe/segment.py +156 -161
- deepdoctection/pipe/{cell.py → sub_layout.py} +50 -40
- deepdoctection/pipe/text.py +37 -36
- deepdoctection/pipe/transform.py +19 -16
- deepdoctection/train/__init__.py +6 -12
- deepdoctection/train/d2_frcnn_train.py +48 -41
- deepdoctection/train/hf_detr_train.py +41 -30
- deepdoctection/train/hf_layoutlm_train.py +153 -135
- deepdoctection/train/tp_frcnn_train.py +32 -31
- deepdoctection/utils/concurrency.py +1 -1
- deepdoctection/utils/context.py +13 -6
- deepdoctection/utils/develop.py +4 -4
- deepdoctection/utils/env_info.py +87 -125
- deepdoctection/utils/file_utils.py +6 -11
- deepdoctection/utils/fs.py +22 -18
- deepdoctection/utils/identifier.py +2 -2
- deepdoctection/utils/logger.py +16 -15
- deepdoctection/utils/metacfg.py +7 -7
- deepdoctection/utils/mocks.py +93 -0
- deepdoctection/utils/pdf_utils.py +11 -11
- deepdoctection/utils/settings.py +185 -181
- deepdoctection/utils/tqdm.py +1 -1
- deepdoctection/utils/transform.py +14 -9
- deepdoctection/utils/types.py +104 -0
- deepdoctection/utils/utils.py +7 -7
- deepdoctection/utils/viz.py +74 -72
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/METADATA +30 -21
- deepdoctection-0.33.dist-info/RECORD +146 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/WHEEL +1 -1
- deepdoctection/utils/detection_types.py +0 -68
- deepdoctection-0.31.dist-info/RECORD +0 -144
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/LICENSE +0 -0
- {deepdoctection-0.31.dist-info → deepdoctection-0.33.dist-info}/top_level.txt +0 -0
--- a/deepdoctection/mapper/d2struct.py
+++ b/deepdoctection/mapper/d2struct.py
@@ -19,26 +19,28 @@
 Module for mapping annotations into standard Detectron2 dataset dict. Also providing some tools for W&B mapping and
 visualising
 """
-
+from __future__ import annotations
 
 import os.path
-from typing import
+from typing import Mapping, Optional, Sequence, Union
 
 import numpy as np
-import
+from lazy_imports import try_import
 
-from ..datapoint.annotation import ImageAnnotation
+from ..datapoint.annotation import DEFAULT_CATEGORY_ID, ImageAnnotation
 from ..datapoint.image import Image
 from ..extern.pt.nms import batched_nms
 from ..mapper.maputils import curry
-from ..utils.
-from ..utils.
-
+from ..utils.settings import DefaultType, ObjectTypes, TypeOrStr, get_type
+from ..utils.types import Detectron2Dict
+
+with try_import() as pt_import_guard:
+    import torch
 
-
+with try_import() as d2_import_guard:
     from detectron2.structures import BoxMode
 
-
+with try_import() as wb_import_guard:
     from wandb import Classes
     from wandb import Image as Wbimage
 
@@ -47,8 +49,8 @@ if wandb_available():
 def image_to_d2_frcnn_training(
     dp: Image,
     add_mask: bool = False,
-    category_names: Optional[Union[
-) -> Optional[
+    category_names: Optional[Union[TypeOrStr, Sequence[TypeOrStr]]] = None,
+) -> Optional[Detectron2Dict]:
     """
     Maps an image to a standard dataset dict as described in
     <https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html>. It further checks if the image is physically
@@ -64,7 +66,7 @@ def image_to_d2_frcnn_training(
     if not os.path.isfile(dp.location) and dp.image is None:
         return None
 
-    output:
+    output: Detectron2Dict = {"file_name": str(dp.location)}
 
     if dp.image is not None:
         output["image"] = dp.image.astype("float32")
@@ -85,10 +87,10 @@
             box = box.transform(dp.width, dp.height, absolute_coords=True)
 
         # Detectron2 does not fully support BoxMode.XYXY_REL
-        mapped_ann:
+        mapped_ann: dict[str, Union[str, int, list[float]]] = {
             "bbox_mode": BoxMode.XYXY_ABS,
             "bbox": box.to_list(mode="xyxy"),
-            "category_id":
+            "category_id": ann.category_id - 1,
         }
         annotations.append(mapped_ann)
 
@@ -147,23 +149,23 @@ def pt_nms_image_annotations(
 
 def _get_category_attributes(
     ann: ImageAnnotation, cat_to_sub_cat: Optional[Mapping[ObjectTypes, ObjectTypes]] = None
-) ->
+) -> tuple[ObjectTypes, int, Optional[float]]:
     if cat_to_sub_cat:
         sub_cat_key = cat_to_sub_cat.get(get_type(ann.category_name))
         if sub_cat_key in ann.sub_categories:
             sub_cat = ann.get_sub_category(sub_cat_key)
-            return sub_cat.category_name, sub_cat.category_id, sub_cat.score
-        return
-    return ann.category_name, ann.category_id, ann.score
+            return get_type(sub_cat.category_name), sub_cat.category_id, sub_cat.score
+        return DefaultType.DEFAULT_TYPE, DEFAULT_CATEGORY_ID, 0.0
+    return get_type(ann.category_name), ann.category_id, ann.score
 
 
 @curry
 def to_wandb_image(
     dp: Image,
-    categories: Mapping[
-    sub_categories: Optional[Mapping[
+    categories: Mapping[int, TypeOrStr],
+    sub_categories: Optional[Mapping[int, TypeOrStr]] = None,
     cat_to_sub_cat: Optional[Mapping[ObjectTypes, ObjectTypes]] = None,
-) ->
+) -> tuple[str, Wbimage]:
     """
     Converting a deepdoctection image into a wandb image
 
@@ -183,11 +185,10 @@ def to_wandb_image(
     anns = dp.get_annotation(category_names=list(categories.values()))
 
     if sub_categories:
-        class_labels =
-        class_set = Classes([{"name": val, "id":
+        class_labels = dict(sub_categories.items())
+        class_set = Classes([{"name": val, "id": key} for key, val in sub_categories.items()])
     else:
-
-        class_set = Classes([{"name": val, "id": int(key)} for key, val in categories.items()])
+        class_set = Classes([{"name": val, "id": key} for key, val in categories.items()])
 
     for ann in anns:
         bounding_box = ann.get_bounding_box(dp.image_id)
@@ -198,7 +199,7 @@
         box = {
             "position": {"middle": bounding_box.center, "width": bounding_box.width, "height": bounding_box.height},
             "domain": "pixel",
-            "class_id":
+            "class_id": category_id,
             "box_caption": category_name,
         }
         if score:
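Two conventions recur throughout this file and the rest of the release: optional backends are guarded with `lazy_imports.try_import` instead of the old `*_available()` checks, and `category_id` is now an `int`, so the shift to Detectron2's zero-based labels becomes plain arithmetic (`ann.category_id - 1`). The sketch below illustrates the guard pattern as documented for the `lazy-imports` package; the `needs_detectron2` helper and the `check()` call are illustrative assumptions, not code from this diff.

```python
# Sketch of the lazy-import pattern the new code relies on (lazy-imports package).
# The context manager swallows a failing import and records it on the guard, so
# the ImportError only surfaces when the optional dependency is actually needed.
from lazy_imports import try_import

with try_import() as d2_import_guard:
    from detectron2.structures import BoxMode  # optional heavy dependency


def needs_detectron2() -> str:
    d2_import_guard.check()  # re-raises the deferred ImportError if detectron2 is absent
    return str(BoxMode.XYXY_ABS)
```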
--- a/deepdoctection/mapper/hfstruct.py
+++ b/deepdoctection/mapper/hfstruct.py
@@ -19,26 +19,31 @@
 Module for mapping annotations into standard Huggingface Detr input structure for training
 """
 
+from __future__ import annotations
+
 import os
 from dataclasses import dataclass, field
-from typing import
+from typing import Literal, Optional, Sequence, Union
 
 import numpy as np
-from
+from lazy_imports import try_import
 
 from ..datapoint.image import Image
 from ..mapper.maputils import curry
 from ..mapper.misc import get_load_image_func
-from ..utils.
-from ..utils.settings import ObjectTypes
+from ..utils.settings import TypeOrStr
 from ..utils.transform import PadTransform
+from ..utils.types import JsonDict
+
+with try_import() as tr_import_guard:
+    from transformers import BatchFeature, DetrFeatureExtractor
 
 
 @curry
 def image_to_hf_detr_training(
     dp: Image,
     add_mask: bool = False,
-    category_names: Optional[Union[
+    category_names: Optional[Union[TypeOrStr, Sequence[Union[TypeOrStr]]]] = None,
 ) -> Optional[JsonDict]:
     """
     Maps an image to a detr input datapoint dict, that, after collating can be used for training.
@@ -71,11 +76,11 @@ def image_to_hf_detr_training(
     for ann in anns:
         box = ann.get_bounding_box(dp.image_id)
 
-        mapped_ann:
+        mapped_ann: dict[str, Union[str, int, float, list[float]]] = {
             "id": "".join([c for c in ann.annotation_id if c.isdigit()])[:8],
             "image_id": "".join([c for c in dp.image_id if c.isdigit()])[:8],
             "bbox": box.to_list(mode="xywh"),
-            "category_id":
+            "category_id": ann.category_id - 1,
             "area": box.area,
         }
         annotations.append(mapped_ann)
@@ -103,7 +108,7 @@ class DetrDataCollator:
     padder: Optional[PadTransform] = None
     return_tensors: Optional[Literal["pt"]] = field(default="pt")
 
-    def __call__(self, raw_features:
+    def __call__(self, raw_features: list[JsonDict]) -> BatchFeature:
         """
         Creating BatchFeature from a list of dict of raw features.
 
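For orientation, the `mapped_ann` dict assembled above follows the COCO-style target layout that the guarded `DetrFeatureExtractor` consumes. A self-contained illustration with made-up values (none of these numbers come from the diff):

```python
# Hypothetical values mirroring the target dict built in the hunk above:
# ids reduced to their digits, bbox in xywh format, and deepdoctection's
# 1-based category_id shifted into DETR's 0-based label space.
annotation_id = "5f2c-9a41-77b0-13d9"  # made-up uuid-like id
box_xywh = [10.0, 20.0, 100.0, 40.0]   # x, y, width, height

mapped_ann = {
    "id": "".join(c for c in annotation_id if c.isdigit())[:8],
    "image_id": "20240001",
    "bbox": box_xywh,
    "category_id": 3 - 1,               # e.g. dd category 3 -> DETR label 2
    "area": box_xywh[2] * box_xywh[3],  # axis-aligned area, w * h
}
assert mapped_ann["id"] == "52941770"
```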
--- a/deepdoctection/mapper/laylmstruct.py
+++ b/deepdoctection/mapper/laylmstruct.py
@@ -20,32 +20,30 @@ Module for mapping annotations from image to layout lm input structure. Heavily
 <https://github.com/NielsRogge/Transformers-Tutorials>
 """
 
+from __future__ import annotations
+
 import random
 from dataclasses import dataclass, field
-from typing import Any, Callable,
+from typing import Any, Callable, Literal, NewType, Optional, Sequence, Union
 
 import numpy as np
 import numpy.typing as npt
+from lazy_imports import try_import
 
 from ..datapoint.annotation import ContainerAnnotation
 from ..datapoint.convert import box_to_point4, point4_to_box
 from ..datapoint.image import Image
-from ..
-from ..utils.file_utils import pytorch_available, transformers_available
+from ..datapoint.view import Page
 from ..utils.settings import DatasetType, LayoutType, PageType, Relationships, WordType
 from ..utils.transform import ResizeTransform, normalize_image
+from ..utils.types import JsonDict
 from .maputils import curry
 
-
+with try_import() as import_guard:
     import torch
 
-
-    from transformers import
-        BatchEncoding,
-        PreTrainedTokenizerFast,
-        RobertaTokenizerFast,
-        XLMRobertaTokenizerFast,
-    )
+with try_import() as tr_import_guard:
+    from transformers import BatchEncoding, PreTrainedTokenizerFast  # pylint: disable=W0611
 
 __all__ = [
     "image_to_raw_layoutlm_features",
@@ -54,19 +52,24 @@ __all__ = [
     "image_to_layoutlm_features",
     "DataCollator",
     "LayoutLMFeatures",
+    "image_to_raw_lm_features",
+    "image_to_lm_features",
 ]
 
 RawLayoutLMFeatures = NewType("RawLayoutLMFeatures", JsonDict)
+RawLMFeatures = NewType("RawLMFeatures", JsonDict)
 LayoutLMFeatures = NewType("LayoutLMFeatures", JsonDict)
+LMFeatures = NewType("LMFeatures", JsonDict)
 InputDataClass = NewType("InputDataClass", JsonDict)
 
+
 """
 <https://github.com/huggingface/transformers/src/transformers/data/data_collator.py>
 A DataCollator is a function that takes a list of samples from a Dataset and collate them into a batch, as a dictionary
 of PyTorch/TensorFlow tensors or NumPy arrays.
 """
 
-DataCollator = NewType("DataCollator", Callable[[
+DataCollator = NewType("DataCollator", Callable[[list[InputDataClass]], dict[str, Any]])  # type: ignore
 
 _CLS_BOX = [0.0, 0.0, 1000.0, 1000.0]
 _SEP_BOX = [1000.0, 1000.0, 1000.0, 1000.0]
@@ -122,9 +125,9 @@ def image_to_raw_layoutlm_features(
     all_ann_ids = []
     all_words = []
     all_boxes = []
-    all_labels:
+    all_labels: list[int] = []
 
-    anns = dp.get_annotation_iter(category_names=LayoutType.
+    anns = dp.get_annotation_iter(category_names=LayoutType.WORD)
 
     word_id_to_segment_box = {}
     if segment_positions:
@@ -136,12 +139,12 @@
         if not bounding_box.absolute_coords:
             bounding_box = bounding_box.transform(dp.width, dp.height, absolute_coords=True)
         word_id_to_segment_box.update(
-            {word_ann: bounding_box for word_ann in segm_ann.get_relationship(Relationships.
+            {word_ann: bounding_box for word_ann in segm_ann.get_relationship(Relationships.CHILD)}
         )
 
     for ann in anns:
         all_ann_ids.append(ann.annotation_id)
-        char_cat = ann.get_sub_category(WordType.
+        char_cat = ann.get_sub_category(WordType.CHARACTERS)
         if not isinstance(char_cat, ContainerAnnotation):
             raise TypeError(f"char_cat must be of type ContainerAnnotation but is of type {type(char_cat)}")
         word = char_cat.value
@@ -155,15 +158,15 @@
         all_boxes.append(word_id_to_segment_box.get(ann.annotation_id, box).to_list(mode="xyxy"))
 
         if (
-            WordType.
-        ) and dataset_type == DatasetType.
+            WordType.TOKEN_TAG in ann.sub_categories or WordType.TOKEN_CLASS in ann.sub_categories
+        ) and dataset_type == DatasetType.TOKEN_CLASSIFICATION:
             if use_token_tag:
-                all_labels.append(
+                all_labels.append(ann.get_sub_category(WordType.TOKEN_TAG).category_id - 1)
             else:
-                all_labels.append(
+                all_labels.append(ann.get_sub_category(WordType.TOKEN_CLASS).category_id - 1)
 
-    if
-        all_labels.append(
+    if dataset_type == DatasetType.SEQUENCE_CLASSIFICATION:
+        all_labels.append(dp.summary.get_sub_category(PageType.DOCUMENT_TYPE).category_id - 1)
 
     boxes = np.asarray(all_boxes, dtype="float32")
     if boxes.ndim == 1:
@@ -208,7 +211,7 @@ def image_to_raw_layoutlm_features(
     return raw_features
 
 
-def
+def layoutlm_features_to_pt_tensors(features: LayoutLMFeatures) -> LayoutLMFeatures:
     """
     Converting list of floats to pytorch tensors
     :param features: LayoutLMFeatures
@@ -216,7 +219,8 @@ def features_to_pt_tensors(features: LayoutLMFeatures) -> LayoutLMFeatures:
     """
 
     _image_key = "pixel_values" if "pixel_values" in features else "image"
-
+    if "bbox" in features:
+        features["bbox"] = torch.tensor(features["bbox"], dtype=torch.long)
     if "labels" in features:
         features["labels"] = torch.tensor(features["labels"], dtype=torch.long)
     if _image_key in features:
@@ -230,12 +234,12 @@ def features_to_pt_tensors(features: LayoutLMFeatures) -> LayoutLMFeatures:
 
 
 def _tokenize_with_sliding_window(
-    raw_features:
-    tokenizer:
+    raw_features: list[Union[RawLayoutLMFeatures, RawLMFeatures]],
+    tokenizer: PreTrainedTokenizerFast,
     sliding_window_stride: int,
     max_batch_size: int,
     return_tensors: Optional[Literal["pt"]] = None,
-) -> Union[JsonDict,
+) -> Union[JsonDict, BatchEncoding]:
     """
     Runs a tokenizer: If there are no overflowing tokens, the tokenizer output will be returned as it is.
     If there are overflowing tokens, sliding windows have to be built. As it is easier to prepare the sliding windows
@@ -381,7 +385,7 @@ def _tokenize_with_sliding_window(
         )
     )
 
-    slided_tokenized_inputs:
+    slided_tokenized_inputs: dict[str, Union[list[Union[str, int]], torch.Tensor]] = {}
     if return_tensors == "pt":
         slided_tokenized_inputs["overflow_to_sample_mapping"] = torch.tensor(overflow_to_sample_mapping)
         slided_tokenized_inputs["input_ids"] = torch.tensor(all_input_ids)
@@ -398,8 +402,8 @@
 
 
 def raw_features_to_layoutlm_features(
-    raw_features: Union[RawLayoutLMFeatures,
-    tokenizer:
+    raw_features: Union[RawLayoutLMFeatures, RawLMFeatures, list[Union[RawLayoutLMFeatures, RawLMFeatures]]],
+    tokenizer: PreTrainedTokenizerFast,
     padding: Literal["max_length", "do_not_pad", "longest"] = "max_length",
     truncation: bool = True,
     return_overflowing_tokens: bool = False,
@@ -407,6 +411,7 @@ def raw_features_to_layoutlm_features(
     remove_columns_for_training: bool = False,
     sliding_window_stride: int = 0,
     max_batch_size: int = 0,
+    remove_bounding_boxes: bool = False,
 ) -> LayoutLMFeatures:
     """
     Mapping raw features to tokenized input sequences for LayoutLM models.
@@ -442,11 +447,11 @@
         raw_features = [raw_features]
 
     _has_token_labels = (
-        raw_features[0]["dataset_type"] == DatasetType.
+        raw_features[0]["dataset_type"] == DatasetType.TOKEN_CLASSIFICATION
        and raw_features[0].get("labels") is not None
     )
     _has_sequence_labels = (
-        raw_features[0]["dataset_type"] == DatasetType.
+        raw_features[0]["dataset_type"] == DatasetType.SEQUENCE_CLASSIFICATION
         and raw_features[0].get("labels") is not None
     )
     _has_labels = bool(_has_token_labels or _has_sequence_labels)
@@ -563,8 +568,11 @@
         input_dict.pop("ann_ids")
         input_dict.pop("tokens")
 
+    if remove_bounding_boxes:
+        input_dict.pop("bbox")
+
     if return_tensors == "pt":
-        return
+        return layoutlm_features_to_pt_tensors(LayoutLMFeatures(input_dict))
     return LayoutLMFeatures(input_dict)
 
 
@@ -595,13 +603,14 @@ class LayoutLMDataCollator:
     with windows shifted `sliding_window_stride` to the right.
     """
 
-    tokenizer:
+    tokenizer: PreTrainedTokenizerFast
     padding: Literal["max_length", "do_not_pad", "longest"] = field(default="max_length")
     truncation: bool = field(default=True)
     return_overflowing_tokens: bool = field(default=False)
     return_tensors: Optional[Literal["pt"]] = field(default=None)
     sliding_window_stride: int = field(default=0)
     max_batch_size: int = field(default=0)
+    remove_bounding_box_features: bool = field(default=False)
 
     def __post_init__(self) -> None:
         assert isinstance(self.tokenizer, PreTrainedTokenizerFast), "Tokenizer must be a fast tokenizer"
@@ -611,7 +620,7 @@ class LayoutLMDataCollator:
         if self.return_overflowing_tokens:
             assert self.truncation, self.truncation
 
-    def __call__(self, raw_features: Union[RawLayoutLMFeatures,
+    def __call__(self, raw_features: Union[RawLayoutLMFeatures, list[RawLayoutLMFeatures]]) -> LayoutLMFeatures:
         """
         Calling the DataCollator to form model inputs for training and inference. Takes a single raw
         :param raw_features: A dictionary with the following arguments: `image_id, width, height, ann_ids, words,
@@ -620,7 +629,7 @@
         token_type_ids, attention_masks, boxes, labels`.
         """
         return raw_features_to_layoutlm_features(
-            raw_features,
+            raw_features,  # type: ignore
             self.tokenizer,
             self.padding,
             self.truncation,
@@ -629,13 +638,14 @@
             True,
             self.sliding_window_stride,
             self.max_batch_size,
+            self.remove_bounding_box_features,
         )
 
 
 @curry
 def image_to_layoutlm_features(
     dp: Image,
-    tokenizer:
+    tokenizer: PreTrainedTokenizerFast,
     padding: Literal["max_length", "do_not_pad", "longest"] = "max_length",
     truncation: bool = True,
     return_overflowing_tokens: bool = False,
@@ -724,3 +734,134 @@ def image_to_layoutlm_features(
         sliding_window_stride=sliding_window_stride,
     )
     return features
+
+
+@curry
+def image_to_raw_lm_features(
+    dp: Image,
+    dataset_type: Optional[Literal["sequence_classification", "token_classification"]] = None,
+    use_token_tag: bool = True,
+    text_container: Optional[LayoutType] = LayoutType.WORD,
+    floating_text_block_categories: Optional[Sequence[LayoutType]] = None,
+    include_residual_text_container: bool = False,
+) -> Optional[RawLMFeatures]:
+    """
+    Mapping a datapoint into an intermediate format for bert-like models. Features will be provided into a dict and
+    this mapping can be used for sequence or token classification as well as for inference. To generate input features
+    for the model please `use raw_features_to_layoutlm_features`.
+
+    :param dp: Image
+    :param dataset_type: Either SEQUENCE_CLASSIFICATION or TOKEN_CLASSIFICATION. When using a built-in dataset use
+    :param use_token_tag: Will only be used for dataset_type="token_classification". If use_token_tag=True, will use
+                          labels from sub category `WordType.token_tag` (with `B,I,O` suffix), otherwise
+                          `WordType.token_class`.
+    :param text_container: A LayoutType to get the text from. It will steer the output of `Layout.words`.
+    :param floating_text_block_categories: A list of top level layout objects
+    :param include_residual_text_container: This will regard synthetic text line annotations as floating text
+                                            blocks and therefore incorporate all image annotations of category
+                                            `word` when building text strings.
+    :return: dictionary with the following arguments:
+             'image_id', 'width', 'height', 'ann_ids', 'words', 'bbox' and 'dataset_type'.
+    """
+
+    raw_features: RawLMFeatures = RawLMFeatures({})
+
+    page = Page.from_image(dp, text_container, floating_text_block_categories, include_residual_text_container)
+
+    text_ = page.text_
+
+    # pylint: disable=E1137 #3162
+    raw_features["image_id"] = page.image_id
+    raw_features["width"] = page.width
+    raw_features["height"] = page.height
+    raw_features["ann_ids"] = text_["ann_ids"]
+    raw_features["words"] = text_["words"]
+    # We use a dummy bounding box for all bounding boxes so that we can pass the raw features to
+    # raw_features_to_layoutlm_features
+    raw_features["bbox"] = [_CLS_BOX] * len(text_["words"])
+    raw_features["dataset_type"] = dataset_type
+
+    if use_token_tag and text_["token_tags"]:
+        raw_features["labels"] = text_["token_tags"]
+    elif text_["token_classes"]:
+        raw_features["labels"] = text_["token_classes"]
+    elif page.document_type is not None:
+        document_type_id = page.image_orig.summary.get_sub_category(PageType.DOCUMENT_TYPE).category_id - 1
+        raw_features["labels"] = [document_type_id]
+
+    raw_features["dataset_type"] = dataset_type
+    # pylint: enable=E1137
+    return raw_features
+
+
+@curry
+def image_to_lm_features(
+    dp: Image,
+    tokenizer: PreTrainedTokenizerFast,
+    padding: Literal["max_length", "do_not_pad", "longest"] = "max_length",
+    truncation: bool = True,
+    return_overflowing_tokens: bool = False,
+    return_tensors: Optional[Literal["pt"]] = "pt",
+    sliding_window_stride: int = 0,
+    text_container: Optional[LayoutType] = LayoutType.WORD,
+    floating_text_block_categories: Optional[Sequence[LayoutType]] = None,
+    include_residual_text_container: bool = False,
+) -> Optional[LayoutLMFeatures]:
+    """
+    Mapping function to generate layoutlm features from `Image` to be used for inference in a pipeline component.
+    `LanguageModelPipelineComponent` has a positional argument `mapping_to_lm_input_func` that must be chosen
+    with respect to the language model chosen. This mapper is devoted to generating features for LayoutLM. It will be
+    used internally in `LMTokenClassifierService`.
+
+        tokenizer = LayoutLMTokenizer.from_pretrained("mrm8488/layoutlm-finetuned-funsd")
+        layoutlm = HFLayoutLmTokenClassifier("path/to/config.json","path/to/model.bin",
+                                             categories_explicit=['B-ANSWER', 'B-QUESTION', 'O'])
+
+        layoutlm_service = LMTokenClassifierService(tokenizer,layoutlm)
+
+    :param dp: Image datapoint
+    :param tokenizer: Tokenizer compatible with the language model
+    :param padding: A padding strategy to be passed to the tokenizer. Must bei either `max_length, longest` or
+                    `do_not_pad`.
+    :param truncation: If "True" will truncate to a maximum length specified with the argument max_length or to the
+                       maximum acceptable input length for the model if that argument is not provided. This will
+                       truncate token by token, removing a token from the longest sequence in the pair if a pair of
+                       sequences (or a batch of pairs) is provided.
+                       If `False` then no truncation (i.e., can output batch with sequence lengths greater than the
+                       model maximum admissible input size).
+    :param return_overflowing_tokens: If a sequence (due to a truncation strategy) overflows the overflowing tokens
+                                      can be returned as an additional batch element. Not that in this case, the number
+                                      of input batch samples will be smaller than the output batch samples.
+    :param return_tensors: Output tensor features. Either 'pt' for PyTorch models or None, if features should be
+                           returned in list objects.
+    :param sliding_window_stride: If the output of the tokenizer exceeds the max_length sequence length a sliding
+                                  windows will be created with each window having max_length sequence input. When using
+                                  `sliding_window_stride=0` no strides will be created, otherwise it will create slides
+                                  with windows shifted `sliding_window_stride` to the right.
+    :param text_container: A LayoutType to get the text from. It will steer the output of `Layout.words`.
+    :param floating_text_block_categories: A list of top level layout objects
+    :param include_residual_text_container: This will regard synthetic text line annotations as floating text
+                                            blocks and therefore incorporate all image annotations of category
+                                            `word` when building text strings.
+    :return: A dict of lm features
+    """
+    raw_features = image_to_raw_lm_features(  # pylint: disable=E1102
+        dataset_type=None,
+        use_token_tag=True,
+        text_container=text_container,
+        floating_text_block_categories=floating_text_block_categories,
+        include_residual_text_container=include_residual_text_container,
+    )(dp)
+    if raw_features is None:
+        return None
+    features = raw_features_to_layoutlm_features(
+        raw_features,
+        tokenizer,
+        padding,
+        truncation,
+        return_overflowing_tokens,
+        return_tensors=return_tensors,
+        sliding_window_stride=sliding_window_stride,
+    )
+    return features
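The bulk of the +178 lines in this file is the new text-only path: `image_to_raw_lm_features` fills `bbox` with the dummy `_CLS_BOX` for every word so the raw features can be funneled through `raw_features_to_layoutlm_features`, and the new `remove_bounding_boxes` / `remove_bounding_box_features` switches strip those dummy boxes again before a plain bert-like model sees the batch. A hedged usage sketch of the extended collator follows; the checkpoint name is only a placeholder, and wiring it into a full pipeline is not shown here.

```python
# Sketch, assuming transformers is installed; "bert-base-uncased" is a placeholder
# checkpoint that happens to ship a fast tokenizer, which __post_init__ asserts.
from transformers import AutoTokenizer

from deepdoctection.mapper.laylmstruct import LayoutLMDataCollator

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
collator = LayoutLMDataCollator(
    tokenizer,
    padding="max_length",
    truncation=True,
    return_tensors="pt",
    remove_bounding_box_features=True,  # new field in 0.33: drop the dummy boxes
)
# collator(raw_features) then returns LayoutLMFeatures without a "bbox" tensor,
# suitable for the text-only models served by the new extern/hflm.py.
```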
--- a/deepdoctection/mapper/maputils.py
+++ b/deepdoctection/mapper/maputils.py
@@ -18,20 +18,22 @@
 """
 Utility functions related to mapping tasks
 """
+from __future__ import annotations
+
 import functools
 import itertools
 import traceback
 from types import TracebackType
-from typing import Any, Callable,
+from typing import Any, Callable, Mapping, Optional, Sequence, Union
 
 import numpy as np
 from tabulate import tabulate
 from termcolor import colored
 
-from ..utils.detection_types import DP, BaseExceptionType, S, T
 from ..utils.error import AnnotationError, BoundingBoxError, ImageError, UUIDError
 from ..utils.logger import LoggingRecord, logger
 from ..utils.settings import ObjectTypes
+from ..utils.types import DP, BaseExceptionType, S, T
 
 __all__ = ["MappingContextManager", "DefaultMapper", "maybe_get_fake_score", "LabelSummarizer", "curry"]
 
@@ -43,7 +45,7 @@ class MappingContextManager:
     """
 
     def __init__(
-        self, dp_name: Optional[str] = None, filter_level: str = "image", **kwargs:
+        self, dp_name: Optional[str] = None, filter_level: str = "image", **kwargs: dict[str, Optional[str]]
     ) -> None:
         """
         :param dp_name: A name for the datapoint to be mapped
@@ -55,7 +57,7 @@ class MappingContextManager:
         self.context_error = True
         self.kwargs = kwargs
 
-    def __enter__(self) ->
+    def __enter__(self) -> MappingContextManager:
         """
         context enter
         """
@@ -79,6 +81,7 @@ class MappingContextManager:
                 AssertionError,
                 TypeError,
                 FileNotFoundError,
+                AttributeError,
                 BoundingBoxError,
                 AnnotationError,
                 ImageError,
@@ -190,7 +193,7 @@ class LabelSummarizer:
 
     """
 
-    def __init__(self, categories: Mapping[
+    def __init__(self, categories: Mapping[int, ObjectTypes]) -> None:
         """
         :param categories: A dict of categories as given as in categories.get_categories().
         """
@@ -208,11 +211,11 @@ class LabelSummarizer:
         np_item = np.asarray(item, dtype="int8")
         self.summary += np.histogram(np_item, bins=self.hist_bins)[0]
 
-    def get_summary(self) ->
+    def get_summary(self) -> dict[int, int]:
         """
         Get a dictionary with category ids and the number dumped
         """
-        return dict(list(zip(self.categories.keys(), self.summary.
+        return dict(list(zip(self.categories.keys(), self.summary.tolist())))
 
     def print_summary_histogram(self, dd_logic: bool = True) -> None:
         """
@@ -221,11 +224,9 @@ class LabelSummarizer:
         :param dd_logic: Follow dd category convention when printing histogram (last background bucket omitted).
         """
         if dd_logic:
-            data = list(itertools.chain(*[[self.categories[
+            data = list(itertools.chain(*[[self.categories[i].value, v] for i, v in enumerate(self.summary, 1)]))
         else:
-            data = list(
-                itertools.chain(*[[self.categories[str(i + 1)].value, v] for i, v in enumerate(self.summary[:-1])])
-            )
+            data = list(itertools.chain(*[[self.categories[i + 1].value, v] for i, v in enumerate(self.summary[:-1])]))
         num_columns = min(6, len(data))
         total_img_anns = sum(data[1::2])
         data.extend([None] * ((num_columns - len(data) % num_columns) % num_columns))
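`LabelSummarizer` is one of several APIs in this release whose category mappings move from `str` to `int` keys (the old branch still indexed `self.categories[str(i + 1)]`). A minimal sketch of the changed contract, with hypothetical categories; the `dump` name is our assumption for the method whose body appears in the hunk above, and the printed result is the expected histogram, not output captured from a run.

```python
# Minimal sketch of the new int-keyed contract (categories chosen for illustration).
from deepdoctection.mapper.maputils import LabelSummarizer
from deepdoctection.utils.settings import LayoutType

summarizer = LabelSummarizer({1: LayoutType.TEXT, 2: LayoutType.TABLE})
summarizer.dump([1, 1, 2])       # category ids collected from one datapoint
print(summarizer.get_summary())  # expected: {1: 2, 2: 1}
```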
|