deepdoctection 0.42.0__py3-none-any.whl → 0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic.
- deepdoctection/__init__.py +2 -1
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +904 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +157 -106
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +196 -113
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +25 -17
- deepdoctection/utils/env_info.py +85 -36
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -62
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.dist-info/METADATA +376 -0
- deepdoctection-0.43.dist-info/RECORD +149 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/WHEEL +1 -1
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.0.dist-info/METADATA +0 -431
- deepdoctection-0.42.0.dist-info/RECORD +0 -148
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.0.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
@@ -48,9 +48,13 @@ class MappingContextManager:
         self, dp_name: Optional[str] = None, filter_level: str = "image", **kwargs: dict[str, Optional[str]]
     ) -> None:
         """
-        :
+        Args:
+            dp_name: A name for the datapoint to be mapped.
+            filter_level: Indicates if the `MappingContextManager` is used on datapoint level, annotation level etc.
+                `filter_level` will only be used for logging.
+
+        Note:
+            Use this context manager to catch and log exceptions during mapping.
         """
         self.dp_name = dp_name if dp_name is not None else ""
         self.filter_level = filter_level

@@ -59,7 +63,10 @@ class MappingContextManager:

     def __enter__(self) -> MappingContextManager:
         """
+        Context enter.
+
+        Returns:
+            The `MappingContextManager` instance.
         """
         return self

@@ -70,7 +77,15 @@ class MappingContextManager:
         exc_tb: Optional[TracebackType],
     ) -> Optional[bool]:
         """
+        Context exit.
+
+        Args:
+            exc_type: The exception type.
+            exc_val: The exception value.
+            exc_tb: The traceback object.
+
+        Returns:
+            `True` if the exception was handled, otherwise `None`.
         """
         if (
             exc_type
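The new docstrings above describe `MappingContextManager` as a guard for mapping functions: exceptions raised while mapping a datapoint are caught and logged instead of stopping the dataflow. A minimal usage sketch, assuming the import path below and with `risky_transform` as a hypothetical stand-in for the actual mapping step:

```python
# Hedged sketch: log and skip a failing datapoint instead of aborting the whole dataflow.
from deepdoctection.mapper.maputils import MappingContextManager  # import path assumed

def my_mapper(dp):
    with MappingContextManager(dp_name=str(getattr(dp, "file_name", "")), filter_level="image"):
        # Exceptions raised in this block are caught by __exit__ and logged together with
        # dp_name and filter_level; the dataflow then continues with the next datapoint.
        dp = risky_transform(dp)  # hypothetical stand-in for the real mapping logic
    return dp
```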
@@ -121,9 +136,10 @@ class DefaultMapper:

     def __init__(self, func: Callable[[DP, S], T], *args: Any, **kwargs: Any) -> None:
         """
-        :
+        Args:
+            func: A mapping function
+            args: Default `args` to pass to the function
+            kwargs: Default `kwargs` to pass to the function
         """
         self.func = func
         self.argument_args = args

@@ -131,34 +147,42 @@ class DefaultMapper:

     def __call__(self, dp: Any) -> Any:
         """
+        Call the wrapped function with the given datapoint and default arguments.
+
+        Args:
+            dp: Datapoint within a dataflow.
+
+        Returns:
+            The return value of the invoked function with default arguments.
         """
         return self.func(dp, *self.argument_args, **self.argument_kwargs)


 def curry(func: Callable[..., T]) -> Callable[..., Callable[[DP], T]]:
     """
-    Decorator for converting functions that
+    Decorator for converting functions that map

+    ```python
+    dps: Union[JsonDict, Image] -> Union[JsonDict, Image]
+    ```

-    to `DefaultMapper`s. They will be initialized with all arguments except dp and can be called later with only the
+    to `DefaultMapper`s. They will be initialized with all arguments except `dp` and can be called later with only the
     datapoint as argument. This setting is useful when incorporating the function within a dataflow.

+    Example:
+        ```python
+        @curry
+        def json_to_image(dp, config_arg_1, config_arg_2, ...) -> Image:
             ...
+        df = ...
+        df = MapData(df, json_to_image(config_arg_1=val_1, config_arg_2=val_2))
+        ```

-    df = ...
-    df = MapData(df,json_to_image(config_arg_1=val_1,config_arg_2=val_2))
+    Args:
+        func: A callable [[`Image`], [Any]] -> [`Image`]

-    :
+    Returns:
+        A `DefaultMapper`.
     """

     @functools.wraps(func)
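The `curry` docstring above describes the pattern the whole mapper package relies on: decorate a function whose first argument is the datapoint, bind the remaining configuration arguments once, and hand the resulting `DefaultMapper` to a dataflow. A small sketch under the assumption that `curry` and `MapData` are importable from the paths below:

```python
# Hedged sketch of the curried-mapper pattern; import paths are assumed.
from deepdoctection.dataflow import MapData
from deepdoctection.mapper.maputils import curry

@curry
def add_tag(dp: dict, tag: str) -> dict:
    # dp is the datapoint flowing through the pipeline; everything else is bound up front
    dp["tag"] = tag
    return dp

df = ...                                  # some dataflow yielding dicts
df = MapData(df, add_tag(tag="invoice"))  # add_tag(tag=...) returns a DefaultMapper that only needs dp
```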
@@ -170,10 +194,13 @@ def curry(func: Callable[..., T]) -> Callable[..., Callable[[DP], T]]:

 def maybe_get_fake_score(add_fake_score: bool) -> Optional[float]:
     """
-    Returns a fake score, if add_fake_score
+    Returns a fake score, if `add_fake_score` is `True`. Will otherwise return `None`.
+
+    Args:
+        add_fake_score: Boolean.

-    :
+    Returns:
+        A uniform random variable in `(0,1)` or `None`.
     """
     if add_fake_score:
         return np.random.uniform(0.0, 1.0, 1)[0]

@@ -182,20 +209,24 @@ def maybe_get_fake_score(add_fake_score: bool) -> Optional[float]:

 class LabelSummarizer:
     """
-    A class for generating label statistics. Useful
-    summarizer = LabelSummarizer({"1": "label_1","2":"label_2"})
+    A class for generating label statistics. Useful when mapping and generating a `SummaryAnnotation`.

+    Example:
+        ```python
+        summarizer = LabelSummarizer({"1": "label_1", "2": "label_2"})
         for dp in some_dataflow:
             summarizer.dump(dp["label_id"])
         summarizer.print_summary_histogram()
+        ```

+    Args:
+        categories: A dict of categories as given as in `categories.get_categories()`.
     """

     def __init__(self, categories: Mapping[int, ObjectTypes]) -> None:
         """
-        :
+        Args:
+            categories: A dict of categories as given as in `categories.get_categories()`.
         """
         self.categories = categories
         cat_numbers = len(self.categories.keys())

@@ -204,16 +235,20 @@ class LabelSummarizer:

     def dump(self, item: Union[Sequence[Union[str, int]], str, int]) -> None:
         """
-        Dump a category number
+        Dump a category number.

-        :
+        Args:
+            item: A category number.
         """
         np_item = np.asarray(item, dtype="int8")
         self.summary += np.histogram(np_item, bins=self.hist_bins)[0]

     def get_summary(self) -> dict[int, int]:
         """
-        Get a dictionary with category ids and the number dumped
+        Get a dictionary with category ids and the number dumped.
+
+        Returns:
+            A dictionary mapping category ids to counts.
         """
         return dict(list(zip(self.categories.keys(), self.summary.tolist())))

@@ -221,7 +256,8 @@ class LabelSummarizer:
         """
         Prints a summary from all dumps.

-        :
+        Args:
+            dd_logic: Follow dd category convention when printing histogram (last background bucket omitted).
         """
         if dd_logic:
             data = list(itertools.chain(*[[self.categories[i].value, v] for i, v in enumerate(self.summary, 1)]))
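`dump` and `get_summary` in the hunks above show the counting mechanics: every dumped category id is binned with `np.histogram` and the running counts are finally zipped with the category ids. A standalone sketch of that logic; the construction of `hist_bins` is not visible in the diff, so the bins below are an assumption:

```python
# Standalone sketch of the LabelSummarizer counting logic; hist_bins is assumed here.
import numpy as np

categories = {1: "table", 2: "text"}                    # stand-in for categories.get_categories()
hist_bins = np.arange(0.5, len(categories) + 1, 1.0)    # assumed: one bucket per category id
summary = np.zeros(len(categories), dtype=int)

for label_id in [1, 1, 2, 1]:                           # category ids dumped from a dataflow
    summary += np.histogram(np.asarray(label_id, dtype="int8"), bins=hist_bins)[0]

print(dict(zip(categories.keys(), summary.tolist())))   # {1: 3, 2: 1}
```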
deepdoctection/mapper/match.py
CHANGED

@@ -46,55 +46,64 @@ def match_anns_by_intersection(
     max_parent_only: bool = False,
 ) -> tuple[Any, Any, Sequence[ImageAnnotation], Sequence[ImageAnnotation]]:
     """
-    Generates an iou/ioa-matrix for parent_ann_categories and child_ann_categories and returns pairs of child/parent
-    indices that are above some intersection threshold. It will also return a list of all pre
+    Generates an iou/ioa-matrix for `parent_ann_categories` and `child_ann_categories` and returns pairs of child/parent
+    indices that are above some intersection threshold. It will also return a list of all pre-selected parent and child
     annotations.

+    Example:
+        ```python
+        match_anns_by_intersection()
+        ```
+
+        Let `p_i, c_j` be annotation ids of parent and children according to some category names.
+
+        | ioa | c_1 | c_2 |
+        |-----|-----|-----|
+        | p_1 | 0.3 | 0.8 |
+        | p_2 | 0.4 | 0.1 |
+        | p_3 | 1.0 | 0.4 |
+
+        With `ioa_threshold = 0.5` it will return:
+
+        `[[2],[0]], [[1],[],[1]], [c_1,c_2], [p_1,p_2,p_3]`.
+
+        For each child, the sum of all ioas with all parents sum up to 1. Hence, the ioa with one parent will in general
+        decrease if one child intersects with more parents. Take two children, one matching two parents with an ioa of
+        0.5 each, while the second matches four parents with an ioa of 0.25 each. In this situation, it is difficult to
+        assign children according to a given threshold and one also has to take into account the number of parental
+        intersections for each child.
+
+    Note:
+        Setting `use_weighted_intersections` to True will multiply each ioa with the number of intersections,
+        making it easier to work with an absolute threshold.
+
+    Note:
+        In some situations, you want to assign to each child at most one parent. Setting `max_parent_only` to `True`
+        will select the parent with the highest ioa. There is currently no implementation for iou.
+
+    Args:
+        dp: Image datapoint.
+        matching_rule: Intersection measure type, either `iou` or `ioa`.
+        threshold: Threshold for the given matching rule. Will assign every child annotation with iou/ioa above the
+            threshold to the parental annotation.
+        use_weighted_intersections: This is currently only implemented for matching_rule `ioa`. Instead of using
+            the ioa_matrix, it will use a weighted ioa in order to take into account that intersections with more cells
+            will likely decrease the ioa value. By multiplying the ioa with the number of all intersections for each
+            child, this value calibrates the ioa.
+        parent_ann_category_names: Single str or list of category names.
+        child_ann_category_names: Single str or list of category names.
+        parent_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other parent
+            candidates which are not in the list.
+        child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
+            candidates which are not in the list.
+        parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other parent
+            candidates which are not in the list.
+        child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other children
+            candidates which are not in the list.
+        max_parent_only: Will assign to each child at most one parent with maximum ioa.
+
+    Returns:
+        child indices, parent indices (see Example), list of parent ids and list of children ids.
     """

     assert matching_rule in ["iou", "ioa"], "matching rule must be either iou or ioa"
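The ioa table in the new docstring can be reproduced with a few lines of NumPy. This is a standalone sketch of the thresholding idea only (rows are parents `p_1..p_3`, columns are children `c_1, c_2`); it does not claim to reproduce the exact return format of `match_anns_by_intersection`:

```python
# Standalone sketch of thresholding an ioa matrix as described in the docstring above.
import numpy as np

ioa = np.array([[0.3, 0.8],
                [0.4, 0.1],
                [1.0, 0.4]])   # rows: parents p_1..p_3, columns: children c_1, c_2
threshold = 0.5

parent_idx, child_idx = np.where(ioa >= threshold)
print(list(zip(parent_idx.tolist(), child_idx.tolist())))  # [(0, 1), (2, 0)] -> (p_1, c_2), (p_3, c_1)

# max_parent_only: keep only the parent with the highest ioa for each child
print(ioa.argmax(axis=0).tolist())                          # [2, 0] -> c_1 -> p_3, c_2 -> p_1
```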
@@ -166,21 +175,26 @@ def match_anns_by_distance(
 ) -> list[tuple[ImageAnnotation, ImageAnnotation]]:
     """
     Generates pairs of parent and child annotations by calculating the euclidean distance between the centers of the
-    parent and child bounding boxes. It will return the closest child for each parent.
-    :
+    parent and child bounding boxes. It will return the closest child for each parent.
+
+    Note:
+        A child can be assigned multiple times to different parents.
+
+    Args:
+        dp: Image datapoint.
+        parent_ann_category_names: Single str or list of category names.
+        child_ann_category_names: Single str or list of category names.
+        parent_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other parent
+            candidates which are not in the list.
+        child_ann_ids: Additional filter condition. If some ids are selected, it will ignore all other children
+            candidates which are not in the list.
+        parent_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other parent
+            candidates which are not in the list.
+        child_ann_service_ids: Additional filter condition. If some ids are selected, it will ignore all other children
+            candidates which are not in the list.
+
+    Returns:
+        List of tuples of parent and child annotations.
     """

     parent_anns = dp.get_annotation(
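The distance rule described above is also easy to sketch: take the centers of the parent and child bounding boxes, compute pairwise euclidean distances and pick the closest child per parent; as the docstring notes, a child may be selected by several parents. Standalone sketch:

```python
# Standalone sketch of nearest-child matching by bounding-box centers.
import numpy as np

parent_centers = np.array([[10.0, 10.0], [50.0, 12.0]])
child_centers = np.array([[12.0, 11.0], [48.0, 40.0], [49.0, 13.0]])

# pairwise euclidean distances, shape (n_parents, n_children)
dists = np.linalg.norm(parent_centers[:, None, :] - child_centers[None, :, :], axis=-1)
print(dists.argmin(axis=1).tolist())   # [0, 2] -> parent 0 matches child 0, parent 1 matches child 2
```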
deepdoctection/mapper/misc.py
CHANGED

@@ -16,7 +16,7 @@
 # limitations under the License.

 """
+Small mapping functions
 """

 from __future__ import annotations

@@ -45,14 +45,19 @@ def to_image(
     height: Optional[int] = None,
 ) -> Optional[Image]:
     """
-    :
+    Maps an input from `dataflow.SerializerFiles` or similar to an `Image`.
+
+    Args:
+        dp: Image.
+        dpi: Dot per inch definition for PDF resolution when converting to `np.array`.
+        width: Target width of the image. This option only works when using Poppler as PDF renderer.
+        height: Target height of the image. This option only works when using Poppler as PDF renderer.
+
+    Returns:
+        Image
+
+    Raises:
+        TypeError: If `dp` is not of the expected type for converting to image.
     """

     file_name: Optional[str]
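`to_image` is the entry mapper of a document pipeline: it turns inputs coming from `dataflow.SerializerFiles` into `Image` datapoints, rendering PDF pages at the requested `dpi` (or at a target `width`/`height` with Poppler). A hedged wiring sketch; the import paths and the `SerializerFiles.load` call signature are assumptions, and `to_image` is treated here as a curried mapper even though the decorator is not visible in the hunk:

```python
# Hedged sketch: build a dataflow of Image datapoints from a folder of PDFs.
from deepdoctection.dataflow import MapData, SerializerFiles   # import paths assumed
from deepdoctection.mapper.misc import to_image

df = SerializerFiles.load("/path/to/pdfs", file_type=".pdf")   # hypothetical call signature
df = MapData(df, to_image(dpi=300))   # render each PDF page at 300 dpi before further mapping
df.reset_state()
for image in df:
    ...
```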
@@ -101,10 +106,13 @@ def to_image(

 def maybe_load_image(dp: Image) -> Image:
     """
-    If `image` is None
+    If `image` is `None`, loads the image.

-    :
+    Args:
+        dp: An `Image`.
+
+    Returns:
+        Image with attribute `image` not `None`.
     """

     if dp.image is None:

@@ -116,10 +124,13 @@ def maybe_load_image(dp: Image) -> Image:

 def maybe_remove_image(dp: Image) -> Image:
     """
+    Removes `image` if a location is provided.
+
+    Args:
+        dp: An `Image`.

-    :
+    Returns:
+        Image with attribute `image` set to `None`.
     """

     if dp.location is not None:

@@ -130,11 +141,14 @@ def maybe_remove_image(dp: Image) -> Image:
 @curry
 def maybe_remove_image_from_category(dp: Image, category_names: Optional[Union[str, Sequence[str]]] = None) -> Image:
     """
-    Removes image from image annotation for some
+    Removes `image` from image annotation for some `category_name`s.
+
+    Args:
+        dp: An `Image`.
+        category_names: Category names.

-    :
-    :return: Image with image attributes from image annotations removed
+    Returns:
+        Image with `image` attributes from image annotations removed.
     """
     if category_names is None:
         category_names = []

@@ -151,12 +165,15 @@ def maybe_remove_image_from_category(dp: Image, category_names: Optional[Union[s

 def image_ann_to_image(dp: Image, category_names: Union[str, list[str]], crop_image: bool = True) -> Image:
     """
-    Adds `image` to annotations with given category names
+    Adds `image` to annotations with given category names.

-    :
+    Args:
+        dp: `Image`.
+        category_names: A single or a list of category names.
+        crop_image: If `True`, will add `np.array` to `image.image`.
+
+    Returns:
+        Image
     """

     img_anns = dp.get_annotation(category_names=category_names)

@@ -171,15 +188,18 @@ def maybe_ann_to_sub_image(
     dp: Image, category_names_sub_image: Union[str, list[str]], category_names: Union[str, list[str]], add_summary: bool
 ) -> Image:
     """
-    Assigns to sub image with given category names all annotations with given category names whose bounding box
+    Assigns to sub image with given category names all annotations with given category names whose bounding box lies
     within the bounding box of the sub image.

-    :
+    Args:
+        dp: `Image`.
+        category_names_sub_image: A single or a list of category names that will form a sub image.
+        category_names: A single or a list of category names that may be assigned to a sub image, conditioned on the
+            bounding box lying within the sub image.
+        add_summary: If `True`, will add the whole summary annotation to the sub image.
+
+    Returns:
+        Image
     """

     anns = dp.get_annotation(category_names=category_names_sub_image)

@@ -194,19 +214,23 @@ def maybe_ann_to_sub_image(
 @curry
 def xml_to_dict(dp: JsonDict, xslt_obj: etree.XSLT) -> JsonDict:
     """
-    :
+    Converts an XML object into a dict using an XSL style sheet.
+
+    Example:
+        ```python
+        with open(path_xslt) as xsl_file:
+            xslt_file = xsl_file.read().encode('utf-8')
+            xml_obj = etree.XML(xslt_file, parser=etree.XMLParser(encoding='utf-8'))
+            xslt_obj = etree.XSLT(xml_obj)
+            df = MapData(df, xml_to_dict(xslt_obj))
+        ```
+
+    Args:
+        dp: String representing the XML.
+        xslt_obj: XSLT object to parse the string.
+
+    Returns:
+        Parsed XML as a dict.
     """

     output = str(xslt_obj(dp["xml"]))
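`image_ann_to_image` and `maybe_ann_to_sub_image` above operate on a single `Image` datapoint: the first attaches a (cropped) `np.array` to annotations of the given categories, the second assigns annotations to a sub image whenever their bounding box lies inside it. A hedged sketch that calls them as plain functions; "table" and "cell" are placeholder category names, and if these functions are in fact wrapped as curried mappers the call pattern differs:

```python
# Hedged sketch: build table sub images and assign cell annotations to them.
from deepdoctection.mapper.misc import image_ann_to_image, maybe_ann_to_sub_image  # import path assumed

dp = image_ann_to_image(dp, category_names="table", crop_image=True)   # attach crops to table annotations
dp = maybe_ann_to_sub_image(dp, category_names_sub_image="table", category_names="cell", add_summary=False)
```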
deepdoctection/mapper/pascalstruct.py
CHANGED

@@ -16,7 +16,7 @@
 # limitations under the License.

 """
+Mapping for PASCAL VOC dataset structure to `Image` format.
 """

 import os

@@ -41,17 +41,18 @@ def pascal_voc_dict_to_image(
     category_name_mapping: Optional[dict[str, str]] = None,
 ) -> Optional[Image]:
     """
-    :
-    :
+    Maps a dataset in a structure equivalent to the PASCAL VOC annotation style to the `Image` format.
+
+    Args:
+        dp: A datapoint in PASCAL VOC format. Note that another conversion from XML to a dict structure is required.
+        categories_name_as_key: A dict of categories, e.g. `DatasetCategories.get_categories(name_as_key=True)`.
+        load_image: If `True`, it will load the image to the attribute `Image.image`.
+        filter_empty_image: Will return `None` if the datapoint has no annotations.
+        fake_score: If `dp` does not contain a score, a fake score with uniform random variables in (0,1) will be added.
+        category_name_mapping: Map incoming category names, e.g. `{"source_name": "target_name"}`.
+
+    Returns:
+        `Image` or `None`.
     """

     anns = dp.get("objects", [])
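As the new docstring notes, `pascal_voc_dict_to_image` expects the PASCAL VOC XML already converted to a dict (for example via `xml_to_dict` from misc.py). A hedged wiring sketch, assuming the curried-mapper pattern from maputils and using a stand-in category dict:

```python
# Hedged sketch: PASCAL VOC dicts -> Image datapoints inside a dataflow.
from deepdoctection.dataflow import MapData                      # import paths assumed
from deepdoctection.mapper.pascalstruct import pascal_voc_dict_to_image

categories_name_as_key = {"table": 1}   # stand-in for DatasetCategories.get_categories(name_as_key=True)

df = ...                                # dataflow yielding PASCAL VOC style dicts (e.g. after xml_to_dict)
df = MapData(
    df,
    pascal_voc_dict_to_image(categories_name_as_key, load_image=False, filter_empty_image=True, fake_score=False),
)
```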
deepdoctection/mapper/prodigystruct.py
CHANGED

@@ -16,7 +16,7 @@
 # limitations under the License.

 """
-Module for mapping annotations to and from prodigy data structure
+Module for mapping annotations to and from prodigy data structure.
 """

 import os

@@ -41,19 +41,24 @@ def prodigy_to_image(
     category_name_mapping: Optional[Mapping[str, str]] = None,
 ) -> Optional[Image]:
     """
-    :
+    Maps a datapoint of annotation structure from Prodigy database to an `Image` structure.
+
+    Args:
+        dp: A datapoint in dict structure as returned from Prodigy database.
+        categories_name_as_key: A dict of categories, e.g. `DatasetCategories.get_categories(name_as_key=True)`.
+        load_image: If `True`, it will load image to `Image.image`.
+        fake_score: If `dp` does not contain a score, a fake score with uniform random variables in (0,1) will be added.
+        path_reference_ds: A path to a reference-dataset. It must point to the basedir where the file of the datapoint
+            can be found.
+        accept_only_answer: Filter every datapoint that has the answer `reject` or `ignore`.
+        category_name_mapping: Map incoming category names, e.g. `{"source_name":"target_name"}`.
+
+    Returns:
+        `Image`
+
+    Note:
+        If `accept_only_answer` is `True`, only datapoints with the answer `accept` will be processed.
+
     """

     if accept_only_answer and dp.get("answer") != "accept":

@@ -147,12 +152,21 @@ def prodigy_to_image(
 @curry
 def image_to_prodigy(dp: Image, category_names: Optional[Sequence[ObjectTypes]] = None) -> JsonDict:
     """
+    Transforms the normalized image representation of datasets into the format for visualizing the annotation
+    components in Prodigy.
+
+    Args:
+        dp: An `Image`.
+        category_names: A list of category names to filter the annotations.
+
+    Returns:
+        A dictionary with compulsory keys: `text` and `spans`.
+
+    Example:
+        ```python
+        image_to_prodigy(image_instance)
+        ```

-    :param dp: An image
-    :param category_names: A list of category names to filter the annotations
-    :return: A dictionary with compulsory keys: "text" and "spans"
     """

     output: JsonDict = {}
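`prodigy_to_image` and `image_to_prodigy` above are mirror mappers between Prodigy records and the `Image` structure. A hedged wiring sketch, assuming the curried-mapper pattern for `prodigy_to_image` (its decorator is not visible in the hunk) and using stand-in categories:

```python
# Hedged sketch: Prodigy records -> Image -> back to Prodigy dicts with "text" and "spans".
from deepdoctection.dataflow import MapData                      # import paths assumed
from deepdoctection.mapper.prodigystruct import prodigy_to_image, image_to_prodigy

categories_name_as_key = {"table": 1}   # stand-in for DatasetCategories.get_categories(name_as_key=True)

df = ...                                # dataflow yielding Prodigy records (dicts)
df = MapData(df, prodigy_to_image(categories_name_as_key, load_image=False, fake_score=False,
                                  accept_only_answer=True))
df = MapData(df, image_to_prodigy())    # image_to_prodigy() with no category filter returns the mapper
```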