deepdoctection 0.42.1__py3-none-any.whl → 0.43.1__py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- deepdoctection/__init__.py +4 -2
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +919 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +162 -108
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +205 -119
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +26 -17
- deepdoctection/utils/env_info.py +86 -37
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -71
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.1.dist-info/METADATA +376 -0
- deepdoctection-0.43.1.dist-info/RECORD +149 -0
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.1.dist-info/METADATA +0 -431
- deepdoctection-0.42.1.dist-info/RECORD +0 -148
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/WHEEL +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.1.dist-info}/top_level.txt +0 -0
deepdoctection/configs/conf_dd_one.yaml
CHANGED

@@ -1,18 +1,69 @@
 DEVICE: null
 LANGUAGE: null
 LAYOUT_LINK:
-  CHILD_CATEGORIES:
-
+  CHILD_CATEGORIES:
+  - caption
+  PARENTAL_CATEGORIES:
+  - figure
+  - table
 LAYOUT_NMS_PAIRS:
-  COMBINATIONS:
-
-
+  COMBINATIONS:
+  - - table
+    - title
+  - - table
+    - text
+  - - table
+    - key_value_area
+  - - table
+    - list_item
+  - - table
+    - list
+  - - table
+    - figure
+  - - title
+    - text
+  - - text
+    - key_value_area
+  - - text
+    - list_item
+  - - text
+    - caption
+  - - key_value_area
+    - list_item
+  - - figure
+    - caption
+  PRIORITY:
+  - table
+  - table
+  - table
+  - table
+  - table
+  - table
+  - text
+  - text
+  - null
+  - caption
+  - key_value_area
+  - figure
+  THRESHOLDS:
+  - 0.001
+  - 0.01
+  - 0.01
+  - 0.001
+  - 0.01
+  - 0.01
+  - 0.05
+  - 0.01
+  - 0.01
+  - 0.01
+  - 0.01
+  - 0.001
 LIB: null
 OCR:
   CONFIG:
     TESSERACT: dd/conf_tesseract.yaml
-  USE_DOCTR:
-  USE_TESSERACT:
+  USE_DOCTR: true
+  USE_TESSERACT: false
   USE_TEXTRACT: false
   WEIGHTS:
     DOCTR_RECOGNITION:
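The three lists under LAYOUT_NMS_PAIRS are parallel arrays: twelve category pairs in COMBINATIONS, twelve entries in PRIORITY and twelve in THRESHOLDS, matched by index. Below is a minimal, hypothetical sketch of that alignment (an illustration, not deepdoctection's implementation); it assumes PRIORITY[i] names the category kept when a pair overlaps beyond THRESHOLDS[i], with null meaning the higher-scoring box wins.

# Hypothetical illustration of how the parallel lists line up, index by index.
combinations = [["table", "title"], ["table", "text"], ["figure", "caption"]]
priority = ["table", "table", None]  # None corresponds to YAML null
thresholds = [0.001, 0.01, 0.001]

for (first, second), winner, iou in zip(combinations, priority, thresholds):
    rule = f"keep {winner}" if winner else "keep the higher-scoring box"
    print(f"({first}, {second}): suppress overlap at IoU > {iou} -> {rule}")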
@@ -27,29 +78,41 @@ PDF_MINER:
 PT:
   CELL:
     FILTER: null
+    PAD:
+      BOTTOM: 60
+      LEFT: 60
+      RIGHT: 60
+      TOP: 60
+    PADDING: false
     WEIGHTS: cell/d2_model_1849999_cell_inf_only.pt
     WEIGHTS_TS: cell/d2_model_1849999_cell_inf_only.ts
+  ENFORCE_WEIGHTS:
+    CELL: true
+    ITEM: true
+    LAYOUT: true
   ITEM:
-    FILTER:
+    FILTER:
+    - table
     PAD:
       BOTTOM: 60
       LEFT: 60
       RIGHT: 60
       TOP: 60
-
+    PADDING: false
+    WEIGHTS: deepdoctection/tatr_tab_struct_v2/pytorch_model.bin
     WEIGHTS_TS: item/d2_model_1639999_item_inf_only.ts
   LAYOUT:
     FILTER: null
     PAD:
-      BOTTOM:
-      LEFT:
-      RIGHT:
-      TOP:
-
+      BOTTOM: 0
+      LEFT: 0
+      RIGHT: 0
+      TOP: 0
+    PADDING: false
+    WEIGHTS: Aryn/deformable-detr-DocLayNet/model.safetensors
     WEIGHTS_TS: layout/d2_model_0829999_layout_inf_only.ts
 SEGMENTATION:
   ASSIGNMENT_RULE: ioa
-  CELL_CATEGORY_ID: 12
   CELL_NAMES:
   - header
   - body

@@ -59,30 +122,24 @@ SEGMENTATION:
   - row
   - column
   PUBTABLES_CELL_NAMES:
-  -
-
+  - cell
+  PUBTABLES_ITEM_HEADER_CELL_NAMES:
   - column_header
+  - row_header
   - projected_row_header
-
+  PUBTABLES_ITEM_HEADER_THRESHOLDS:
+  - 0.6
+  - 0.0001
   PUBTABLES_ITEM_NAMES:
   - row
   - column
   PUBTABLES_SPANNING_CELL_NAMES:
   - spanning
-  - row_header
-  - column_header
-  - projected_row_header
   PUBTABLES_SUB_ITEM_NAMES:
   - row_number
   - column_number
-
-
-  - row_header
-  PUBTABLES_ITEM_HEADER_THRESHOLDS:
-  - 0.6
-  - 0.0001
-  REMOVE_IOU_THRESHOLD_COLS: 0.001
-  REMOVE_IOU_THRESHOLD_ROWS: 0.001
+  REMOVE_IOU_THRESHOLD_COLS: 0.2
+  REMOVE_IOU_THRESHOLD_ROWS: 0.2
   STRETCH_RULE: equal
   SUB_ITEM_NAMES:
   - row_number
@@ -96,22 +153,29 @@ TEXT_ORDERING:
   FLOATING_TEXT_BLOCK_CATEGORIES:
   - text
   - title
-  - figure
   - list
+  - key_value_area
   HEIGHT_TOLERANCE: 2.0
-  INCLUDE_RESIDUAL_TEXT_CONTAINER:
+  INCLUDE_RESIDUAL_TEXT_CONTAINER: true
   PARAGRAPH_BREAK: 0.035
   STARTING_POINT_TOLERANCE: 0.005
   TEXT_BLOCK_CATEGORIES:
   - text
   - title
+  - list_item
   - list
-  -
+  - caption
+  - page_header
+  - page_footer
+  - page_number
+  - mark
+  - key_value_area
   - figure
   - column_header
   - projected_row_header
   - spanning
   - row_header
+  - cell
 TF:
   CELL:
     FILTER: null

@@ -124,22 +188,14 @@ TF:
     WEIGHTS: layout/model-800000_inf_only.data-00000-of-00001
 USE_LAYOUT: true
 USE_LAYOUT_LINK: false
-USE_LAYOUT_NMS:
+USE_LAYOUT_NMS: true
+USE_LINE_MATCHER: false
 USE_OCR: true
 USE_PDF_MINER: false
 USE_ROTATOR: false
-USE_TABLE_REFINEMENT:
+USE_TABLE_REFINEMENT: false
 USE_TABLE_SEGMENTATION: true
 WORD_MATCHING:
   MAX_PARENT_ONLY: true
-  PARENTAL_CATEGORIES:
-  - text
-  - title
-  - list
-  - cell
-  - column_header
-  - projected_row_header
-  - spanning
-  - row_header
   RULE: ioa
-  THRESHOLD: 0.
+  THRESHOLD: 0.3
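Taken together, the new defaults move the PyTorch pipeline to the Aryn/deformable-detr-DocLayNet layout model, the deepdoctection/tatr_tab_struct_v2 item model, DocTR OCR and pairwise layout NMS. Below is a sketch of overriding any of these defaults when building the analyzer, assuming the 0.43 factory keeps its "KEY=VALUE" config_overwrite format (the dotted keys mirror the YAML above; the values are purely illustrative):

import deepdoctection as dd

analyzer = dd.get_dd_analyzer(
    config_overwrite=[
        "OCR.USE_DOCTR=False",           # swap the default OCR engine ...
        "OCR.USE_TESSERACT=True",        # ... for Tesseract
        "USE_TABLE_SEGMENTATION=False",  # skip row/column segmentation
        "SEGMENTATION.REMOVE_IOU_THRESHOLD_ROWS=0.3",
    ]
)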
deepdoctection/configs/profiles.jsonl
ADDED

@@ -0,0 +1,32 @@
+{"name": "layout/model-800000_inf_only.data-00000-of-00001", "description": "Tensorpack layout model for inference purposes trained on Publaynet", "size": [274552244, 7907], "tp_model": true, "config": "dd/tp/conf_frcnn_layout.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only", "hf_model_name": "model-800000_inf_only", "hf_config_file": ["conf_frcnn_layout.yaml"], "urls": null, "categories": {"1": "text", "2": "title", "3": "list", "4": "table", "5": "figure"}, "categories_orig": null, "dl_library": "TF", "model_wrapper": "TPFrcnnDetector", "architecture": null, "padding": null}
+{"name": "cell/model-1800000_inf_only.data-00000-of-00001", "description": "Tensorpack cell detection model for inference purposes trained on Pubtabnet", "size": [274503056, 8056], "tp_model": true, "config": "dd/tp/conf_frcnn_cell.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only", "hf_model_name": "model-1800000_inf_only", "hf_config_file": ["conf_frcnn_cell.yaml"], "urls": null, "categories": {"1": "cell"}, "categories_orig": null, "dl_library": "TF", "model_wrapper": "TPFrcnnDetector", "architecture": null, "padding": null}
+{"name": "item/model-1620000_inf_only.data-00000-of-00001", "description": "Tensorpack row/column detection model for inference purposes trained on Pubtabnet", "size": [274515344, 7904], "tp_model": true, "config": "dd/tp/conf_frcnn_rows.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only", "hf_model_name": "model-1620000_inf_only", "hf_config_file": ["conf_frcnn_rows.yaml"], "urls": null, "categories": {"1": "row", "2": "column"}, "categories_orig": null, "dl_library": "TF", "model_wrapper": "TPFrcnnDetector", "architecture": null, "padding": null}
+{"name": "layout/d2_model_0829999_layout_inf_only.pt", "description": "Detectron2 layout detection model trained on Publaynet", "size": [274632215], "tp_model": false, "config": "dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only", "hf_model_name": "d2_model_0829999_layout_inf_only.pt", "hf_config_file": ["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"], "urls": null, "categories": {"1": "text", "2": "title", "3": "list", "4": "table", "5": "figure"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnDetector", "architecture": null, "padding": null}
+{"name": "layout/d2_model_0829999_layout_inf_only.ts", "description": "Detectron2 layout detection model trained on Publaynet. Torchscript export", "size": [274974842], "tp_model": false, "config": "dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN_TS.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only", "hf_model_name": "d2_model_0829999_layout_inf_only.ts", "hf_config_file": ["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"], "urls": null, "categories": {"1": "text", "2": "title", "3": "list", "4": "table", "5": "figure"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnTracingDetector", "architecture": null, "padding": null}
+{"name": "cell/d2_model_1849999_cell_inf_only.pt", "description": "Detectron2 cell detection inference only model trained on Pubtabnet", "size": [274583063], "tp_model": false, "config": "dd/d2/cell/CASCADE_RCNN_R_50_FPN_GN.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only", "hf_model_name": "d2_model_1849999_cell_inf_only.pt", "hf_config_file": ["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"], "urls": null, "categories": {"1": "cell"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnDetector", "architecture": null, "padding": null}
+{"name": "cell/d2_model_1849999_cell_inf_only.ts", "description": "Detectron2 cell detection inference only model trained on Pubtabnet. Torchscript export", "size": [274898682], "tp_model": false, "config": "dd/d2/cell/CASCADE_RCNN_R_50_FPN_GN_TS.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only", "hf_model_name": "d2_model_1849999_cell_inf_only.ts", "hf_config_file": ["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"], "urls": null, "categories": {"1": "cell"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnTracingDetector", "architecture": null, "padding": null}
+{"name": "item/d2_model_1639999_item_inf_only.pt", "description": "Detectron2 item detection model inference only trained on Pubtabnet", "size": [274595351], "tp_model": false, "config": "dd/d2/item/CASCADE_RCNN_R_50_FPN_GN.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only", "hf_model_name": "d2_model_1639999_item_inf_only.pt", "hf_config_file": ["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"], "urls": null, "categories": {"1": "row", "2": "column"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnDetector", "architecture": null, "padding": null}
+{"name": "item/d2_model_1639999_item_inf_only.ts", "description": "Detectron2 cell detection inference only model trained on Pubtabnet. Torchscript export", "size": [274910970], "tp_model": false, "config": "dd/d2/item/CASCADE_RCNN_R_50_FPN_GN_TS.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only", "hf_model_name": "d2_model_1639999_item_inf_only.ts", "hf_config_file": ["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"], "urls": null, "categories": {"1": "row", "2": "column"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnTracingDetector", "architecture": null, "padding": null}
+{"name": "nielsr/lilt-xlm-roberta-base/pytorch_model.bin", "description": "LiLT build with a RobertaXLM base model", "size": [1136743583], "tp_model": false, "config": "nielsr/lilt-xlm-roberta-base/config.json", "preprocessor_config": null, "hf_repo_id": "nielsr/lilt-xlm-roberta-base", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
+{"name": "SCUT-DLVCLab/lilt-infoxlm-base/pytorch_model.bin", "description": "Language-Independent Layout Transformer - InfoXLM model by stitching a pre-trained InfoXLM and a pre-trained Language-Independent Layout Transformer (LiLT) together. It was introduced in the paper LiLT: A Simple yet Effective Language-Independent Layout Transformer for Structured Document Understanding by Wang et al. and first released in this repository.", "size": [1136743583], "tp_model": false, "config": "SCUT-DLVCLab/lilt-infoxlm-base/config.json", "preprocessor_config": null, "hf_repo_id": "SCUT-DLVCLab/lilt-infoxlm-base", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
+{"name": "SCUT-DLVCLab/lilt-roberta-en-base/pytorch_model.bin", "description": "Language-Independent Layout Transformer - RoBERTa model by stitching a pre-trained RoBERTa (English) and a pre-trained Language-Independent Layout Transformer (LiLT) together. It was introduced in the paper LiLT: A Simple yet Effective Language-Independent Layout Transformer for Structured Document Understanding by Wang et al. and first released in this repository.", "size": [523151519], "tp_model": false, "config": "SCUT-DLVCLab/lilt-roberta-en-base/config.json", "preprocessor_config": null, "hf_repo_id": "SCUT-DLVCLab/lilt-roberta-en-base", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
+{"name": "microsoft/layoutlm-base-uncased/pytorch_model.bin", "description": "LayoutLM is a simple but effective pre-training method of text and layout for document image understanding and information extraction tasks, such as form understanding and receipt understanding. LayoutLM archived the SOTA results on multiple datasets. This model does notcontain any head and has to be fine tuned on a downstream task. This is model has been trained on 11M documents for 2 epochs. Configuration: 12-layer, 768-hidden, 12-heads, 113M parameters", "size": [453093832], "tp_model": false, "config": "microsoft/layoutlm-base-uncased/config.json", "preprocessor_config": null, "hf_repo_id": "microsoft/layoutlm-base-uncased", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
+{"name": "microsoft/layoutlm-large-uncased/pytorch_model.bin", "description": "LayoutLM is a simple but effective pre-training method of text and layout for document image understanding and information extraction tasks, such as form understanding and receipt understanding. LayoutLM archived the SOTA results on multiple datasets. This model does notcontain any head and has to be fine tuned on a downstream task. This is model has been trained on 11M documents for 2 epochs. Configuration: 24-layer, 1024-hidden, 16-heads, 343M parameters", "size": [1361845448], "tp_model": false, "config": "microsoft/layoutlm-large-uncased/config.json", "preprocessor_config": null, "hf_repo_id": "microsoft/layoutlm-large-uncased", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
+{"name": "microsoft/layoutlmv2-base-uncased/pytorch_model.bin", "description": "LayoutLMv2 is an improved version of LayoutLM with new pre-training tasks to model the interaction among text, layout, and image in a single multi-modal framework. It outperforms strong baselines and achieves new state-of-the-art results on a wide variety of downstream visually-rich document understanding tasks, including , including FUNSD (0.7895 → 0.8420), CORD (0.9493 → 0.9601), SROIE (0.9524 → 0.9781), Kleister-NDA (0.834 → 0.852), RVL-CDIP (0.9443 → 0.9564), and DocVQA (0.7295 → 0.8672). The license is cc-by-nc-sa-4.0", "size": [802243295], "tp_model": false, "config": "microsoft/layoutlmv2-base-uncased/config.json", "preprocessor_config": null, "hf_repo_id": "microsoft/layoutlmv2-base-uncased", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
+{"name": "microsoft/layoutxlm-base/pytorch_model.bin", "description": "Multimodal pre-training with text, layout, and image has achieved SOTA performance for visually-rich document understanding tasks recently, which demonstrates the great potential for joint learning across different modalities. In this paper, we present LayoutXLM, a multimodal pre-trained model for multilingual document understanding, which aims to bridge the language barriers for visually-rich document understanding. To accurately evaluate LayoutXLM, we also introduce a multilingual form understanding benchmark dataset named XFUN, which includes form understanding samples in 7 languages (Chinese, Japanese, Spanish, French, Italian, German, Portuguese), and key-value pairs are manually labeled for each language. Experiment results show that the LayoutXLM model has significantly outperformed the existing SOTA cross-lingual pre-trained models on the XFUN dataset. The license is cc-by-nc-sa-4.0", "size": [1476537178], "tp_model": false, "config": "microsoft/layoutxlm-base/config.json", "preprocessor_config": null, "hf_repo_id": "microsoft/layoutxlm-base", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
+{"name": "microsoft/layoutlmv3-base/pytorch_model.bin", "description": "LayoutLMv3 is a pre-trained multimodal Transformer for Document AI with unified text and image masking. The simple unified architecture and training objectives make LayoutLMv3 a general-purpose pre-trained model. For example, LayoutLMv3 can be fine-tuned for both text-centric tasks, including form understanding, receipt understanding, and document visual question answering, and image-centric tasks such as document image classification and document layout analysis. The license is cc-by-nc-sa-4.0", "size": [501380823], "tp_model": false, "config": "microsoft/layoutlmv3-base/config.json", "preprocessor_config": null, "hf_repo_id": "microsoft/layoutlmv3-base", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
+{"name": "microsoft/table-transformer-detection/pytorch_model.bin", "description": "Table Transformer (DETR) model trained on PubTables1M. It was introduced in the paper PubTables-1M: Towards Comprehensive Table Extraction From Unstructured Documents by Smock et al. This model is devoted to table detection", "size": [115393245], "tp_model": false, "config": "microsoft/table-transformer-detection/config.json", "preprocessor_config": "microsoft/table-transformer-detection/preprocessor_config.json", "hf_repo_id": "microsoft/table-transformer-detection", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json", "preprocessor_config.json"], "urls": null, "categories": {"1": "table", "2": "table_rotated"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "HFDetrDerivedDetector", "architecture": null, "padding": null}
+{"name": "microsoft/table-transformer-structure-recognition/pytorch_model.bin", "description": "Table Transformer (DETR) model trained on PubTables1M. It was introduced in the paper PubTables-1M: Towards Comprehensive Table Extraction From Unstructured Documents by Smock et al. This model is devoted to table structure recognition and assumes to receive a croppedtable as input. It will predict rows, column and spanning cells", "size": [115509981], "tp_model": false, "config": "microsoft/table-transformer-structure-recognition/config.json", "preprocessor_config": "microsoft/table-transformer-structure-recognition/preprocessor_config.json", "hf_repo_id": "microsoft/table-transformer-structure-recognition", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json", "preprocessor_config.json"], "urls": null, "categories": {"1": "table", "2": "column", "3": "row", "4": "column_header", "5": "projected_row_header", "6": "spanning"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "HFDetrDerivedDetector", "architecture": null, "padding": null}
+{"name": "doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt", "description": "Doctr implementation of DBNet from “Real-time Scene Text Detection with Differentiable Binarization”. For more information please check https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Pytorch artefact.", "size": [101971449], "tp_model": false, "config": null, "preprocessor_config": null, "hf_repo_id": null, "hf_model_name": null, "hf_config_file": null, "urls": ["https://doctr-static.mindee.com/models?id=v0.3.1/db_resnet50-ac60cadc.pt&src=0"], "categories": {"1": "word"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "DoctrTextlineDetector", "architecture": "db_resnet50", "padding": null}
+{"name": "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip", "description": "Doctr implementation of DBNet from “Real-time Scene Text Detection with Differentiable Binarization”. For more information please check https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Tensorflow artefact.", "size": [94178964], "tp_model": false, "config": null, "preprocessor_config": null, "hf_repo_id": null, "hf_model_name": null, "hf_config_file": null, "urls": ["https://doctr-static.mindee.com/models?id=v0.2.0/db_resnet50-adcafc63.zip&src=0"], "categories": {"1": "word"}, "categories_orig": null, "dl_library": "TF", "model_wrapper": "DoctrTextlineDetector", "architecture": "db_resnet50", "padding": null}
+{"name": "doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt", "description": "Doctr implementation of CRNN from “An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition”. For more information please check https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Pytorch artefact.", "size": [63286381], "tp_model": false, "config": null, "preprocessor_config": null, "hf_repo_id": null, "hf_model_name": null, "hf_config_file": null, "urls": ["https://doctr-static.mindee.com/models?id=v0.3.1/crnn_vgg16_bn-9762b0b0.pt&src=0"], "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "DoctrTextRecognizer", "architecture": "crnn_vgg16_bn", "padding": null}
+{"name": "doctr/crnn_vgg16_bn/tf/crnn_vgg16_bn-76b7f2c6.zip", "description": "Doctr implementation of CRNN from “An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition”. For more information please check https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Tensorflow artefact.", "size": [58758994], "tp_model": false, "config": null, "preprocessor_config": null, "hf_repo_id": null, "hf_model_name": null, "hf_config_file": null, "urls": ["https://doctr-static.mindee.com/models?id=v0.3.0/crnn_vgg16_bn-76b7f2c6.zip&src=0"], "categories": {}, "categories_orig": null, "dl_library": "TF", "model_wrapper": "DoctrTextRecognizer", "architecture": "crnn_vgg16_bn", "padding": null}
+{"name": "FacebookAI/xlm-roberta-base/pytorch_model.bin", "description": "XLM-RoBERTa model pre-trained on 2.5TB of filtered CommonCrawl data containing 100 languages. It was introduced in the paper Unsupervised Cross-lingual Representation Learning at Scale by Conneau et al. and first released in this repository.", "size": [1115590446], "tp_model": false, "config": "FacebookAI/xlm-roberta-base/config.json", "preprocessor_config": null, "hf_repo_id": "FacebookAI/xlm-roberta-base", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
+{"name": "fasttext/lid.176.bin", "description": "Fasttext language detection model", "size": [131266198], "tp_model": false, "config": null, "preprocessor_config": null, "hf_repo_id": null, "hf_model_name": null, "hf_config_file": null, "urls": ["https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin"], "categories": {"1": "eng", "2": "rus", "3": "deu", "4": "fre", "5": "ita", "6": "jpn", "7": "spa", "8": "ceb", "9": "tur", "10": "por", "11": "ukr", "12": "epo", "13": "pol", "14": "swe", "15": "dut", "16": "heb", "17": "chi", "18": "hun", "19": "ara", "20": "cat", "21": "fin", "22": "cze", "23": "per", "24": "srp", "25": "gre", "26": "vie", "27": "bul", "28": "kor", "29": "nor", "30": "mac", "31": "rum", "32": "ind", "33": "tha", "34": "arm", "35": "dan", "36": "tam", "37": "hin", "38": "hrv", "39": "nn", "40": "bel", "41": "geo", "42": "tel", "43": "kaz", "44": "war", "45": "lit", "46": "glg", "47": "slo", "48": "ben", "49": "baq", "50": "slv", "51": "nn", "52": "mal", "53": "mar", "54": "est", "55": "aze", "56": "nn", "57": "alb", "58": "lat", "59": "bos", "60": "nno", "61": "urd", "62": "nn", "63": "nn", "64": "nn", "65": "nn", "66": "nn", "67": "nn", "68": "nn", "69": "nn", "70": "nn", "71": "nn", "72": "nn", "73": "nn", "74": "nn", "75": "nn", "76": "nn", "77": "nn", "78": "nn", "79": "nn", "80": "nn", "81": "nn", "82": "nn", "83": "nn", "84": "nn", "85": "nn", "86": "nn", "87": "nn", "88": "nn", "89": "nn", "90": "nn", "91": "nn", "92": "nn", "93": "nn", "94": "nn", "95": "nn", "96": "nn", "97": "nn", "98": "nn", "99": "nn", "100": "nn", "101": "nn", "102": "nn", "103": "nn", "104": "nn", "105": "nn", "106": "nn", "107": "nn", "108": "nn", "109": "nn", "110": "nn", "111": "nn", "112": "nn", "113": "nn", "114": "nn", "115": "nn", "116": "nn", "117": "nn", "118": "nn", "119": "nn", "120": "nn", "121": "nn", "122": "nn", "123": "nn", "124": "nn", "125": "nn", "126": "nn", "127": "nn", "128": "nn", "129": "nn", "130": "nn", "131": "nn", "132": "nn", "133": "nn", "134": "nn", "135": "nn", "136": "nn", "137": "nn", "138": "nn", "139": "nn", "140": "nn", "141": "nn", "142": "nn", "143": "nn", "144": "nn", "145": "nn", "146": "nn", "147": "nn", "148": "nn", "149": "nn", "150": "nn", "151": "nn", "152": "nn", "153": "nn", "154": "nn", "155": "nn", "156": "nn", "157": "nn", "158": "nn", "159": "nn", "160": "nn", "161": "nn", "162": "nn", "163": "nn", "164": "nn", "165": "nn", "166": "nn", "167": "nn", "168": "nn", "169": "nn", "170": "nn", "171": "nn", "172": "nn", "173": "nn", "174": "nn", "175": "nn", "176": "nn"}, "categories_orig": {"__label__en": "eng", "__label__ru": "rus", "__label__de": "deu", "__label__fr": "fre", "__label__it": "ita", "__label__ja": "jpn", "__label__es": "spa", "__label__ceb": "ceb", "__label__tr": "tur", "__label__pt": "por", "__label__uk": "ukr", "__label__eo": "epo", "__label__pl": "pol", "__label__sv": "swe", "__label__nl": "dut", "__label__he": "heb", "__label__zh": "chi", "__label__hu": "hun", "__label__ar": "ara", "__label__ca": "cat", "__label__fi": "fin", "__label__cs": "cze", "__label__fa": "per", "__label__sr": "srp", "__label__el": "gre", "__label__vi": "vie", "__label__bg": "bul", "__label__ko": "kor", "__label__no": "nor", "__label__mk": "mac", "__label__ro": "rum", "__label__id": "ind", "__label__th": "tha", "__label__hy": "arm", "__label__da": "dan", "__label__ta": "tam", "__label__hi": "hin", "__label__hr": "hrv", "__label__sh": "nn", "__label__be": "bel", "__label__ka": "geo", "__label__te": "tel", "__label__kk": "kaz", "__label__war": "war", "__label__lt": "lit", "__label__gl": "glg", "__label__sk": "slo", "__label__bn": "ben", "__label__eu": "baq", "__label__sl": "slv", "__label__kn": "nn", "__label__ml": "mal", "__label__mr": "mar", "__label__et": "est", "__label__az": "aze", "__label__ms": "nn", "__label__sq": "alb", "__label__la": "lat", "__label__bs": "bos", "__label__nn": "nno", "__label__ur": "urd", "__label__lv": "nn", "__label__my": "nn", "__label__tt": "nn", "__label__af": "nn", "__label__oc": "nn", "__label__nds": "nn", "__label__ky": "nn", "__label__ast": "nn", "__label__tl": "nn", "__label__is": "nn", "__label__ia": "nn", "__label__si": "nn", "__label__gu": "nn", "__label__km": "nn", "__label__br": "nn", "__label__ba": "nn", "__label__uz": "nn", "__label__bo": "nn", "__label__pa": "nn", "__label__vo": "nn", "__label__als": "nn", "__label__ne": "nn", "__label__cy": "nn", "__label__jbo": "nn", "__label__fy": "nn", "__label__mn": "nn", "__label__lb": "nn", "__label__ce": "nn", "__label__ug": "nn", "__label__tg": "nn", "__label__sco": "nn", "__label__sa": "nn", "__label__cv": "nn", "__label__jv": "nn", "__label__min": "nn", "__label__io": "nn", "__label__or": "nn", "__label__as": "nn", "__label__new": "nn", "__label__ga": "nn", "__label__mg": "nn", "__label__an": "nn", "__label__ckb": "nn", "__label__sw": "nn", "__label__bar": "nn", "__label__lmo": "nn", "__label__yi": "nn", "__label__arz": "nn", "__label__mhr": "nn", "__label__azb": "nn", "__label__sah": "nn", "__label__pnb": "nn", "__label__su": "nn", "__label__bpy": "nn", "__label__pms": "nn", "__label__ilo": "nn", "__label__wuu": "nn", "__label__ku": "nn", "__label__ps": "nn", "__label__ie": "nn", "__label__xmf": "nn", "__label__yue": "nn", "__label__gom": "nn", "__label__li": "nn", "__label__mwl": "nn", "__label__kw": "nn", "__label__sd": "nn", "__label__hsb": "nn", "__label__scn": "nn", "__label__gd": "nn", "__label__pam": "nn", "__label__bh": "nn", "__label__mai": "nn", "__label__vec": "nn", "__label__mt": "nn", "__label__dv": "nn", "__label__wa": "nn", "__label__mzn": "nn", "__label__am": "nn", "__label__qu": "nn", "__label__eml": "nn", "__label__cbk": "nn", "__label__tk": "nn", "__label__rm": "nn", "__label__os": "nn", "__label__vls": "nn", "__label__yo": "nn", "__label__lo": "nn", "__label__lez": "nn", "__label__so": "nn", "__label__myv": "nn", "__label__diq": "nn", "__label__mrj": "nn", "__label__dsb": "nn", "__label__frr": "nn", "__label__ht": "nn", "__label__gn": "nn", "__label__bxr": "nn", "__label__kv": "nn", "__label__sc": "nn", "__label__nah": "nn", "__label__krc": "nn", "__label__bcl": "nn", "__label__nap": "nn", "__label__gv": "nn", "__label__av": "nn", "__label__rue": "nn", "__label__xal": "nn", "__label__pfl": "nn", "__label__dty": "nn", "__label__hif": "nn", "__label__co": "nn", "__label__lrc": "nn", "__label__vep": "nn", "__label__tyv": "nn"}, "dl_library": null, "model_wrapper": "FasttextLangDetector", "architecture": null, "padding": null}
+{"name": "deepdoctection/tatr_tab_struct_v2/pytorch_model.bin", "description": "Table Transformer (DETR) model trained on PubTables1M. It was introduced in the paper Aligning benchmark datasets for table structure recognition by Smock et al. This model is devoted to table structure recognition and assumes to receive a slightly croppedtable as input. It will predict rows, column and spanning cells. Use a padding of around 5 pixels", "size": [115511753], "tp_model": false, "config": "deepdoctection/tatr_tab_struct_v2/config.json", "preprocessor_config": "deepdoctection/tatr_tab_struct_v2/preprocessor_config.json", "hf_repo_id": "deepdoctection/tatr_tab_struct_v2", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json", "preprocessor_config.json"], "urls": null, "categories": {"1": "table", "2": "column", "3": "row", "4": "column_header", "5": "projected_row_header", "6": "spanning"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "HFDetrDerivedDetector", "architecture": null, "padding": null}
+{"name": "layout/d2_model_0829999_layout.pth", "description": "Detectron2 layout detection model trained on Publaynet. Checkpoint for resuming training", "size": [548377327], "tp_model": false, "config": "dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only", "hf_model_name": "d2_model_0829999_layout.pth", "hf_config_file": ["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"], "urls": null, "categories": {"1": "text", "2": "title", "3": "list", "4": "table", "5": "figure"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnDetector", "architecture": null, "padding": null}
+{"name": "cell/d2_model_1849999_cell.pth", "description": "Detectron2 cell detection inference only model trained on Pubtabnet", "size": [548279023], "tp_model": false, "config": "dd/d2/cell/CASCADE_RCNN_R_50_FPN_GN.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only", "hf_model_name": "cell/d2_model_1849999_cell.pth", "hf_config_file": ["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"], "urls": null, "categories": {"1": "cell"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnDetector", "architecture": null, "padding": null}
+{"name": "item/d2_model_1639999_item.pth", "description": "Detectron2 item detection model trained on Pubtabnet", "size": [548303599], "tp_model": false, "config": "dd/d2/item/CASCADE_RCNN_R_50_FPN_GN.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only", "hf_model_name": "d2_model_1639999_item.pth", "hf_config_file": ["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"], "urls": null, "categories": {"1": "row", "2": "column"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnDetector", "architecture": null, "padding": null}
+{"name": "Felix92/doctr-torch-parseq-multilingual-v1/pytorch_model.bin", "description": "", "size": [63286381], "tp_model": false, "config": "Felix92/doctr-torch-parseq-multilingual-v1/config.json", "preprocessor_config": null, "hf_repo_id": "Felix92/doctr-torch-parseq-multilingual-v1", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "DoctrTextRecognizer", "architecture": "parseq", "padding": null}
+{"name": "doctr/crnn_vgg16_bn/pt/master-fde31e4a.pt", "description": "MASTER", "size": [63286381], "tp_model": false, "config": null, "preprocessor_config": null, "hf_repo_id": null, "hf_model_name": null, "hf_config_file": null, "urls": ["https://doctr-static.mindee.com/models?id=v0.7.0/master-fde31e4a.pt&src=0"], "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "DoctrTextRecognizer", "architecture": "master", "padding": null}
+{"name": "Aryn/deformable-detr-DocLayNet/model.safetensors", "description": "Deformable DEtection TRansformer (DETR), trained on DocLayNet (including 80k annotated pages in 11 classes).", "size": [115511753], "tp_model": false, "config": "Aryn/deformable-detr-DocLayNet/config.json", "preprocessor_config": "Aryn/deformable-detr-DocLayNet/preprocessor_config.json", "hf_repo_id": "Aryn/deformable-detr-DocLayNet", "hf_model_name": "model.safetensors", "hf_config_file": ["config.json", "preprocessor_config.json"], "urls": null, "categories": {"1": "default_type", "2": "caption", "11": "text", "12": "title", "3": "footnote", "4": "formula", "5": "list_item", "6": "page_footer", "7": "page_header", "8": "figure", "9": "section_header", "10": "table"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "HFDetrDerivedDetector", "architecture": null, "padding": null}
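profiles.jsonl is new in 0.43.1 and stores one model profile per line; together with the large deletion in deepdoctection/extern/model.py (+108 −762 in the file list), this looks like the hard-coded model catalog moving into a data file. A minimal sketch of reading it with plain json parsing; whether the library's ModelCatalog loads the file exactly this way is an assumption:

import json
from pathlib import Path

def iter_profiles(path: str):
    # Each non-empty line of profiles.jsonl is a self-contained JSON object.
    with Path(path).open(encoding="utf-8") as stream:
        for line in stream:
            if line.strip():
                yield json.loads(line)

for profile in iter_profiles("deepdoctection/configs/profiles.jsonl"):
    print(profile["name"], profile["model_wrapper"], profile["dl_library"])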
deepdoctection/dataflow/__init__.py
CHANGED

@@ -2,14 +2,17 @@
 # File: __init__.py
 
 """
-
-is essentially pure Python and, with a simple API, it contains a variety of methods for parallelling complex
-transformations. Due to the fact that DataFlow is not available through pypi we have integrated the most important
-DataFlow classes into deepdoctection in order to avoid installing the package separately from source.
+# Dataflows
 
-
+Info:
+    Dataflow is a package for loading and processing data in both training and prediction environments. Dataflow
+    is essentially pure Python and, with a simple API, it contains a variety of methods for parallelling complex
+    transformations. We have integrated the most important DataFlow classes into deepdoctection in order to avoid
+    installing the package separately from source.
 
-
+    Further information (including several tutorials about performance) can be found in the excellent documentation:
+
+    <https://tensorpack.readthedocs.io/en/latest/tutorial/dataflow.html>
 """
 
 
deepdoctection/dataflow/base.py
CHANGED

@@ -5,7 +5,7 @@
 # Licensed under the Apache License, Version 2.0 (the "License")
 
 """
-
+Most of the code has been taken from
 
 <https://github.com/tensorpack/dataflow/blob/master/dataflow/dataflow/base.py>
 """

@@ -44,12 +44,12 @@ class DataFlow:
     @abstractmethod
     def __iter__(self) -> Iterator[Any]:
         """
-
+        - A dataflow is an iterable. The `__iter__` method should yield a list or dict each time.
         Note that dict is **partially** supported at the moment: certain dataflow does not support dict.
-
+        - The `__iter__` method can be either finite (will stop iteration) or infinite
         (will not stop iteration). For a finite dataflow, `__iter__` can be called
         again immediately after the previous call returned.
-
+        - For many dataflow, the `__iter__` method is non-reentrant, which means for a dataflow
         instance ``df``, `df.__iter__` cannot be called before the previous
         `df.__iter__` call has finished (iteration has stopped).
         When a dataflow is non-reentrant, `df.__iter__` should throw an exception if

@@ -62,18 +62,18 @@ class DataFlow:
 
     def __len__(self) -> int:
         """
-
+        - A dataflow can optionally implement `__len__`. If not implemented, it will
         throw `NotImplementedError`.
-
+        - It returns an integer representing the size of the dataflow.
         The return value **may not be accurate or meaningful** at all.
         When saying the length is "accurate", it means that
         `__iter__` will always yield this many of datapoints before it stops iteration.
-
+        - There could be many reasons why `__len__` is inaccurate.
         For example, some dataflow has dynamic size, if it throws away datapoints on the fly.
         Some dataflow mixes the datapoints between consecutive passes over
         the dataset, due to parallelism and buffering.
         In this case it does not make sense to stop the iteration anywhere.
-
+        - Due to the above reasons, the length is only a rough guidance.
         And it's up to the user how to interpret it.
         Inside tensorpack it's only used in these places:
         + A default ``steps_per_epoch`` in training, but you probably want to customize

@@ -82,28 +82,30 @@ class DataFlow:
         + Used by `InferenceRunner` to get the number of iterations in inference.
           In this case users are **responsible** for making sure that `__len__` is "accurate".
           This is to guarantee that inference is run on a fixed set of images.
+
         Returns:
            int: rough size of this dataflow.
+
         Raises:
-
+            NotImplementedError: if this DataFlow doesn't have a size.
         """
         raise NotImplementedError
 
     def reset_state(self) -> None:
         """
-
+        - The caller must guarantee that `reset_state` should be called **once and only once**
         by the **process that uses the dataflow** before `__iter__` is called.
         The caller thread of this method should stay alive to keep this dataflow alive.
-
+        - It is meant for certain initialization that involves processes,
         e.g., initialize random number generators (RNG), create worker processes.
         Because it's very common to use RNG in data processing,
         developers of dataflow can also subclass `RNGDataFlow` to have easier access to
         a properly-initialized RNG.
-
+        - A dataflow is not fork-safe after `reset_state` is called (because this will violate the guarantee).
         There are a few other dataflows that are not fork-safe anytime, which will be mentioned in the docs.
-
+        - You should take the responsibility and follow the above guarantee if you're the caller of a dataflow yourself
         (either when you're using dataflow outside tensorpack, or if you're writing a wrapper dataflow).
-
+        - Tensorpack's built-in forking dataflows (`MultiProcessRunner`, `MultiProcessMapData`, etc)
         and other component that uses dataflows (`InputSource`)
         already take care of the responsibility of calling this method.
         """

@@ -131,15 +133,31 @@ class ProxyDataFlow(DataFlow):
 
     def __init__(self, df: DataFlow) -> None:
         """
-
+        Initializes the ProxyDataFlow.
+
+        Args:
+            df: DataFlow to proxy.
         """
         self.df = df
 
     def reset_state(self) -> None:
+        """Resets the state of the proxied DataFlow."""
         self.df.reset_state()
 
     def __len__(self) -> int:
+        """
+        Returns the size of the proxied DataFlow.
+
+        Returns:
+            int: Size of the proxied DataFlow.
+        """
         return self.df.__len__()
 
     def __iter__(self) -> Iterator[Any]:
+        """
+        Iterates over the proxied DataFlow.
+
+        Returns:
+            Iterator[Any]: Iterator of the proxied DataFlow.
+        """
         return self.df.__iter__()