deepdoctection 0.42.1__py3-none-any.whl → 0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (124) hide show
  1. deepdoctection/__init__.py +2 -1
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +904 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +157 -106
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +196 -113
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +25 -17
  104. deepdoctection/utils/env_info.py +85 -36
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -62
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.dist-info/RECORD +149 -0
  119. deepdoctection/analyzer/_config.py +0 -146
  120. deepdoctection-0.42.1.dist-info/METADATA +0 -431
  121. deepdoctection-0.42.1.dist-info/RECORD +0 -148
  122. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/WHEEL +0 -0
  123. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,69 @@
1
1
  DEVICE: null
2
2
  LANGUAGE: null
3
3
  LAYOUT_LINK:
4
- CHILD_CATEGORIES: []
5
- PARENTAL_CATEGORIES: []
4
+ CHILD_CATEGORIES:
5
+ - caption
6
+ PARENTAL_CATEGORIES:
7
+ - figure
8
+ - table
6
9
  LAYOUT_NMS_PAIRS:
7
- COMBINATIONS: null
8
- PRIORITY: null
9
- THRESHOLDS: null
10
+ COMBINATIONS:
11
+ - - table
12
+ - title
13
+ - - table
14
+ - text
15
+ - - table
16
+ - key_value_area
17
+ - - table
18
+ - list_item
19
+ - - table
20
+ - list
21
+ - - table
22
+ - figure
23
+ - - title
24
+ - text
25
+ - - text
26
+ - key_value_area
27
+ - - text
28
+ - list_item
29
+ - - text
30
+ - caption
31
+ - - key_value_area
32
+ - list_item
33
+ - - figure
34
+ - caption
35
+ PRIORITY:
36
+ - table
37
+ - table
38
+ - table
39
+ - table
40
+ - table
41
+ - table
42
+ - text
43
+ - text
44
+ - null
45
+ - caption
46
+ - key_value_area
47
+ - figure
48
+ THRESHOLDS:
49
+ - 0.001
50
+ - 0.01
51
+ - 0.01
52
+ - 0.001
53
+ - 0.01
54
+ - 0.01
55
+ - 0.05
56
+ - 0.01
57
+ - 0.01
58
+ - 0.01
59
+ - 0.01
60
+ - 0.001
10
61
  LIB: null
11
62
  OCR:
12
63
  CONFIG:
13
64
  TESSERACT: dd/conf_tesseract.yaml
14
- USE_DOCTR: false
15
- USE_TESSERACT: true
65
+ USE_DOCTR: true
66
+ USE_TESSERACT: false
16
67
  USE_TEXTRACT: false
17
68
  WEIGHTS:
18
69
  DOCTR_RECOGNITION:
@@ -27,29 +78,41 @@ PDF_MINER:
27
78
  PT:
28
79
  CELL:
29
80
  FILTER: null
81
+ PAD:
82
+ BOTTOM: 60
83
+ LEFT: 60
84
+ RIGHT: 60
85
+ TOP: 60
86
+ PADDING: false
30
87
  WEIGHTS: cell/d2_model_1849999_cell_inf_only.pt
31
88
  WEIGHTS_TS: cell/d2_model_1849999_cell_inf_only.ts
89
+ ENFORCE_WEIGHTS:
90
+ CELL: true
91
+ ITEM: true
92
+ LAYOUT: true
32
93
  ITEM:
33
- FILTER: null
94
+ FILTER:
95
+ - table
34
96
  PAD:
35
97
  BOTTOM: 60
36
98
  LEFT: 60
37
99
  RIGHT: 60
38
100
  TOP: 60
39
- WEIGHTS: item/d2_model_1639999_item_inf_only.pt
101
+ PADDING: false
102
+ WEIGHTS: deepdoctection/tatr_tab_struct_v2/pytorch_model.bin
40
103
  WEIGHTS_TS: item/d2_model_1639999_item_inf_only.ts
41
104
  LAYOUT:
42
105
  FILTER: null
43
106
  PAD:
44
- BOTTOM: 60
45
- LEFT: 60
46
- RIGHT: 60
47
- TOP: 60
48
- WEIGHTS: layout/d2_model_0829999_layout_inf_only.pt
107
+ BOTTOM: 0
108
+ LEFT: 0
109
+ RIGHT: 0
110
+ TOP: 0
111
+ PADDING: false
112
+ WEIGHTS: Aryn/deformable-detr-DocLayNet/model.safetensors
49
113
  WEIGHTS_TS: layout/d2_model_0829999_layout_inf_only.ts
50
114
  SEGMENTATION:
51
115
  ASSIGNMENT_RULE: ioa
52
- CELL_CATEGORY_ID: 12
53
116
  CELL_NAMES:
54
117
  - header
55
118
  - body
@@ -59,30 +122,24 @@ SEGMENTATION:
59
122
  - row
60
123
  - column
61
124
  PUBTABLES_CELL_NAMES:
62
- - spanning
63
- - row_header
125
+ - cell
126
+ PUBTABLES_ITEM_HEADER_CELL_NAMES:
64
127
  - column_header
128
+ - row_header
65
129
  - projected_row_header
66
- - cell
130
+ PUBTABLES_ITEM_HEADER_THRESHOLDS:
131
+ - 0.6
132
+ - 0.0001
67
133
  PUBTABLES_ITEM_NAMES:
68
134
  - row
69
135
  - column
70
136
  PUBTABLES_SPANNING_CELL_NAMES:
71
137
  - spanning
72
- - row_header
73
- - column_header
74
- - projected_row_header
75
138
  PUBTABLES_SUB_ITEM_NAMES:
76
139
  - row_number
77
140
  - column_number
78
- PUBTABLES_ITEM_HEADER_CELL_NAMES:
79
- - column_header
80
- - row_header
81
- PUBTABLES_ITEM_HEADER_THRESHOLDS:
82
- - 0.6
83
- - 0.0001
84
- REMOVE_IOU_THRESHOLD_COLS: 0.001
85
- REMOVE_IOU_THRESHOLD_ROWS: 0.001
141
+ REMOVE_IOU_THRESHOLD_COLS: 0.2
142
+ REMOVE_IOU_THRESHOLD_ROWS: 0.2
86
143
  STRETCH_RULE: equal
87
144
  SUB_ITEM_NAMES:
88
145
  - row_number
@@ -96,22 +153,29 @@ TEXT_ORDERING:
96
153
  FLOATING_TEXT_BLOCK_CATEGORIES:
97
154
  - text
98
155
  - title
99
- - figure
100
156
  - list
157
+ - key_value_area
101
158
  HEIGHT_TOLERANCE: 2.0
102
- INCLUDE_RESIDUAL_TEXT_CONTAINER: false
159
+ INCLUDE_RESIDUAL_TEXT_CONTAINER: true
103
160
  PARAGRAPH_BREAK: 0.035
104
161
  STARTING_POINT_TOLERANCE: 0.005
105
162
  TEXT_BLOCK_CATEGORIES:
106
163
  - text
107
164
  - title
165
+ - list_item
108
166
  - list
109
- - cell
167
+ - caption
168
+ - page_header
169
+ - page_footer
170
+ - page_number
171
+ - mark
172
+ - key_value_area
110
173
  - figure
111
174
  - column_header
112
175
  - projected_row_header
113
176
  - spanning
114
177
  - row_header
178
+ - cell
115
179
  TF:
116
180
  CELL:
117
181
  FILTER: null
@@ -124,22 +188,14 @@ TF:
124
188
  WEIGHTS: layout/model-800000_inf_only.data-00000-of-00001
125
189
  USE_LAYOUT: true
126
190
  USE_LAYOUT_LINK: false
127
- USE_LAYOUT_NMS: false
191
+ USE_LAYOUT_NMS: true
192
+ USE_LINE_MATCHER: false
128
193
  USE_OCR: true
129
194
  USE_PDF_MINER: false
130
195
  USE_ROTATOR: false
131
- USE_TABLE_REFINEMENT: true
196
+ USE_TABLE_REFINEMENT: false
132
197
  USE_TABLE_SEGMENTATION: true
133
198
  WORD_MATCHING:
134
199
  MAX_PARENT_ONLY: true
135
- PARENTAL_CATEGORIES:
136
- - text
137
- - title
138
- - list
139
- - cell
140
- - column_header
141
- - projected_row_header
142
- - spanning
143
- - row_header
144
200
  RULE: ioa
145
- THRESHOLD: 0.6
201
+ THRESHOLD: 0.3
@@ -0,0 +1,32 @@
1
+ {"name": "layout/model-800000_inf_only.data-00000-of-00001", "description": "Tensorpack layout model for inference purposes trained on Publaynet", "size": [274552244, 7907], "tp_model": true, "config": "dd/tp/conf_frcnn_layout.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only", "hf_model_name": "model-800000_inf_only", "hf_config_file": ["conf_frcnn_layout.yaml"], "urls": null, "categories": {"1": "text", "2": "title", "3": "list", "4": "table", "5": "figure"}, "categories_orig": null, "dl_library": "TF", "model_wrapper": "TPFrcnnDetector", "architecture": null, "padding": null}
2
+ {"name": "cell/model-1800000_inf_only.data-00000-of-00001", "description": "Tensorpack cell detection model for inference purposes trained on Pubtabnet", "size": [274503056, 8056], "tp_model": true, "config": "dd/tp/conf_frcnn_cell.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only", "hf_model_name": "model-1800000_inf_only", "hf_config_file": ["conf_frcnn_cell.yaml"], "urls": null, "categories": {"1": "cell"}, "categories_orig": null, "dl_library": "TF", "model_wrapper": "TPFrcnnDetector", "architecture": null, "padding": null}
3
+ {"name": "item/model-1620000_inf_only.data-00000-of-00001", "description": "Tensorpack row/column detection model for inference purposes trained on Pubtabnet", "size": [274515344, 7904], "tp_model": true, "config": "dd/tp/conf_frcnn_rows.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/tp_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only", "hf_model_name": "model-1620000_inf_only", "hf_config_file": ["conf_frcnn_rows.yaml"], "urls": null, "categories": {"1": "row", "2": "column"}, "categories_orig": null, "dl_library": "TF", "model_wrapper": "TPFrcnnDetector", "architecture": null, "padding": null}
4
+ {"name": "layout/d2_model_0829999_layout_inf_only.pt", "description": "Detectron2 layout detection model trained on Publaynet", "size": [274632215], "tp_model": false, "config": "dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only", "hf_model_name": "d2_model_0829999_layout_inf_only.pt", "hf_config_file": ["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"], "urls": null, "categories": {"1": "text", "2": "title", "3": "list", "4": "table", "5": "figure"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnDetector", "architecture": null, "padding": null}
5
+ {"name": "layout/d2_model_0829999_layout_inf_only.ts", "description": "Detectron2 layout detection model trained on Publaynet. Torchscript export", "size": [274974842], "tp_model": false, "config": "dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN_TS.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only", "hf_model_name": "d2_model_0829999_layout_inf_only.ts", "hf_config_file": ["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"], "urls": null, "categories": {"1": "text", "2": "title", "3": "list", "4": "table", "5": "figure"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnTracingDetector", "architecture": null, "padding": null}
6
+ {"name": "cell/d2_model_1849999_cell_inf_only.pt", "description": "Detectron2 cell detection inference only model trained on Pubtabnet", "size": [274583063], "tp_model": false, "config": "dd/d2/cell/CASCADE_RCNN_R_50_FPN_GN.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only", "hf_model_name": "d2_model_1849999_cell_inf_only.pt", "hf_config_file": ["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"], "urls": null, "categories": {"1": "cell"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnDetector", "architecture": null, "padding": null}
7
+ {"name": "cell/d2_model_1849999_cell_inf_only.ts", "description": "Detectron2 cell detection inference only model trained on Pubtabnet. Torchscript export", "size": [274898682], "tp_model": false, "config": "dd/d2/cell/CASCADE_RCNN_R_50_FPN_GN_TS.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only", "hf_model_name": "d2_model_1849999_cell_inf_only.ts", "hf_config_file": ["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"], "urls": null, "categories": {"1": "cell"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnTracingDetector", "architecture": null, "padding": null}
8
+ {"name": "item/d2_model_1639999_item_inf_only.pt", "description": "Detectron2 item detection model inference only trained on Pubtabnet", "size": [274595351], "tp_model": false, "config": "dd/d2/item/CASCADE_RCNN_R_50_FPN_GN.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only", "hf_model_name": "d2_model_1639999_item_inf_only.pt", "hf_config_file": ["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"], "urls": null, "categories": {"1": "row", "2": "column"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnDetector", "architecture": null, "padding": null}
9
+ {"name": "item/d2_model_1639999_item_inf_only.ts", "description": "Detectron2 cell detection inference only model trained on Pubtabnet. Torchscript export", "size": [274910970], "tp_model": false, "config": "dd/d2/item/CASCADE_RCNN_R_50_FPN_GN_TS.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only", "hf_model_name": "d2_model_1639999_item_inf_only.ts", "hf_config_file": ["CASCADE_RCNN_R_50_FPN_GN_TS.yaml"], "urls": null, "categories": {"1": "row", "2": "column"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnTracingDetector", "architecture": null, "padding": null}
10
+ {"name": "nielsr/lilt-xlm-roberta-base/pytorch_model.bin", "description": "LiLT build with a RobertaXLM base model", "size": [1136743583], "tp_model": false, "config": "nielsr/lilt-xlm-roberta-base/config.json", "preprocessor_config": null, "hf_repo_id": "nielsr/lilt-xlm-roberta-base", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
11
+ {"name": "SCUT-DLVCLab/lilt-infoxlm-base/pytorch_model.bin", "description": "Language-Independent Layout Transformer - InfoXLM model by stitching a pre-trained InfoXLM and a pre-trained Language-Independent Layout Transformer (LiLT) together. It was introduced in the paper LiLT: A Simple yet Effective Language-Independent Layout Transformer for Structured Document Understanding by Wang et al. and first released in this repository.", "size": [1136743583], "tp_model": false, "config": "SCUT-DLVCLab/lilt-infoxlm-base/config.json", "preprocessor_config": null, "hf_repo_id": "SCUT-DLVCLab/lilt-infoxlm-base", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
12
+ {"name": "SCUT-DLVCLab/lilt-roberta-en-base/pytorch_model.bin", "description": "Language-Independent Layout Transformer - RoBERTa model by stitching a pre-trained RoBERTa (English) and a pre-trained Language-Independent Layout Transformer (LiLT) together. It was introduced in the paper LiLT: A Simple yet Effective Language-Independent Layout Transformer for Structured Document Understanding by Wang et al. and first released in this repository.", "size": [523151519], "tp_model": false, "config": "SCUT-DLVCLab/lilt-roberta-en-base/config.json", "preprocessor_config": null, "hf_repo_id": "SCUT-DLVCLab/lilt-roberta-en-base", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
13
+ {"name": "microsoft/layoutlm-base-uncased/pytorch_model.bin", "description": "LayoutLM is a simple but effective pre-training method of text and layout for document image understanding and information extraction tasks, such as form understanding and receipt understanding. LayoutLM archived the SOTA results on multiple datasets. This model does notcontain any head and has to be fine tuned on a downstream task. This is model has been trained on 11M documents for 2 epochs. Configuration: 12-layer, 768-hidden, 12-heads, 113M parameters", "size": [453093832], "tp_model": false, "config": "microsoft/layoutlm-base-uncased/config.json", "preprocessor_config": null, "hf_repo_id": "microsoft/layoutlm-base-uncased", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
14
+ {"name": "microsoft/layoutlm-large-uncased/pytorch_model.bin", "description": "LayoutLM is a simple but effective pre-training method of text and layout for document image understanding and information extraction tasks, such as form understanding and receipt understanding. LayoutLM archived the SOTA results on multiple datasets. This model does notcontain any head and has to be fine tuned on a downstream task. This is model has been trained on 11M documents for 2 epochs. Configuration: 24-layer, 1024-hidden, 16-heads, 343M parameters", "size": [1361845448], "tp_model": false, "config": "microsoft/layoutlm-large-uncased/config.json", "preprocessor_config": null, "hf_repo_id": "microsoft/layoutlm-large-uncased", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
15
+ {"name": "microsoft/layoutlmv2-base-uncased/pytorch_model.bin", "description": "LayoutLMv2 is an improved version of LayoutLM with new pre-training tasks to model the interaction among text, layout, and image in a single multi-modal framework. It outperforms strong baselines and achieves new state-of-the-art results on a wide variety of downstream visually-rich document understanding tasks, including , including FUNSD (0.7895 → 0.8420), CORD (0.9493 → 0.9601), SROIE (0.9524 → 0.9781), Kleister-NDA (0.834 → 0.852), RVL-CDIP (0.9443 → 0.9564), and DocVQA (0.7295 → 0.8672). The license is cc-by-nc-sa-4.0", "size": [802243295], "tp_model": false, "config": "microsoft/layoutlmv2-base-uncased/config.json", "preprocessor_config": null, "hf_repo_id": "microsoft/layoutlmv2-base-uncased", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
16
+ {"name": "microsoft/layoutxlm-base/pytorch_model.bin", "description": "Multimodal pre-training with text, layout, and image has achieved SOTA performance for visually-rich document understanding tasks recently, which demonstrates the great potential for joint learning across different modalities. In this paper, we present LayoutXLM, a multimodal pre-trained model for multilingual document understanding, which aims to bridge the language barriers for visually-rich document understanding. To accurately evaluate LayoutXLM, we also introduce a multilingual form understanding benchmark dataset named XFUN, which includes form understanding samples in 7 languages (Chinese, Japanese, Spanish, French, Italian, German, Portuguese), and key-value pairs are manually labeled for each language. Experiment results show that the LayoutXLM model has significantly outperformed the existing SOTA cross-lingual pre-trained models on the XFUN dataset. The license is cc-by-nc-sa-4.0", "size": [1476537178], "tp_model": false, "config": "microsoft/layoutxlm-base/config.json", "preprocessor_config": null, "hf_repo_id": "microsoft/layoutxlm-base", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
17
+ {"name": "microsoft/layoutlmv3-base/pytorch_model.bin", "description": "LayoutLMv3 is a pre-trained multimodal Transformer for Document AI with unified text and image masking. The simple unified architecture and training objectives make LayoutLMv3 a general-purpose pre-trained model. For example, LayoutLMv3 can be fine-tuned for both text-centric tasks, including form understanding, receipt understanding, and document visual question answering, and image-centric tasks such as document image classification and document layout analysis. The license is cc-by-nc-sa-4.0", "size": [501380823], "tp_model": false, "config": "microsoft/layoutlmv3-base/config.json", "preprocessor_config": null, "hf_repo_id": "microsoft/layoutlmv3-base", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
18
+ {"name": "microsoft/table-transformer-detection/pytorch_model.bin", "description": "Table Transformer (DETR) model trained on PubTables1M. It was introduced in the paper PubTables-1M: Towards Comprehensive Table Extraction From Unstructured Documents by Smock et al. This model is devoted to table detection", "size": [115393245], "tp_model": false, "config": "microsoft/table-transformer-detection/config.json", "preprocessor_config": "microsoft/table-transformer-detection/preprocessor_config.json", "hf_repo_id": "microsoft/table-transformer-detection", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json", "preprocessor_config.json"], "urls": null, "categories": {"1": "table", "2": "table_rotated"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "HFDetrDerivedDetector", "architecture": null, "padding": null}
19
+ {"name": "microsoft/table-transformer-structure-recognition/pytorch_model.bin", "description": "Table Transformer (DETR) model trained on PubTables1M. It was introduced in the paper PubTables-1M: Towards Comprehensive Table Extraction From Unstructured Documents by Smock et al. This model is devoted to table structure recognition and assumes to receive a croppedtable as input. It will predict rows, column and spanning cells", "size": [115509981], "tp_model": false, "config": "microsoft/table-transformer-structure-recognition/config.json", "preprocessor_config": "microsoft/table-transformer-structure-recognition/preprocessor_config.json", "hf_repo_id": "microsoft/table-transformer-structure-recognition", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json", "preprocessor_config.json"], "urls": null, "categories": {"1": "table", "2": "column", "3": "row", "4": "column_header", "5": "projected_row_header", "6": "spanning"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "HFDetrDerivedDetector", "architecture": null, "padding": null}
20
+ {"name": "doctr/db_resnet50/pt/db_resnet50-ac60cadc.pt", "description": "Doctr implementation of DBNet from “Real-time Scene Text Detection with Differentiable Binarization”. For more information please check https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Pytorch artefact.", "size": [101971449], "tp_model": false, "config": null, "preprocessor_config": null, "hf_repo_id": null, "hf_model_name": null, "hf_config_file": null, "urls": ["https://doctr-static.mindee.com/models?id=v0.3.1/db_resnet50-ac60cadc.pt&src=0"], "categories": {"1": "word"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "DoctrTextlineDetector", "architecture": "db_resnet50", "padding": null}
21
+ {"name": "doctr/db_resnet50/tf/db_resnet50-adcafc63.zip", "description": "Doctr implementation of DBNet from “Real-time Scene Text Detection with Differentiable Binarization”. For more information please check https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Tensorflow artefact.", "size": [94178964], "tp_model": false, "config": null, "preprocessor_config": null, "hf_repo_id": null, "hf_model_name": null, "hf_config_file": null, "urls": ["https://doctr-static.mindee.com/models?id=v0.2.0/db_resnet50-adcafc63.zip&src=0"], "categories": {"1": "word"}, "categories_orig": null, "dl_library": "TF", "model_wrapper": "DoctrTextlineDetector", "architecture": "db_resnet50", "padding": null}
22
+ {"name": "doctr/crnn_vgg16_bn/pt/crnn_vgg16_bn-9762b0b0.pt", "description": "Doctr implementation of CRNN from “An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition”. For more information please check https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Pytorch artefact.", "size": [63286381], "tp_model": false, "config": null, "preprocessor_config": null, "hf_repo_id": null, "hf_model_name": null, "hf_config_file": null, "urls": ["https://doctr-static.mindee.com/models?id=v0.3.1/crnn_vgg16_bn-9762b0b0.pt&src=0"], "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "DoctrTextRecognizer", "architecture": "crnn_vgg16_bn", "padding": null}
23
+ {"name": "doctr/crnn_vgg16_bn/tf/crnn_vgg16_bn-76b7f2c6.zip", "description": "Doctr implementation of CRNN from “An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition”. For more information please check https://mindee.github.io/doctr/using_doctr/using_models.html#. This is the Tensorflow artefact.", "size": [58758994], "tp_model": false, "config": null, "preprocessor_config": null, "hf_repo_id": null, "hf_model_name": null, "hf_config_file": null, "urls": ["https://doctr-static.mindee.com/models?id=v0.3.0/crnn_vgg16_bn-76b7f2c6.zip&src=0"], "categories": {}, "categories_orig": null, "dl_library": "TF", "model_wrapper": "DoctrTextRecognizer", "architecture": "crnn_vgg16_bn", "padding": null}
24
+ {"name": "FacebookAI/xlm-roberta-base/pytorch_model.bin", "description": "XLM-RoBERTa model pre-trained on 2.5TB of filtered CommonCrawl data containing 100 languages. It was introduced in the paper Unsupervised Cross-lingual Representation Learning at Scale by Conneau et al. and first released in this repository.", "size": [1115590446], "tp_model": false, "config": "FacebookAI/xlm-roberta-base/config.json", "preprocessor_config": null, "hf_repo_id": "FacebookAI/xlm-roberta-base", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": null, "architecture": null, "padding": null}
25
+ {"name": "fasttext/lid.176.bin", "description": "Fasttext language detection model", "size": [131266198], "tp_model": false, "config": null, "preprocessor_config": null, "hf_repo_id": null, "hf_model_name": null, "hf_config_file": null, "urls": ["https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin"], "categories": {"1": "eng", "2": "rus", "3": "deu", "4": "fre", "5": "ita", "6": "jpn", "7": "spa", "8": "ceb", "9": "tur", "10": "por", "11": "ukr", "12": "epo", "13": "pol", "14": "swe", "15": "dut", "16": "heb", "17": "chi", "18": "hun", "19": "ara", "20": "cat", "21": "fin", "22": "cze", "23": "per", "24": "srp", "25": "gre", "26": "vie", "27": "bul", "28": "kor", "29": "nor", "30": "mac", "31": "rum", "32": "ind", "33": "tha", "34": "arm", "35": "dan", "36": "tam", "37": "hin", "38": "hrv", "39": "nn", "40": "bel", "41": "geo", "42": "tel", "43": "kaz", "44": "war", "45": "lit", "46": "glg", "47": "slo", "48": "ben", "49": "baq", "50": "slv", "51": "nn", "52": "mal", "53": "mar", "54": "est", "55": "aze", "56": "nn", "57": "alb", "58": "lat", "59": "bos", "60": "nno", "61": "urd", "62": "nn", "63": "nn", "64": "nn", "65": "nn", "66": "nn", "67": "nn", "68": "nn", "69": "nn", "70": "nn", "71": "nn", "72": "nn", "73": "nn", "74": "nn", "75": "nn", "76": "nn", "77": "nn", "78": "nn", "79": "nn", "80": "nn", "81": "nn", "82": "nn", "83": "nn", "84": "nn", "85": "nn", "86": "nn", "87": "nn", "88": "nn", "89": "nn", "90": "nn", "91": "nn", "92": "nn", "93": "nn", "94": "nn", "95": "nn", "96": "nn", "97": "nn", "98": "nn", "99": "nn", "100": "nn", "101": "nn", "102": "nn", "103": "nn", "104": "nn", "105": "nn", "106": "nn", "107": "nn", "108": "nn", "109": "nn", "110": "nn", "111": "nn", "112": "nn", "113": "nn", "114": "nn", "115": "nn", "116": "nn", "117": "nn", "118": "nn", "119": "nn", "120": "nn", "121": "nn", "122": "nn", "123": "nn", "124": "nn", "125": "nn", "126": "nn", "127": "nn", "128": "nn", "129": "nn", "130": "nn", "131": "nn", "132": 
"nn", "133": "nn", "134": "nn", "135": "nn", "136": "nn", "137": "nn", "138": "nn", "139": "nn", "140": "nn", "141": "nn", "142": "nn", "143": "nn", "144": "nn", "145": "nn", "146": "nn", "147": "nn", "148": "nn", "149": "nn", "150": "nn", "151": "nn", "152": "nn", "153": "nn", "154": "nn", "155": "nn", "156": "nn", "157": "nn", "158": "nn", "159": "nn", "160": "nn", "161": "nn", "162": "nn", "163": "nn", "164": "nn", "165": "nn", "166": "nn", "167": "nn", "168": "nn", "169": "nn", "170": "nn", "171": "nn", "172": "nn", "173": "nn", "174": "nn", "175": "nn", "176": "nn"}, "categories_orig": {"__label__en": "eng", "__label__ru": "rus", "__label__de": "deu", "__label__fr": "fre", "__label__it": "ita", "__label__ja": "jpn", "__label__es": "spa", "__label__ceb": "ceb", "__label__tr": "tur", "__label__pt": "por", "__label__uk": "ukr", "__label__eo": "epo", "__label__pl": "pol", "__label__sv": "swe", "__label__nl": "dut", "__label__he": "heb", "__label__zh": "chi", "__label__hu": "hun", "__label__ar": "ara", "__label__ca": "cat", "__label__fi": "fin", "__label__cs": "cze", "__label__fa": "per", "__label__sr": "srp", "__label__el": "gre", "__label__vi": "vie", "__label__bg": "bul", "__label__ko": "kor", "__label__no": "nor", "__label__mk": "mac", "__label__ro": "rum", "__label__id": "ind", "__label__th": "tha", "__label__hy": "arm", "__label__da": "dan", "__label__ta": "tam", "__label__hi": "hin", "__label__hr": "hrv", "__label__sh": "nn", "__label__be": "bel", "__label__ka": "geo", "__label__te": "tel", "__label__kk": "kaz", "__label__war": "war", "__label__lt": "lit", "__label__gl": "glg", "__label__sk": "slo", "__label__bn": "ben", "__label__eu": "baq", "__label__sl": "slv", "__label__kn": "nn", "__label__ml": "mal", "__label__mr": "mar", "__label__et": "est", "__label__az": "aze", "__label__ms": "nn", "__label__sq": "alb", "__label__la": "lat", "__label__bs": "bos", "__label__nn": "nno", "__label__ur": "urd", "__label__lv": "nn", "__label__my": "nn", "__label__tt": 
"nn", "__label__af": "nn", "__label__oc": "nn", "__label__nds": "nn", "__label__ky": "nn", "__label__ast": "nn", "__label__tl": "nn", "__label__is": "nn", "__label__ia": "nn", "__label__si": "nn", "__label__gu": "nn", "__label__km": "nn", "__label__br": "nn", "__label__ba": "nn", "__label__uz": "nn", "__label__bo": "nn", "__label__pa": "nn", "__label__vo": "nn", "__label__als": "nn", "__label__ne": "nn", "__label__cy": "nn", "__label__jbo": "nn", "__label__fy": "nn", "__label__mn": "nn", "__label__lb": "nn", "__label__ce": "nn", "__label__ug": "nn", "__label__tg": "nn", "__label__sco": "nn", "__label__sa": "nn", "__label__cv": "nn", "__label__jv": "nn", "__label__min": "nn", "__label__io": "nn", "__label__or": "nn", "__label__as": "nn", "__label__new": "nn", "__label__ga": "nn", "__label__mg": "nn", "__label__an": "nn", "__label__ckb": "nn", "__label__sw": "nn", "__label__bar": "nn", "__label__lmo": "nn", "__label__yi": "nn", "__label__arz": "nn", "__label__mhr": "nn", "__label__azb": "nn", "__label__sah": "nn", "__label__pnb": "nn", "__label__su": "nn", "__label__bpy": "nn", "__label__pms": "nn", "__label__ilo": "nn", "__label__wuu": "nn", "__label__ku": "nn", "__label__ps": "nn", "__label__ie": "nn", "__label__xmf": "nn", "__label__yue": "nn", "__label__gom": "nn", "__label__li": "nn", "__label__mwl": "nn", "__label__kw": "nn", "__label__sd": "nn", "__label__hsb": "nn", "__label__scn": "nn", "__label__gd": "nn", "__label__pam": "nn", "__label__bh": "nn", "__label__mai": "nn", "__label__vec": "nn", "__label__mt": "nn", "__label__dv": "nn", "__label__wa": "nn", "__label__mzn": "nn", "__label__am": "nn", "__label__qu": "nn", "__label__eml": "nn", "__label__cbk": "nn", "__label__tk": "nn", "__label__rm": "nn", "__label__os": "nn", "__label__vls": "nn", "__label__yo": "nn", "__label__lo": "nn", "__label__lez": "nn", "__label__so": "nn", "__label__myv": "nn", "__label__diq": "nn", "__label__mrj": "nn", "__label__dsb": "nn", "__label__frr": "nn", "__label__ht": "nn", 
"__label__gn": "nn", "__label__bxr": "nn", "__label__kv": "nn", "__label__sc": "nn", "__label__nah": "nn", "__label__krc": "nn", "__label__bcl": "nn", "__label__nap": "nn", "__label__gv": "nn", "__label__av": "nn", "__label__rue": "nn", "__label__xal": "nn", "__label__pfl": "nn", "__label__dty": "nn", "__label__hif": "nn", "__label__co": "nn", "__label__lrc": "nn", "__label__vep": "nn", "__label__tyv": "nn"}, "dl_library": null, "model_wrapper": "FasttextLangDetector", "architecture": null, "padding": null}
26
+ {"name": "deepdoctection/tatr_tab_struct_v2/pytorch_model.bin", "description": "Table Transformer (DETR) model trained on PubTables1M. It was introduced in the paper Aligning benchmark datasets for table structure recognition by Smock et al. This model is devoted to table structure recognition and assumes to receive a slightly croppedtable as input. It will predict rows, column and spanning cells. Use a padding of around 5 pixels", "size": [115511753], "tp_model": false, "config": "deepdoctection/tatr_tab_struct_v2/config.json", "preprocessor_config": "deepdoctection/tatr_tab_struct_v2/preprocessor_config.json", "hf_repo_id": "deepdoctection/tatr_tab_struct_v2", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json", "preprocessor_config.json"], "urls": null, "categories": {"1": "table", "2": "column", "3": "row", "4": "column_header", "5": "projected_row_header", "6": "spanning"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "HFDetrDerivedDetector", "architecture": null, "padding": null}
27
+ {"name": "layout/d2_model_0829999_layout.pth", "description": "Detectron2 layout detection model trained on Publaynet. Checkpoint for resuming training", "size": [548377327], "tp_model": false, "config": "dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_publaynet_inference_only", "hf_model_name": "d2_model_0829999_layout.pth", "hf_config_file": ["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"], "urls": null, "categories": {"1": "text", "2": "title", "3": "list", "4": "table", "5": "figure"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnDetector", "architecture": null, "padding": null}
28
+ {"name": "cell/d2_model_1849999_cell.pth", "description": "Detectron2 cell detection inference only model trained on Pubtabnet", "size": [548279023], "tp_model": false, "config": "dd/d2/cell/CASCADE_RCNN_R_50_FPN_GN.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_c_inference_only", "hf_model_name": "cell/d2_model_1849999_cell.pth", "hf_config_file": ["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"], "urls": null, "categories": {"1": "cell"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnDetector", "architecture": null, "padding": null}
29
+ {"name": "item/d2_model_1639999_item.pth", "description": "Detectron2 item detection model trained on Pubtabnet", "size": [548303599], "tp_model": false, "config": "dd/d2/item/CASCADE_RCNN_R_50_FPN_GN.yaml", "preprocessor_config": null, "hf_repo_id": "deepdoctection/d2_casc_rcnn_X_32xd4_50_FPN_GN_2FC_pubtabnet_rc_inference_only", "hf_model_name": "d2_model_1639999_item.pth", "hf_config_file": ["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"], "urls": null, "categories": {"1": "row", "2": "column"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "D2FrcnnDetector", "architecture": null, "padding": null}
30
+ {"name": "Felix92/doctr-torch-parseq-multilingual-v1/pytorch_model.bin", "description": "", "size": [63286381], "tp_model": false, "config": "Felix92/doctr-torch-parseq-multilingual-v1/config.json", "preprocessor_config": null, "hf_repo_id": "Felix92/doctr-torch-parseq-multilingual-v1", "hf_model_name": "pytorch_model.bin", "hf_config_file": ["config.json"], "urls": null, "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "DoctrTextRecognizer", "architecture": "parseq", "padding": null}
31
+ {"name": "doctr/crnn_vgg16_bn/pt/master-fde31e4a.pt", "description": "MASTER", "size": [63286381], "tp_model": false, "config": null, "preprocessor_config": null, "hf_repo_id": null, "hf_model_name": null, "hf_config_file": null, "urls": ["https://doctr-static.mindee.com/models?id=v0.7.0/master-fde31e4a.pt&src=0"], "categories": {}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "DoctrTextRecognizer", "architecture": "master", "padding": null}
32
+ {"name": "Aryn/deformable-detr-DocLayNet/model.safetensors", "description": "Deformable DEtection TRansformer (DETR), trained on DocLayNet (including 80k annotated pages in 11 classes).", "size": [115511753], "tp_model": false, "config": "Aryn/deformable-detr-DocLayNet/config.json", "preprocessor_config": "Aryn/deformable-detr-DocLayNet/preprocessor_config.json", "hf_repo_id": "Aryn/deformable-detr-DocLayNet", "hf_model_name": "model.safetensors", "hf_config_file": ["config.json", "preprocessor_config.json"], "urls": null, "categories": {"1": "default_type", "2": "caption", "11": "text", "12": "title", "3": "footnote", "4": "formula", "5": "list_item", "6": "page_footer", "7": "page_header", "8": "figure", "9": "section_header", "10": "table"}, "categories_orig": null, "dl_library": "PT", "model_wrapper": "HFDetrDerivedDetector", "architecture": null, "padding": null}
@@ -2,14 +2,17 @@
2
2
  # File: __init__.py
3
3
 
4
4
  """
5
- Dataflow is a package for loading and processing data in both training and prediction environments. Dataflow
6
- is essentially pure Python and, with a simple API, it contains a variety of methods for parallelling complex
7
- transformations. Due to the fact that DataFlow is not available through pypi we have integrated the most important
8
- DataFlow classes into deepdoctection in order to avoid installing the package separately from source.
5
+ # Dataflows
9
6
 
10
- Further information (including several tutorials about performance) can be found in the excellent documentation:
7
+ Info:
8
+ Dataflow is a package for loading and processing data in both training and prediction environments. Dataflow
9
+ is essentially pure Python and, with a simple API, it contains a variety of methods for parallelizing complex
10
+ transformations. We have integrated the most important DataFlow classes into deepdoctection in order to avoid
11
+ installing the package separately from source.
11
12
 
12
- <https://tensorpack.readthedocs.io/en/latest/tutorial/dataflow.html>
13
+ Further information (including several tutorials about performance) can be found in the excellent documentation:
14
+
15
+ <https://tensorpack.readthedocs.io/en/latest/tutorial/dataflow.html>
13
16
  """
14
17
 
15
18
 
@@ -5,7 +5,7 @@
5
5
  # Licensed under the Apache License, Version 2.0 (the "License")
6
6
 
7
7
  """
8
- This file replaces relevant parts of the Dataflow package. Most of the code has been taken from
8
+ Most of the code has been taken from
9
9
 
10
10
  <https://github.com/tensorpack/dataflow/blob/master/dataflow/dataflow/base.py>
11
11
  """
@@ -44,12 +44,12 @@ class DataFlow:
44
44
  @abstractmethod
45
45
  def __iter__(self) -> Iterator[Any]:
46
46
  """
47
- * A dataflow is an iterable. The `__iter__` method should yield a list or dict each time.
47
+ - A dataflow is an iterable. The `__iter__` method should yield a list or dict each time.
48
48
  Note that dict is **partially** supported at the moment: certain dataflow does not support dict.
49
- * The `__iter__` method can be either finite (will stop iteration) or infinite
49
+ - The `__iter__` method can be either finite (will stop iteration) or infinite
50
50
  (will not stop iteration). For a finite dataflow, `__iter__` can be called
51
51
  again immediately after the previous call returned.
52
- * For many dataflow, the `__iter__` method is non-reentrant, which means for a dataflow
52
+ - For many dataflow, the `__iter__` method is non-reentrant, which means for a dataflow
53
53
  instance ``df``, `df.__iter__` cannot be called before the previous
54
54
  `df.__iter__` call has finished (iteration has stopped).
55
55
  When a dataflow is non-reentrant, `df.__iter__` should throw an exception if
@@ -62,18 +62,18 @@ class DataFlow:
62
62
 
63
63
  def __len__(self) -> int:
64
64
  """
65
- * A dataflow can optionally implement `__len__`. If not implemented, it will
65
+ - A dataflow can optionally implement `__len__`. If not implemented, it will
66
66
  throw `NotImplementedError`.
67
- * It returns an integer representing the size of the dataflow.
67
+ - It returns an integer representing the size of the dataflow.
68
68
  The return value **may not be accurate or meaningful** at all.
69
69
  When saying the length is "accurate", it means that
70
70
  `__iter__` will always yield this many of datapoints before it stops iteration.
71
- * There could be many reasons why `__len__` is inaccurate.
71
+ - There could be many reasons why `__len__` is inaccurate.
72
72
  For example, some dataflow has dynamic size, if it throws away datapoints on the fly.
73
73
  Some dataflow mixes the datapoints between consecutive passes over
74
74
  the dataset, due to parallelism and buffering.
75
75
  In this case it does not make sense to stop the iteration anywhere.
76
- * Due to the above reasons, the length is only a rough guidance.
76
+ - Due to the above reasons, the length is only a rough guidance.
77
77
  And it's up to the user how to interpret it.
78
78
  Inside tensorpack it's only used in these places:
79
79
  + A default ``steps_per_epoch`` in training, but you probably want to customize
@@ -82,28 +82,30 @@ class DataFlow:
82
82
  + Used by `InferenceRunner` to get the number of iterations in inference.
83
83
  In this case users are **responsible** for making sure that `__len__` is "accurate".
84
84
  This is to guarantee that inference is run on a fixed set of images.
85
+
85
86
  Returns:
86
87
  int: rough size of this dataflow.
88
+
87
89
  Raises:
88
- `NotImplementedError` if this DataFlow doesn't have a size.
90
+ NotImplementedError: if this DataFlow doesn't have a size.
89
91
  """
90
92
  raise NotImplementedError
91
93
 
92
94
  def reset_state(self) -> None:
93
95
  """
94
- * The caller must guarantee that `reset_state` should be called **once and only once**
96
+ - The caller must guarantee that `reset_state` should be called **once and only once**
95
97
  by the **process that uses the dataflow** before `__iter__` is called.
96
98
  The caller thread of this method should stay alive to keep this dataflow alive.
97
- * It is meant for certain initialization that involves processes,
99
+ - It is meant for certain initialization that involves processes,
98
100
  e.g., initialize random number generators (RNG), create worker processes.
99
101
  Because it's very common to use RNG in data processing,
100
102
  developers of dataflow can also subclass `RNGDataFlow` to have easier access to
101
103
  a properly-initialized RNG.
102
- * A dataflow is not fork-safe after `reset_state` is called (because this will violate the guarantee).
104
+ - A dataflow is not fork-safe after `reset_state` is called (because this will violate the guarantee).
103
105
  There are a few other dataflows that are not fork-safe anytime, which will be mentioned in the docs.
104
- * You should take the responsibility and follow the above guarantee if you're the caller of a dataflow yourself
106
+ - You should take the responsibility and follow the above guarantee if you're the caller of a dataflow yourself
105
107
  (either when you're using dataflow outside tensorpack, or if you're writing a wrapper dataflow).
106
- * Tensorpack's built-in forking dataflows (`MultiProcessRunner`, `MultiProcessMapData`, etc)
108
+ - Tensorpack's built-in forking dataflows (`MultiProcessRunner`, `MultiProcessMapData`, etc)
107
109
  and other component that uses dataflows (`InputSource`)
108
110
  already take care of the responsibility of calling this method.
109
111
  """
@@ -131,15 +133,31 @@ class ProxyDataFlow(DataFlow):
131
133
 
132
134
  def __init__(self, df: DataFlow) -> None:
133
135
  """
134
- :param df: DataFlow to proxy.
136
+ Initializes the ProxyDataFlow.
137
+
138
+ Args:
139
+ df: DataFlow to proxy.
135
140
  """
136
141
  self.df = df
137
142
 
138
143
  def reset_state(self) -> None:
144
+ """Resets the state of the proxied DataFlow."""
139
145
  self.df.reset_state()
140
146
 
141
147
  def __len__(self) -> int:
148
+ """
149
+ Returns the size of the proxied DataFlow.
150
+
151
+ Returns:
152
+ int: Size of the proxied DataFlow.
153
+ """
142
154
  return self.df.__len__()
143
155
 
144
156
  def __iter__(self) -> Iterator[Any]:
157
+ """
158
+ Iterates over the proxied DataFlow.
159
+
160
+ Returns:
161
+ Iterator[Any]: Iterator of the proxied DataFlow.
162
+ """
145
163
  return self.df.__iter__()