deepdoctection 0.44.1__py3-none-any.whl → 0.45.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +4 -3
- deepdoctection/analyzer/config.py +41 -0
- deepdoctection/analyzer/factory.py +249 -1
- deepdoctection/configs/profiles.jsonl +2 -1
- deepdoctection/datapoint/view.py +38 -38
- deepdoctection/extern/__init__.py +1 -0
- deepdoctection/extern/d2detect.py +1 -1
- deepdoctection/extern/fastlang.py +2 -2
- deepdoctection/extern/hflayoutlm.py +23 -10
- deepdoctection/extern/hflm.py +432 -7
- deepdoctection/pipe/language.py +4 -4
- deepdoctection/pipe/lm.py +7 -3
- deepdoctection/utils/file_utils.py +1 -1
- deepdoctection/utils/settings.py +2 -0
- deepdoctection/utils/viz.py +3 -3
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.45.0.dist-info}/METADATA +12 -12
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.45.0.dist-info}/RECORD +20 -20
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.45.0.dist-info}/WHEEL +0 -0
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.45.0.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.44.1.dist-info → deepdoctection-0.45.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: deepdoctection
|
|
3
|
-
Version: 0.44.1
|
|
3
|
+
Version: 0.45.0
|
|
4
4
|
Summary: Repository for Document AI
|
|
5
5
|
Home-page: https://github.com/deepdoctection/deepdoctection
|
|
6
6
|
Author: Dr. Janis Meyer
|
|
@@ -24,7 +24,7 @@ Requires-Dist: jsonlines==3.1.0
|
|
|
24
24
|
Requires-Dist: lazy-imports==0.3.1
|
|
25
25
|
Requires-Dist: mock==4.0.3
|
|
26
26
|
Requires-Dist: networkx>=2.7.1
|
|
27
|
-
Requires-Dist: numpy
|
|
27
|
+
Requires-Dist: numpy>2.0
|
|
28
28
|
Requires-Dist: packaging>=20.0
|
|
29
29
|
Requires-Dist: Pillow>=10.0.0
|
|
30
30
|
Requires-Dist: pypdf>=6.0.0
|
|
@@ -43,7 +43,7 @@ Requires-Dist: jsonlines==3.1.0; extra == "tf"
|
|
|
43
43
|
Requires-Dist: lazy-imports==0.3.1; extra == "tf"
|
|
44
44
|
Requires-Dist: mock==4.0.3; extra == "tf"
|
|
45
45
|
Requires-Dist: networkx>=2.7.1; extra == "tf"
|
|
46
|
-
Requires-Dist: numpy
|
|
46
|
+
Requires-Dist: numpy>2.0; extra == "tf"
|
|
47
47
|
Requires-Dist: packaging>=20.0; extra == "tf"
|
|
48
48
|
Requires-Dist: Pillow>=10.0.0; extra == "tf"
|
|
49
49
|
Requires-Dist: pypdf>=6.0.0; extra == "tf"
|
|
@@ -58,11 +58,10 @@ Requires-Dist: tensorpack==0.11; extra == "tf"
|
|
|
58
58
|
Requires-Dist: protobuf==3.20.1; extra == "tf"
|
|
59
59
|
Requires-Dist: tensorflow-addons>=0.17.1; extra == "tf"
|
|
60
60
|
Requires-Dist: tf2onnx>=1.9.2; extra == "tf"
|
|
61
|
-
Requires-Dist: python-doctr==0.
|
|
61
|
+
Requires-Dist: python-doctr==0.10.0; extra == "tf"
|
|
62
62
|
Requires-Dist: pycocotools>=2.0.2; extra == "tf"
|
|
63
63
|
Requires-Dist: boto3==1.34.102; extra == "tf"
|
|
64
64
|
Requires-Dist: pdfplumber>=0.11.0; extra == "tf"
|
|
65
|
-
Requires-Dist: fasttext-wheel; extra == "tf"
|
|
66
65
|
Requires-Dist: jdeskew>=0.2.2; extra == "tf"
|
|
67
66
|
Requires-Dist: apted==1.0.3; extra == "tf"
|
|
68
67
|
Requires-Dist: distance==0.1.3; extra == "tf"
|
|
@@ -75,7 +74,7 @@ Requires-Dist: jsonlines==3.1.0; extra == "pt"
|
|
|
75
74
|
Requires-Dist: lazy-imports==0.3.1; extra == "pt"
|
|
76
75
|
Requires-Dist: mock==4.0.3; extra == "pt"
|
|
77
76
|
Requires-Dist: networkx>=2.7.1; extra == "pt"
|
|
78
|
-
Requires-Dist: numpy
|
|
77
|
+
Requires-Dist: numpy>2.0; extra == "pt"
|
|
79
78
|
Requires-Dist: packaging>=20.0; extra == "pt"
|
|
80
79
|
Requires-Dist: Pillow>=10.0.0; extra == "pt"
|
|
81
80
|
Requires-Dist: pypdf>=6.0.0; extra == "pt"
|
|
@@ -89,11 +88,10 @@ Requires-Dist: tqdm>=4.64.0; extra == "pt"
|
|
|
89
88
|
Requires-Dist: timm>=0.9.16; extra == "pt"
|
|
90
89
|
Requires-Dist: transformers>=4.48.0; extra == "pt"
|
|
91
90
|
Requires-Dist: accelerate>=0.29.1; extra == "pt"
|
|
92
|
-
Requires-Dist: python-doctr==0.
|
|
91
|
+
Requires-Dist: python-doctr==0.10.0; extra == "pt"
|
|
93
92
|
Requires-Dist: pycocotools>=2.0.2; extra == "pt"
|
|
94
93
|
Requires-Dist: boto3==1.34.102; extra == "pt"
|
|
95
94
|
Requires-Dist: pdfplumber>=0.11.0; extra == "pt"
|
|
96
|
-
Requires-Dist: fasttext-wheel; extra == "pt"
|
|
97
95
|
Requires-Dist: jdeskew>=0.2.2; extra == "pt"
|
|
98
96
|
Requires-Dist: apted==1.0.3; extra == "pt"
|
|
99
97
|
Requires-Dist: distance==0.1.3; extra == "pt"
|
|
@@ -183,7 +181,8 @@ It also provides a framework for training, evaluating and inferencing Document A
|
|
|
183
181
|
[**LiLT**](https://github.com/jpWang/LiLT) and selected
|
|
184
182
|
[**Bert**](https://huggingface.co/docs/transformers/model_doc/xlm-roberta)-style including features like sliding windows.
|
|
185
183
|
- Text mining for native PDFs with [**pdfplumber**](https://github.com/jsvine/pdfplumber),
|
|
186
|
-
- Language detection with [**fastText**](https://github.com/facebookresearch/fastText)
|
|
184
|
+
- Language detection with `papluca/xlm-roberta-base-language-detection`. [**fastText**](https://github.com/facebookresearch/fastText) is still available but
|
|
185
|
+
will be removed in a future version.
|
|
187
186
|
- Deskewing and rotating images with [**jdeskew**](https://github.com/phamquiluan/jdeskew).
|
|
188
187
|
- Fine-tuning and evaluation tools.
|
|
189
188
|
- Lots of [tutorials](https://github.com/deepdoctection/notebooks)
|
|
@@ -294,7 +293,7 @@ alt="text" width="40%">
|
|
|
294
293
|
|
|
295
294
|
- Linux or macOS. Windows is not supported but there is a [Dockerfile](./docker/pytorch-cpu-jupyter/Dockerfile) available.
|
|
296
295
|
- Python >= 3.9
|
|
297
|
-
- 2.
|
|
296
|
+
- 2.6 \<= PyTorch **or** 2.11 \<= Tensorflow < 2.16. (For lower Tensorflow versions the code will only run on a GPU).
|
|
298
297
|
Tensorflow support will be stopped from Python 3.11 onwards.
|
|
299
298
|
- To fine-tune models, a GPU is recommended.
|
|
300
299
|
|
|
@@ -321,7 +320,7 @@ For a simple setup which is enough to parse documents with the default setting,
|
|
|
321
320
|
|
|
322
321
|
```
|
|
323
322
|
pip install transformers
|
|
324
|
-
pip install python-doctr==0.
|
|
323
|
+
pip install python-doctr==0.10.0 # If you use Python 3.10 or higher you can use the latest version.
|
|
325
324
|
pip install deepdoctection
|
|
326
325
|
```
|
|
327
326
|
|
|
@@ -329,8 +328,9 @@ pip install deepdoctection
|
|
|
329
328
|
|
|
330
329
|
```
|
|
331
330
|
pip install tensorpack
|
|
332
|
-
pip install python-doctr==0.9.0
|
|
333
331
|
pip install deepdoctection
|
|
332
|
+
pip install "numpy>=1.21,<2.0" --upgrade --force-reinstall # because TF 2.11 does not support numpy 2.0
|
|
333
|
+
pip install "python-doctr==0.9.0"
|
|
334
334
|
```
|
|
335
335
|
|
|
336
336
|
Both setups are sufficient to run the [**introduction notebook**](https://github.com/deepdoctection/notebooks/blob/main/Get_Started.ipynb).
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
deepdoctection/__init__.py,sha256=
|
|
1
|
+
deepdoctection/__init__.py,sha256=nyR805N1k7HNYBtc8gqshSBvRxpK0JTKHWchetqQjno,13125
|
|
2
2
|
deepdoctection/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
deepdoctection/analyzer/__init__.py,sha256=wg0BcFwdCeREwzZfa--Yx8HUJ9LPv5z5PmLwtkZdPH8,772
|
|
4
|
-
deepdoctection/analyzer/config.py,sha256=
|
|
4
|
+
deepdoctection/analyzer/config.py,sha256=_JqF0-6G-IQ9BmQit9OiUMhZDw7pd-eWlC2RPVpwVWQ,43932
|
|
5
5
|
deepdoctection/analyzer/dd.py,sha256=2BGvZpl9o9khcaOV52-DPHMrs0DsqUO8cpdqFVHHzDQ,5176
|
|
6
|
-
deepdoctection/analyzer/factory.py,sha256=
|
|
6
|
+
deepdoctection/analyzer/factory.py,sha256=DVrXVN-h0apWQG3shmmXwyvAcqLKIvMsW8bNG5cwI5s,47668
|
|
7
7
|
deepdoctection/configs/__init__.py,sha256=TX_P6tqDOF1LK1mi9ruAl7x0mtv1Asm8cYWCz3Pe2dk,646
|
|
8
8
|
deepdoctection/configs/conf_dd_one.yaml,sha256=DHqAIKH3jRam54QO7qib2zutmpyFA8TqdV5UvIV191A,3688
|
|
9
9
|
deepdoctection/configs/conf_tesseract.yaml,sha256=oF6szDyoi15FHvq7yFUNIEjfA_jNLhGxoowiRsz_zY4,35
|
|
10
|
-
deepdoctection/configs/profiles.jsonl,sha256=
|
|
10
|
+
deepdoctection/configs/profiles.jsonl,sha256=q_xPb-UcR-FxyspoJOJSMGmx-SsUkUokyK2tox98PcE,32510
|
|
11
11
|
deepdoctection/dataflow/__init__.py,sha256=pY4lhjTes2BU-0AdIIRMnRqo9Sv6TopVE_SNfLmpgnc,828
|
|
12
12
|
deepdoctection/dataflow/base.py,sha256=ZLRijyHI1J7tBfnE-q7eqUieYMMERjtK-c1oK40dBkk,6556
|
|
13
13
|
deepdoctection/dataflow/common.py,sha256=DKD_pRZBCt2vO3oNZcOvdoC3jThabTNcNbTS16mpVR0,10351
|
|
@@ -21,7 +21,7 @@ deepdoctection/datapoint/annotation.py,sha256=f32BNmzUGJoNMeGst2RGC2jmjJpzzjxyBR
|
|
|
21
21
|
deepdoctection/datapoint/box.py,sha256=QAS8sK2Ge4_ysW6zOYkLlzNwhSyw_mhYcYsxscClEno,31453
|
|
22
22
|
deepdoctection/datapoint/convert.py,sha256=6ENXX3tBdY8ogb2NBPxsOsQMGnQux8ol5nrUfWS5tYE,7352
|
|
23
23
|
deepdoctection/datapoint/image.py,sha256=N5VH2oeKQWIt5FQvFaeu-FL8eckv7LQS0ZJsHSuVwjI,37187
|
|
24
|
-
deepdoctection/datapoint/view.py,sha256=
|
|
24
|
+
deepdoctection/datapoint/view.py,sha256=x7BuWWHWOMQa_dZiKxlNIoq1NxZ-Z2F9nOW6vsJOxwE,61910
|
|
25
25
|
deepdoctection/datasets/__init__.py,sha256=4ifjIwWCPYiS31GzUlVDScrkNOrb1eo5xHlRXNyg_58,994
|
|
26
26
|
deepdoctection/datasets/adapter.py,sha256=VSLM_980aHi4TpgOxfxiBHiF_fUXyh348PXet6zTo-4,7779
|
|
27
27
|
deepdoctection/datasets/base.py,sha256=oLv2o9QiKVN44kO7Llj-z00_TQRYBsVlvBL3ZQoscUQ,30670
|
|
@@ -50,15 +50,15 @@ deepdoctection/eval/eval.py,sha256=UUL-wk39RONLMOOyH3WjjpHunZJiQluXZFqir8eaDtY,1
|
|
|
50
50
|
deepdoctection/eval/registry.py,sha256=us6EGN_tAia1Mk1mwWQwDeE-xqxcuopztdi8n-ieGbg,1100
|
|
51
51
|
deepdoctection/eval/tedsmetric.py,sha256=EcNeJynsmxyl5bOH3bjy2wE647ONf0SF5OZyGbVu35Q,9963
|
|
52
52
|
deepdoctection/eval/tp_eval_callback.py,sha256=lqrOn2tdaRiF_Vr_9CwBr2ryatcWu3mQKya8YZ2pA9A,5261
|
|
53
|
-
deepdoctection/extern/__init__.py,sha256=
|
|
53
|
+
deepdoctection/extern/__init__.py,sha256=1RVkuC0MPlz_g4nhU-nc0sPIRR72JWeDgZtyy4BWw8w,1011
|
|
54
54
|
deepdoctection/extern/base.py,sha256=LomTR9HXcBU55MPDIA8D1rIamk7DUmToJmgcRXzCoeU,31650
|
|
55
|
-
deepdoctection/extern/d2detect.py,sha256=
|
|
55
|
+
deepdoctection/extern/d2detect.py,sha256=O8XN_sUrQThMmd9-t97lzZvTGVSMG-1DD_VR5TV9V8c,22375
|
|
56
56
|
deepdoctection/extern/deskew.py,sha256=L_jU0rXh03qzwaT79EIqE_zYMUVeFwWDbsGbtahuL2k,3124
|
|
57
57
|
deepdoctection/extern/doctrocr.py,sha256=jB0mnvGmmygoUu9e9zw2_HtAgQUdCJHbxMSt1cfK5bA,25381
|
|
58
|
-
deepdoctection/extern/fastlang.py,sha256=
|
|
58
|
+
deepdoctection/extern/fastlang.py,sha256=0nBFZTwMS5s9fhjgMc_p0y18V6wZwQme0r6B6B3uFro,4952
|
|
59
59
|
deepdoctection/extern/hfdetr.py,sha256=N3eLNI5BsQS9_7YZyBeWndSgUydJij7ugZA9p4V1xaQ,14316
|
|
60
|
-
deepdoctection/extern/hflayoutlm.py,sha256=
|
|
61
|
-
deepdoctection/extern/hflm.py,sha256=
|
|
60
|
+
deepdoctection/extern/hflayoutlm.py,sha256=htPfwwJ5VpzYP6CZ86YHwNfvJ7fdhDF_rxTKUa8rG9w,60488
|
|
61
|
+
deepdoctection/extern/hflm.py,sha256=ftr5jLb39521KtHZOEyTWuaE7bnbSwm3EQSHlcynXIM,27585
|
|
62
62
|
deepdoctection/extern/model.py,sha256=kMIlx07_kdwZHLYB3QUG0DT_VSv2aZuKIIbv3fs0WqA,18233
|
|
63
63
|
deepdoctection/extern/pdftext.py,sha256=ljzPQn3yYAlS6MoZqzixD-fO2GlHwu1aMiOQ6qMIzbg,7513
|
|
64
64
|
deepdoctection/extern/tessocr.py,sha256=SuPmngsJg38riL4b09z6_FIzJH6H3RIwoighG2GPMYM,17457
|
|
@@ -108,9 +108,9 @@ deepdoctection/pipe/base.py,sha256=ZOQ9G9xCxzzGSoKVNwNlqmqx73S9tp4L_5W-R2vcah0,1
|
|
|
108
108
|
deepdoctection/pipe/common.py,sha256=6asx6ionpKxO90kttDgCRhU893FrsSZJpW91_dRGGBc,24586
|
|
109
109
|
deepdoctection/pipe/concurrency.py,sha256=w3GaL50_bvEJTfrZ2Omds_5jBSBPkjvxr63ZZLpULOM,9780
|
|
110
110
|
deepdoctection/pipe/doctectionpipe.py,sha256=ik5F92F3klI5Nve_AnyIRj-ApMoKHSR2SjcWWnI0d2g,14063
|
|
111
|
-
deepdoctection/pipe/language.py,sha256=
|
|
111
|
+
deepdoctection/pipe/language.py,sha256=VZvw1hYrs4F1g2aSmqt16jAzgigedui2dptcRtvASfY,5949
|
|
112
112
|
deepdoctection/pipe/layout.py,sha256=pm53RUyMCERHJVWRJmeDUfjf-6DlRuTtUGETHpyr1UY,6391
|
|
113
|
-
deepdoctection/pipe/lm.py,sha256=
|
|
113
|
+
deepdoctection/pipe/lm.py,sha256=XtvaqjPK-2exWOmzznCZORL5MjkP-33fd0MlJpTtbMA,20617
|
|
114
114
|
deepdoctection/pipe/order.py,sha256=m31RLoQNTpUTMpuyrAZKcTnRhyPLZ_Bmb1Ngxs7JkbY,41129
|
|
115
115
|
deepdoctection/pipe/refine.py,sha256=AazkdLz5F1H8OIO8d1oFY4pqOprP0zW42ZeXVfeUtew,23422
|
|
116
116
|
deepdoctection/pipe/registry.py,sha256=uT5fnHjffoNGk2JPuD2-pMYtO3Iko7-wrwVZVCWLtok,906
|
|
@@ -129,21 +129,21 @@ deepdoctection/utils/context.py,sha256=5QfdzxsiSPnNs1qtJdgjguIoD8srLQ2W8oeDzwp9F
|
|
|
129
129
|
deepdoctection/utils/develop.py,sha256=4myrqBDypM6tQ2a2Jo3Q20RuE_W2czykpXBwgXPrxNw,3568
|
|
130
130
|
deepdoctection/utils/env_info.py,sha256=b1WohrfQuoL-BPN0_s8Rjtwzx-WKvCyaX2I4qYl1Emc,19878
|
|
131
131
|
deepdoctection/utils/error.py,sha256=sIry8F5MZ0yLvKfAwVz90IorKWVvjoRqcC0L8qq8mLk,2480
|
|
132
|
-
deepdoctection/utils/file_utils.py,sha256=
|
|
132
|
+
deepdoctection/utils/file_utils.py,sha256=PzUAE7eaiPl-m4SKXNF5_s3Ks7B0WeolmRaIJ7FNO2U,26276
|
|
133
133
|
deepdoctection/utils/fs.py,sha256=KTS9FJzZk9le_vmIPr9IisJw0AyTfjkyX1KoWQy4DNs,12729
|
|
134
134
|
deepdoctection/utils/identifier.py,sha256=Jt12MeZf7eC1qciY5Fp_AYUGxYVcjsy7xNBUvJil7dU,2270
|
|
135
135
|
deepdoctection/utils/logger.py,sha256=ddQ0xBStluf8OvoRlEB8YkqyRR-ZYgyJYLClTmJJMAU,10290
|
|
136
136
|
deepdoctection/utils/metacfg.py,sha256=5M390--ZMoyJEt5oZOwFMGt2i8OF_ayeb0NVmUO_3OQ,7235
|
|
137
137
|
deepdoctection/utils/mocks.py,sha256=IkN3-IzAl4eX0ibgKIHg8IY7ykVw6BnpF6XnxKnKaZI,2389
|
|
138
138
|
deepdoctection/utils/pdf_utils.py,sha256=BrxTuY9j0COyIRkJchJ0tt2h6ZsA2an6z-H8E8QwgUQ,13490
|
|
139
|
-
deepdoctection/utils/settings.py,sha256=
|
|
139
|
+
deepdoctection/utils/settings.py,sha256=nzD2OMxfsL50CvKGnbwn8IWW-t5wGfCS439HFian274,12920
|
|
140
140
|
deepdoctection/utils/tqdm.py,sha256=kx3Ivf0x85S0ZmEaN5mImu0V6isOgygOU8iyr2U99XU,1850
|
|
141
141
|
deepdoctection/utils/transform.py,sha256=jgeCyQWLN9q79jCGW7jysyKUKcJ1AVMk8OslF-3fbag,16095
|
|
142
142
|
deepdoctection/utils/types.py,sha256=Nsr2J7XSZazXho94y0oc01LBQxh0ve67c4Yx2gMlSXU,2952
|
|
143
143
|
deepdoctection/utils/utils.py,sha256=NBUb1qbx8Jm-AvYN1Sdbk0huXhbAKxZ-ZtOcMespsMM,7064
|
|
144
|
-
deepdoctection/utils/viz.py,sha256=
|
|
145
|
-
deepdoctection-0.
|
|
146
|
-
deepdoctection-0.
|
|
147
|
-
deepdoctection-0.
|
|
148
|
-
deepdoctection-0.
|
|
149
|
-
deepdoctection-0.
|
|
144
|
+
deepdoctection/utils/viz.py,sha256=Aduyr65LoI4l9Fv4HCm4Sz9Fa_rL5mR5mQwLN4rqLdM,27385
|
|
145
|
+
deepdoctection-0.45.0.dist-info/licenses/LICENSE,sha256=GQ0rUvuGdrMNEI3iHK5UQx6dIMU1QwAuyXsxUHn5MEQ,11351
|
|
146
|
+
deepdoctection-0.45.0.dist-info/METADATA,sha256=1obCXEh1LnL5mDI_95SHiu5KrehKqP2r3SL5j4Hm2OE,14972
|
|
147
|
+
deepdoctection-0.45.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
148
|
+
deepdoctection-0.45.0.dist-info/top_level.txt,sha256=hs2DdoOL9h4mnHhmO82BT4pz4QATIoOZ20PZmlnxFI8,15
|
|
149
|
+
deepdoctection-0.45.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|