deepdoctection 0.42.1__py3-none-any.whl → 0.43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of deepdoctection might be problematic. Click here for more details.
- deepdoctection/__init__.py +2 -1
- deepdoctection/analyzer/__init__.py +2 -1
- deepdoctection/analyzer/config.py +904 -0
- deepdoctection/analyzer/dd.py +36 -62
- deepdoctection/analyzer/factory.py +311 -141
- deepdoctection/configs/conf_dd_one.yaml +100 -44
- deepdoctection/configs/profiles.jsonl +32 -0
- deepdoctection/dataflow/__init__.py +9 -6
- deepdoctection/dataflow/base.py +33 -15
- deepdoctection/dataflow/common.py +96 -75
- deepdoctection/dataflow/custom.py +36 -29
- deepdoctection/dataflow/custom_serialize.py +135 -91
- deepdoctection/dataflow/parallel_map.py +33 -31
- deepdoctection/dataflow/serialize.py +15 -10
- deepdoctection/dataflow/stats.py +41 -28
- deepdoctection/datapoint/__init__.py +4 -6
- deepdoctection/datapoint/annotation.py +104 -66
- deepdoctection/datapoint/box.py +190 -130
- deepdoctection/datapoint/convert.py +66 -39
- deepdoctection/datapoint/image.py +151 -95
- deepdoctection/datapoint/view.py +383 -236
- deepdoctection/datasets/__init__.py +2 -6
- deepdoctection/datasets/adapter.py +11 -11
- deepdoctection/datasets/base.py +118 -81
- deepdoctection/datasets/dataflow_builder.py +18 -12
- deepdoctection/datasets/info.py +76 -57
- deepdoctection/datasets/instances/__init__.py +6 -2
- deepdoctection/datasets/instances/doclaynet.py +17 -14
- deepdoctection/datasets/instances/fintabnet.py +16 -22
- deepdoctection/datasets/instances/funsd.py +11 -6
- deepdoctection/datasets/instances/iiitar13k.py +9 -9
- deepdoctection/datasets/instances/layouttest.py +9 -9
- deepdoctection/datasets/instances/publaynet.py +9 -9
- deepdoctection/datasets/instances/pubtables1m.py +13 -13
- deepdoctection/datasets/instances/pubtabnet.py +13 -15
- deepdoctection/datasets/instances/rvlcdip.py +8 -8
- deepdoctection/datasets/instances/xfund.py +11 -9
- deepdoctection/datasets/registry.py +18 -11
- deepdoctection/datasets/save.py +12 -11
- deepdoctection/eval/__init__.py +3 -2
- deepdoctection/eval/accmetric.py +72 -52
- deepdoctection/eval/base.py +29 -10
- deepdoctection/eval/cocometric.py +14 -12
- deepdoctection/eval/eval.py +56 -41
- deepdoctection/eval/registry.py +6 -3
- deepdoctection/eval/tedsmetric.py +24 -9
- deepdoctection/eval/tp_eval_callback.py +13 -12
- deepdoctection/extern/__init__.py +1 -1
- deepdoctection/extern/base.py +176 -97
- deepdoctection/extern/d2detect.py +127 -92
- deepdoctection/extern/deskew.py +19 -10
- deepdoctection/extern/doctrocr.py +157 -106
- deepdoctection/extern/fastlang.py +25 -17
- deepdoctection/extern/hfdetr.py +137 -60
- deepdoctection/extern/hflayoutlm.py +329 -248
- deepdoctection/extern/hflm.py +67 -33
- deepdoctection/extern/model.py +108 -762
- deepdoctection/extern/pdftext.py +37 -12
- deepdoctection/extern/pt/nms.py +15 -1
- deepdoctection/extern/pt/ptutils.py +13 -9
- deepdoctection/extern/tessocr.py +87 -54
- deepdoctection/extern/texocr.py +29 -14
- deepdoctection/extern/tp/tfutils.py +36 -8
- deepdoctection/extern/tp/tpcompat.py +54 -16
- deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
- deepdoctection/extern/tpdetect.py +4 -2
- deepdoctection/mapper/__init__.py +1 -1
- deepdoctection/mapper/cats.py +117 -76
- deepdoctection/mapper/cocostruct.py +35 -17
- deepdoctection/mapper/d2struct.py +56 -29
- deepdoctection/mapper/hfstruct.py +32 -19
- deepdoctection/mapper/laylmstruct.py +221 -185
- deepdoctection/mapper/maputils.py +71 -35
- deepdoctection/mapper/match.py +76 -62
- deepdoctection/mapper/misc.py +68 -44
- deepdoctection/mapper/pascalstruct.py +13 -12
- deepdoctection/mapper/prodigystruct.py +33 -19
- deepdoctection/mapper/pubstruct.py +42 -32
- deepdoctection/mapper/tpstruct.py +39 -19
- deepdoctection/mapper/xfundstruct.py +20 -13
- deepdoctection/pipe/__init__.py +1 -2
- deepdoctection/pipe/anngen.py +104 -62
- deepdoctection/pipe/base.py +226 -107
- deepdoctection/pipe/common.py +206 -123
- deepdoctection/pipe/concurrency.py +74 -47
- deepdoctection/pipe/doctectionpipe.py +108 -47
- deepdoctection/pipe/language.py +41 -24
- deepdoctection/pipe/layout.py +45 -18
- deepdoctection/pipe/lm.py +146 -78
- deepdoctection/pipe/order.py +196 -113
- deepdoctection/pipe/refine.py +111 -63
- deepdoctection/pipe/registry.py +1 -1
- deepdoctection/pipe/segment.py +213 -142
- deepdoctection/pipe/sub_layout.py +76 -46
- deepdoctection/pipe/text.py +52 -33
- deepdoctection/pipe/transform.py +8 -6
- deepdoctection/train/d2_frcnn_train.py +87 -69
- deepdoctection/train/hf_detr_train.py +72 -40
- deepdoctection/train/hf_layoutlm_train.py +85 -46
- deepdoctection/train/tp_frcnn_train.py +56 -28
- deepdoctection/utils/concurrency.py +59 -16
- deepdoctection/utils/context.py +40 -19
- deepdoctection/utils/develop.py +25 -17
- deepdoctection/utils/env_info.py +85 -36
- deepdoctection/utils/error.py +16 -10
- deepdoctection/utils/file_utils.py +246 -62
- deepdoctection/utils/fs.py +162 -43
- deepdoctection/utils/identifier.py +29 -16
- deepdoctection/utils/logger.py +49 -32
- deepdoctection/utils/metacfg.py +83 -21
- deepdoctection/utils/pdf_utils.py +119 -62
- deepdoctection/utils/settings.py +24 -10
- deepdoctection/utils/tqdm.py +10 -5
- deepdoctection/utils/transform.py +182 -46
- deepdoctection/utils/utils.py +61 -28
- deepdoctection/utils/viz.py +150 -104
- deepdoctection-0.43.dist-info/METADATA +376 -0
- deepdoctection-0.43.dist-info/RECORD +149 -0
- deepdoctection/analyzer/_config.py +0 -146
- deepdoctection-0.42.1.dist-info/METADATA +0 -431
- deepdoctection-0.42.1.dist-info/RECORD +0 -148
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/WHEEL +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
- {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
deepdoctection/utils/fs.py
CHANGED
|
@@ -32,7 +32,7 @@ from urllib.request import urlretrieve
|
|
|
32
32
|
from .develop import deprecated
|
|
33
33
|
from .logger import LoggingRecord, logger
|
|
34
34
|
from .pdf_utils import get_pdf_file_reader, get_pdf_file_writer
|
|
35
|
-
from .settings import CONFIGS, DATASET_DIR, MODEL_DIR, PATH
|
|
35
|
+
from .settings import CACHE_DIR, CONFIGS, DATASET_DIR, MODEL_DIR, PATH
|
|
36
36
|
from .tqdm import get_tqdm
|
|
37
37
|
from .types import B64, B64Str, JsonDict, PathLikeOrStr, PixelValues
|
|
38
38
|
from .utils import is_file_extension
|
|
@@ -48,6 +48,7 @@ __all__ = [
|
|
|
48
48
|
"load_json",
|
|
49
49
|
"sub_path",
|
|
50
50
|
"get_package_path",
|
|
51
|
+
"get_cache_dir_path",
|
|
51
52
|
"get_configs_dir_path",
|
|
52
53
|
"get_weights_dir_path",
|
|
53
54
|
"get_dataset_dir_path",
|
|
@@ -57,7 +58,19 @@ __all__ = [
|
|
|
57
58
|
|
|
58
59
|
def sizeof_fmt(num: float, suffix: str = "B") -> str:
|
|
59
60
|
"""
|
|
60
|
-
|
|
61
|
+
Converts a number of bytes into a human-readable string.
|
|
62
|
+
|
|
63
|
+
Example:
|
|
64
|
+
```python
|
|
65
|
+
sizeof_fmt(1024)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Args:
|
|
69
|
+
num: The number of bytes.
|
|
70
|
+
suffix: The suffix to use (default is `B`).
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
A human-readable string representation of the byte size.
|
|
61
74
|
"""
|
|
62
75
|
for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]:
|
|
63
76
|
if abs(num) < 1024.0:
|
|
@@ -70,9 +83,18 @@ def sizeof_fmt(num: float, suffix: str = "B") -> str:
|
|
|
70
83
|
# Licensed under the Apache License, Version 2.0 (the "License")
|
|
71
84
|
def mkdir_p(dir_name: PathLikeOrStr) -> None:
|
|
72
85
|
"""
|
|
73
|
-
|
|
86
|
+
Creates a directory recursively, similar to `mkdir -p`. Does nothing if the directory already exists.
|
|
87
|
+
|
|
88
|
+
Example:
|
|
89
|
+
```python
|
|
90
|
+
mkdir_p('/tmp/mydir')
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
Args:
|
|
94
|
+
dir_name: The name of the directory to create.
|
|
74
95
|
|
|
75
|
-
:
|
|
96
|
+
Returns:
|
|
97
|
+
None
|
|
76
98
|
"""
|
|
77
99
|
assert dir_name is not None
|
|
78
100
|
if dir_name == "" or os.path.isdir(dir_name):
|
|
@@ -90,7 +112,21 @@ def download(
|
|
|
90
112
|
url: str, directory: PathLikeOrStr, file_name: Optional[str] = None, expect_size: Optional[int] = None
|
|
91
113
|
) -> str:
|
|
92
114
|
"""
|
|
93
|
-
|
|
115
|
+
Downloads a file from a URL to a directory. Determines the filename from the URL if not provided.
|
|
116
|
+
|
|
117
|
+
Example:
|
|
118
|
+
```python
|
|
119
|
+
download('http://example.com/file.txt', '/tmp')
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
url: The URL to download from.
|
|
124
|
+
directory: The directory to save the file in.
|
|
125
|
+
file_name: The name of the file (optional).
|
|
126
|
+
expect_size: The expected size of the file in bytes (optional).
|
|
127
|
+
|
|
128
|
+
Returns:
|
|
129
|
+
The path to the downloaded file.
|
|
94
130
|
"""
|
|
95
131
|
mkdir_p(directory)
|
|
96
132
|
if file_name is None:
|
|
@@ -150,12 +186,20 @@ def load_image_from_file(
|
|
|
150
186
|
path: PathLikeOrStr, type_id: Literal["np", "b64"] = "np"
|
|
151
187
|
) -> Optional[Union[B64Str, PixelValues]]:
|
|
152
188
|
"""
|
|
153
|
-
Loads an image from
|
|
154
|
-
or a conversion error occurs.
|
|
189
|
+
Loads an image from a file and returns either a base64-encoded string, a numpy array, or `None` if the file is not
|
|
190
|
+
found or a conversion error occurs.
|
|
191
|
+
|
|
192
|
+
Example:
|
|
193
|
+
```python
|
|
194
|
+
load_image_from_file('image.png', type_id='b64')
|
|
195
|
+
```
|
|
155
196
|
|
|
156
|
-
:
|
|
157
|
-
|
|
158
|
-
|
|
197
|
+
Args:
|
|
198
|
+
path: The path to the image.
|
|
199
|
+
type_id: The type of output, either `np` for numpy array or `b64` for base64 string.
|
|
200
|
+
|
|
201
|
+
Returns:
|
|
202
|
+
The image in the desired representation or `None`.
|
|
159
203
|
"""
|
|
160
204
|
image: Optional[Union[str, PixelValues]] = None
|
|
161
205
|
path = path.as_posix() if isinstance(path, Path) else path
|
|
@@ -177,12 +221,21 @@ def load_image_from_file(
|
|
|
177
221
|
|
|
178
222
|
def load_bytes_from_pdf_file(path: PathLikeOrStr, page_number: int = 0) -> B64:
|
|
179
223
|
"""
|
|
180
|
-
Loads a
|
|
181
|
-
|
|
224
|
+
Loads a PDF file with a single page and returns a bytes representation of this file. Can be converted into a numpy
|
|
225
|
+
array or passed directly to the `image` attribute of `Image`.
|
|
226
|
+
|
|
227
|
+
Example:
|
|
228
|
+
```python
|
|
229
|
+
load_bytes_from_pdf_file('document.pdf', page_number=0)
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
Args:
|
|
233
|
+
path: The path to a PDF file. If more pages are available, it will take the first page.
|
|
234
|
+
page_number: The page number to load. Raises `IndexError` if the document has fewer pages.
|
|
235
|
+
|
|
236
|
+
Returns:
|
|
237
|
+
A bytes representation of the file.
|
|
182
238
|
|
|
183
|
-
:param path: A path to a pdf file. If more pages are available, it will take the first page.
|
|
184
|
-
:param page_number: If a document has less than page_number it will raise an `IndexError`
|
|
185
|
-
:return: A bytes' representation of the file, width and height
|
|
186
239
|
"""
|
|
187
240
|
|
|
188
241
|
assert is_file_extension(path, [".pdf"]), f"type not allowed: {path}"
|
|
@@ -197,7 +250,10 @@ def load_bytes_from_pdf_file(path: PathLikeOrStr, page_number: int = 0) -> B64:
|
|
|
197
250
|
|
|
198
251
|
class LoadImageFunc(Protocol):
|
|
199
252
|
"""
|
|
200
|
-
Protocol for typing load_image_from_file
|
|
253
|
+
Protocol for typing `load_image_from_file`.
|
|
254
|
+
|
|
255
|
+
Info:
|
|
256
|
+
This protocol defines the call signature for image loading functions.
|
|
201
257
|
"""
|
|
202
258
|
|
|
203
259
|
def __call__(self, path: PathLikeOrStr) -> Optional[PixelValues]:
|
|
@@ -208,10 +264,21 @@ def get_load_image_func(
|
|
|
208
264
|
path: PathLikeOrStr,
|
|
209
265
|
) -> Union[LoadImageFunc, Callable[[PathLikeOrStr], B64]]:
|
|
210
266
|
"""
|
|
211
|
-
|
|
267
|
+
Returns the loading function according to the file extension.
|
|
268
|
+
|
|
269
|
+
Example:
|
|
270
|
+
```python
|
|
271
|
+
get_load_image_func('image.png')
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
Args:
|
|
275
|
+
path: The path to a file.
|
|
276
|
+
|
|
277
|
+
Returns:
|
|
278
|
+
The function that loads the file and converts it to the desired format.
|
|
212
279
|
|
|
213
|
-
:
|
|
214
|
-
|
|
280
|
+
Raises:
|
|
281
|
+
NotImplementedError: If the file extension is not supported.
|
|
215
282
|
"""
|
|
216
283
|
|
|
217
284
|
assert is_file_extension(path, [".png", ".jpeg", ".jpg", ".pdf", ".tif"]), f"image type not allowed: " f"{path}"
|
|
@@ -227,12 +294,19 @@ def get_load_image_func(
|
|
|
227
294
|
|
|
228
295
|
def maybe_path_or_pdf(path: PathLikeOrStr) -> int:
|
|
229
296
|
"""
|
|
230
|
-
Checks if the path points to a directory, a
|
|
231
|
-
directory, 2 if the path points to a pdf doc and 3 if path points to either a PNG, JPG or JPEG or 0 if none of the
|
|
232
|
-
previous is true.
|
|
297
|
+
Checks if the path points to a directory, a PDF document, or a single image.
|
|
233
298
|
|
|
234
|
-
:
|
|
235
|
-
|
|
299
|
+
Example:
|
|
300
|
+
```python
|
|
301
|
+
maybe_path_or_pdf('/path/to/file.pdf')
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
Args:
|
|
305
|
+
path: The path to check.
|
|
306
|
+
|
|
307
|
+
Returns:
|
|
308
|
+
`1` if the path points to a directory, `2` if it points to a PDF document, `3` if it points to a PNG, JPG,
|
|
309
|
+
JPEG, or TIF image, or `0` otherwise.
|
|
236
310
|
"""
|
|
237
311
|
|
|
238
312
|
if os.path.isdir(path):
|
|
@@ -247,10 +321,18 @@ def maybe_path_or_pdf(path: PathLikeOrStr) -> int:
|
|
|
247
321
|
|
|
248
322
|
def load_json(path_ann: PathLikeOrStr) -> JsonDict:
|
|
249
323
|
"""
|
|
250
|
-
|
|
324
|
+
Loads a JSON file.
|
|
325
|
+
|
|
326
|
+
Example:
|
|
327
|
+
```python
|
|
328
|
+
load_json('annotations.json')
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
Args:
|
|
332
|
+
path_ann: The path to the JSON file.
|
|
251
333
|
|
|
252
|
-
:
|
|
253
|
-
|
|
334
|
+
Returns:
|
|
335
|
+
The loaded JSON as a dictionary.
|
|
254
336
|
"""
|
|
255
337
|
with open(path_ann, "r", encoding="utf-8") as file:
|
|
256
338
|
json_dict = json.loads(file.read())
|
|
@@ -259,28 +341,50 @@ def load_json(path_ann: PathLikeOrStr) -> JsonDict:
|
|
|
259
341
|
|
|
260
342
|
def get_package_path() -> Path:
|
|
261
343
|
"""
|
|
262
|
-
|
|
344
|
+
Returns the full base path of this package.
|
|
345
|
+
|
|
346
|
+
Returns:
|
|
347
|
+
The base path of the package.
|
|
263
348
|
"""
|
|
264
349
|
return PATH
|
|
265
350
|
|
|
266
351
|
|
|
352
|
+
def get_cache_dir_path() -> Path:
|
|
353
|
+
"""
|
|
354
|
+
Returns the full base path to the cache directory.
|
|
355
|
+
|
|
356
|
+
Returns:
|
|
357
|
+
The base path to the cache directory.
|
|
358
|
+
"""
|
|
359
|
+
return CACHE_DIR
|
|
360
|
+
|
|
361
|
+
|
|
267
362
|
def get_weights_dir_path() -> Path:
|
|
268
363
|
"""
|
|
269
|
-
|
|
364
|
+
Returns the full base path to the model directory.
|
|
365
|
+
|
|
366
|
+
Returns:
|
|
367
|
+
The base path to the model directory.
|
|
270
368
|
"""
|
|
271
369
|
return MODEL_DIR
|
|
272
370
|
|
|
273
371
|
|
|
274
372
|
def get_configs_dir_path() -> Path:
|
|
275
373
|
"""
|
|
276
|
-
|
|
374
|
+
Returns the full base path to the configs directory.
|
|
375
|
+
|
|
376
|
+
Returns:
|
|
377
|
+
The base path to the configs directory.
|
|
277
378
|
"""
|
|
278
379
|
return CONFIGS
|
|
279
380
|
|
|
280
381
|
|
|
281
382
|
def get_dataset_dir_path() -> Path:
|
|
282
383
|
"""
|
|
283
|
-
|
|
384
|
+
Returns the full base path to the dataset directory.
|
|
385
|
+
|
|
386
|
+
Returns:
|
|
387
|
+
The base path to the dataset directory.
|
|
284
388
|
"""
|
|
285
389
|
return DATASET_DIR
|
|
286
390
|
|
|
@@ -289,13 +393,21 @@ def maybe_copy_config_to_cache(
|
|
|
289
393
|
package_path: PathLikeOrStr, configs_dir_path: PathLikeOrStr, file_name: str, force_copy: bool = True
|
|
290
394
|
) -> str:
|
|
291
395
|
"""
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
:
|
|
295
|
-
|
|
296
|
-
|
|
396
|
+
Copies a file from the source directory to the target directory.
|
|
397
|
+
|
|
398
|
+
Example:
|
|
399
|
+
```python
|
|
400
|
+
maybe_copy_config_to_cache('/src', '/dst', 'config.yaml')
|
|
401
|
+
```
|
|
297
402
|
|
|
298
|
-
:
|
|
403
|
+
Args:
|
|
404
|
+
package_path: The base path to the source directory of the file.
|
|
405
|
+
configs_dir_path: The base path to the target directory.
|
|
406
|
+
file_name: The name of the file to copy.
|
|
407
|
+
force_copy: If `True`, will re-copy the file even if it already exists in the target directory.
|
|
408
|
+
|
|
409
|
+
Returns:
|
|
410
|
+
The path to the copied file.
|
|
299
411
|
"""
|
|
300
412
|
|
|
301
413
|
absolute_path_source = os.path.join(package_path, file_name)
|
|
@@ -309,14 +421,21 @@ def maybe_copy_config_to_cache(
|
|
|
309
421
|
@deprecated("Use pathlib operations instead", "2022-06-08")
|
|
310
422
|
def sub_path(anchor_dir: PathLikeOrStr, *paths: PathLikeOrStr) -> PathLikeOrStr:
|
|
311
423
|
"""
|
|
312
|
-
|
|
424
|
+
Generates a path from the anchor directory and additional path arguments.
|
|
425
|
+
|
|
426
|
+
Example:
|
|
427
|
+
```python
|
|
428
|
+
sub_path('/path/to', 'dir1', 'dir2')
|
|
429
|
+
```
|
|
313
430
|
|
|
314
|
-
|
|
431
|
+
Args:
|
|
432
|
+
anchor_dir: The anchor directory.
|
|
433
|
+
*paths: Additional directories to add to the path.
|
|
315
434
|
|
|
316
|
-
|
|
435
|
+
Returns:
|
|
436
|
+
The generated sub-path.
|
|
317
437
|
|
|
318
|
-
:
|
|
319
|
-
|
|
320
|
-
:return: sub_path
|
|
438
|
+
Note:
|
|
439
|
+
Deprecated. Use pathlib operations instead.
|
|
321
440
|
"""
|
|
322
441
|
return os.path.join(os.path.dirname(os.path.abspath(anchor_dir)), *paths)
|
deepdoctection/utils/identifier.py
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
# limitations under the License.
|
|
17
17
|
|
|
18
18
|
"""
|
|
19
|
-
|
|
19
|
+
Generating and checking uuids
|
|
20
20
|
"""
|
|
21
21
|
import hashlib
|
|
22
22
|
import uuid
|
|
@@ -28,14 +28,18 @@ __all__ = ["is_uuid_like", "get_uuid_from_str", "get_uuid"]
|
|
|
28
28
|
|
|
29
29
|
def is_uuid_like(input_id: str) -> bool:
|
|
30
30
|
"""
|
|
31
|
-
Check
|
|
31
|
+
Check if the input string has a UUID3 string representation format.
|
|
32
32
|
|
|
33
|
-
|
|
33
|
+
Example:
|
|
34
|
+
```python
|
|
35
|
+
is_uuid_like("886313e1-3b8a-5372-9b90-0c9aee199e5d")
|
|
36
|
+
```
|
|
34
37
|
|
|
35
|
-
|
|
38
|
+
Args:
|
|
39
|
+
input_id: An input string.
|
|
36
40
|
|
|
37
|
-
:
|
|
38
|
-
|
|
41
|
+
Returns:
|
|
42
|
+
A boolean output.
|
|
39
43
|
"""
|
|
40
44
|
try:
|
|
41
45
|
uuid.UUID(str(input_id))
|
|
@@ -46,20 +50,26 @@ def is_uuid_like(input_id: str) -> bool:
|
|
|
46
50
|
|
|
47
51
|
def get_uuid_from_str(input_id: str) -> str:
|
|
48
52
|
"""
|
|
49
|
-
|
|
53
|
+
Return a UUID3 string representation generated from an input string.
|
|
50
54
|
|
|
51
|
-
:
|
|
52
|
-
|
|
55
|
+
Args:
|
|
56
|
+
input_id: Input string.
|
|
57
|
+
|
|
58
|
+
Returns:
|
|
59
|
+
UUID3 string representation.
|
|
53
60
|
"""
|
|
54
61
|
return str(uuid.uuid3(uuid.NAMESPACE_DNS, input_id))
|
|
55
62
|
|
|
56
63
|
|
|
57
64
|
def get_uuid(*inputs: str) -> str:
|
|
58
65
|
"""
|
|
59
|
-
Set
|
|
66
|
+
Set a UUID generated by the concatenation of string inputs.
|
|
67
|
+
|
|
68
|
+
Args:
|
|
69
|
+
*inputs: String inputs.
|
|
60
70
|
|
|
61
|
-
:
|
|
62
|
-
|
|
71
|
+
Returns:
|
|
72
|
+
UUID3 string representation.
|
|
63
73
|
"""
|
|
64
74
|
str_input = "".join(inputs)
|
|
65
75
|
return get_uuid_from_str(str_input)
|
|
@@ -67,11 +77,14 @@ def get_uuid(*inputs: str) -> str:
|
|
|
67
77
|
|
|
68
78
|
def get_md5_hash(path: PathLikeOrStr, buffer_size: int = 65536) -> str:
|
|
69
79
|
"""
|
|
70
|
-
Calculate
|
|
80
|
+
Calculate an MD5 hash for a given file.
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
path: Path to a file.
|
|
84
|
+
buffer_size: Will calculate the hash in chunks.
|
|
71
85
|
|
|
72
|
-
:
|
|
73
|
-
|
|
74
|
-
:return: md5 string
|
|
86
|
+
Returns:
|
|
87
|
+
MD5 string.
|
|
75
88
|
"""
|
|
76
89
|
|
|
77
90
|
hash_md5 = hashlib.md5()
|
deepdoctection/utils/logger.py
CHANGED
|
@@ -3,23 +3,23 @@
|
|
|
3
3
|
|
|
4
4
|
# Copyright (c) Tensorpack Contributors
|
|
5
5
|
# Licensed under the Apache License, Version 2.0 (the "License")
|
|
6
|
+
|
|
6
7
|
"""
|
|
7
8
|
This file is modified from
|
|
8
|
-
https://github.com/tensorpack/tensorpack/blob/master/tensorpack/utils/logger.py
|
|
9
|
+
<https://github.com/tensorpack/tensorpack/blob/master/tensorpack/utils/logger.py>
|
|
9
10
|
|
|
10
11
|
The logger module itself has the common logging functions of Python's
|
|
11
12
|
`logging.Logger`.
|
|
12
13
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
from deepdoctection.utils.logger import logger
|
|
14
|
+
Example:
|
|
15
|
+
```python
|
|
16
|
+
from deepdoctection.utils.logger import logger
|
|
18
17
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
18
|
+
logger.set_logger_dir("path/to/dir")
|
|
19
|
+
logger.info("Something has happened")
|
|
20
|
+
logger.warning("Attention!")
|
|
21
|
+
logger.error("Error happened!")
|
|
22
|
+
```
|
|
23
23
|
|
|
24
24
|
Log levels can be set via the environment variable `LOG_LEVEL` (default: INFO).
|
|
25
25
|
`STD_OUT_VERBOSE` will print a verbose message to the terminal (default: False).
|
|
@@ -49,7 +49,17 @@ ENV_VARS_TRUE: set[str] = {"1", "True", "TRUE", "true", "yes"}
|
|
|
49
49
|
|
|
50
50
|
@dataclass
|
|
51
51
|
class LoggingRecord:
|
|
52
|
-
"""
|
|
52
|
+
"""
|
|
53
|
+
`LoggingRecord` to pass to the logger in order to distinguish from third party libraries.
|
|
54
|
+
|
|
55
|
+
Note:
|
|
56
|
+
`log_dict` will be added to the log record as a dict.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
msg: The log message.
|
|
60
|
+
log_dict: Optional dictionary to add to the log record.
|
|
61
|
+
|
|
62
|
+
"""
|
|
53
63
|
|
|
54
64
|
msg: str
|
|
55
65
|
log_dict: Optional[dict[Union[int, str], Any]] = field(default=None)
|
|
@@ -192,17 +202,16 @@ def set_logger_dir(dir_name: PathLikeOrStr, action: Optional[str] = None) -> Non
|
|
|
192
202
|
"""
|
|
193
203
|
Set the directory for global logging.
|
|
194
204
|
|
|
195
|
-
:
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
old states for you. It simply does nothing.
|
|
205
|
+
Args:
|
|
206
|
+
dir_name: Log directory.
|
|
207
|
+
action: An action of ["k", "d", "q"] to be performed when the directory exists. Will ask user by default.
|
|
208
|
+
"d": Delete the directory. Note that the deletion may fail when the directory is used by tensorboard.
|
|
209
|
+
"k": Keep the directory. This is useful when you resume from a previous training and want the directory to
|
|
210
|
+
look as if the training was not interrupted.
|
|
211
|
+
Note that this option does not load old models or any other old states for you. It simply does nothing.
|
|
212
|
+
|
|
213
|
+
Raises:
|
|
214
|
+
OSError: If the directory exists and an invalid action is selected.
|
|
206
215
|
"""
|
|
207
216
|
if isinstance(dir_name, Path):
|
|
208
217
|
dir_name = dir_name.as_posix()
|
|
@@ -253,10 +262,11 @@ def set_logger_dir(dir_name: PathLikeOrStr, action: Optional[str] = None) -> Non
|
|
|
253
262
|
def auto_set_dir(action: Optional[str] = None, name: Optional[str] = None) -> None:
|
|
254
263
|
"""
|
|
255
264
|
Will set the log directory to './train_log/{script_name}:{name}'.
|
|
256
|
-
|
|
265
|
+
`script_name` is the name of the main python file currently running.
|
|
257
266
|
|
|
258
|
-
:
|
|
259
|
-
|
|
267
|
+
Args:
|
|
268
|
+
action: An action of ["k", "d", "q"] to be performed (see also `set_logger_dir`).
|
|
269
|
+
name: Optional suffix of file name.
|
|
260
270
|
"""
|
|
261
271
|
|
|
262
272
|
mod = sys.modules["__main__"]
|
|
@@ -269,8 +279,10 @@ def auto_set_dir(action: Optional[str] = None, name: Optional[str] = None) -> No
|
|
|
269
279
|
|
|
270
280
|
def get_logger_dir() -> Optional[PathLikeOrStr]:
|
|
271
281
|
"""
|
|
272
|
-
The logger directory, or None if not set.
|
|
273
|
-
|
|
282
|
+
The logger directory, or `None` if not set.
|
|
283
|
+
|
|
284
|
+
Returns:
|
|
285
|
+
The directory used for general logging, tensorboard events, checkpoints, etc.
|
|
274
286
|
"""
|
|
275
287
|
return _LOG_DIR
|
|
276
288
|
|
|
@@ -278,11 +290,16 @@ def get_logger_dir() -> Optional[PathLikeOrStr]:
|
|
|
278
290
|
@functools.lru_cache(maxsize=None)
|
|
279
291
|
def log_once(message: str, function: str = "info") -> None:
|
|
280
292
|
"""
|
|
281
|
-
Log certain message only once.
|
|
282
|
-
the same message will result in no
|
|
293
|
+
Log certain message only once. Calling this function more than once with
|
|
294
|
+
the same message will result in no operation.
|
|
295
|
+
|
|
296
|
+
Example:
|
|
297
|
+
```python
|
|
298
|
+
log_once("This will only be logged once", "info")
|
|
299
|
+
```
|
|
283
300
|
|
|
284
|
-
:
|
|
285
|
-
|
|
286
|
-
|
|
301
|
+
Args:
|
|
302
|
+
message: Message to log.
|
|
303
|
+
function: The name of the logger method. For example, "info", "warn", "error".
|
|
287
304
|
"""
|
|
288
305
|
getattr(logger, function)(message)
|