deepdoctection-0.42.1-py3-none-any.whl → deepdoctection-0.43-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of deepdoctection might be problematic. Click here for more details.

Files changed (124)
  1. deepdoctection/__init__.py +2 -1
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +904 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +157 -106
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +196 -113
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +25 -17
  104. deepdoctection/utils/env_info.py +85 -36
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -62
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.dist-info/RECORD +149 -0
  119. deepdoctection/analyzer/_config.py +0 -146
  120. deepdoctection-0.42.1.dist-info/METADATA +0 -431
  121. deepdoctection-0.42.1.dist-info/RECORD +0 -148
  122. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/WHEEL +0 -0
  123. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
@@ -50,8 +50,11 @@ __all__ = [
50
50
 
51
51
  def box_to_point4(boxes: npt.NDArray[np.float32]) -> npt.NDArray[np.float32]:
52
52
  """
53
- :param boxes: nx4
54
- :return: (nx4)x2
53
+ Args:
54
+ boxes: nx4
55
+
56
+ Returns:
57
+ (nx4)x2
55
58
  """
56
59
  box = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]]
57
60
  box = box.reshape((-1, 2))
@@ -60,8 +63,11 @@ def box_to_point4(boxes: npt.NDArray[np.float32]) -> npt.NDArray[np.float32]:
60
63
 
61
64
  def point4_to_box(points: npt.NDArray[np.float32]) -> npt.NDArray[np.float32]:
62
65
  """
63
- :param points: (nx4)x2
64
- :return: nx4 boxes (x1y1x2y2)
66
+ Args:
67
+ points: (nx4)x2
68
+
69
+ Returns:
70
+ nx4 boxes (`x1y1x2y2`)
65
71
  """
66
72
  points = points.reshape((-1, 4, 2))
67
73
  min_xy = points.min(axis=1) # nx2
@@ -72,32 +78,52 @@ def point4_to_box(points: npt.NDArray[np.float32]) -> npt.NDArray[np.float32]:
72
78
  class BaseTransform(ABC):
73
79
  """
74
80
  A deterministic image transformation. This class is also the place to provide a default implementation to any
75
- `apply_xxx` method. The current default is to raise NotImplementedError in any such methods.
76
- All subclasses should implement `apply_image`. The image should be of type uint8 in range [0, 255], or
81
+ `apply_xxx` method. The current default is to raise `NotImplementedError` in any such methods.
82
+ All subclasses should implement `apply_image`. The image should be of type `uint8` in range [0, 255], or
77
83
  floating point images in range [0, 1] or [0, 255]. Some subclasses may implement `apply_coords`, when applicable.
78
84
  It should take and return a numpy array of Nx2, where each row is the (x, y) coordinate.
79
85
  The implementation of each method may choose to modify its input data in-place for efficient transformation.
86
+
87
+ Note:
88
+ All subclasses should implement `apply_image`. Some may implement `apply_coords`.
80
89
  """
81
90
 
82
91
  @abstractmethod
83
92
  def apply_image(self, img: PixelValues) -> PixelValues:
84
- """The transformation that should be applied to the image"""
93
+ """
94
+ The transformation that should be applied to the image.
95
+
96
+ Raises:
97
+ NotImplementedError:
98
+ """
85
99
  raise NotImplementedError()
86
100
 
87
101
  @abstractmethod
88
102
  def apply_coords(self, coords: npt.NDArray[float32]) -> npt.NDArray[float32]:
89
- """Transformation that should be applied to coordinates. Coords are supposed to to be passed as like
103
+ """
104
+ Transformation that should be applied to coordinates. Coords are supposed to be passed as like
90
105
 
106
+ ```python
91
107
  np.array([[ulx_0,uly_0,lrx_0,lry_0],[ulx_1,uly_1,lrx_1,lry_1],...])
108
+ ```
109
+
110
+ Raises:
111
+ NotImplementedError:
92
112
  """
93
113
  raise NotImplementedError()
94
114
 
95
115
  @abstractmethod
96
116
  def inverse_apply_coords(self, coords: npt.NDArray[float32]) -> npt.NDArray[float32]:
97
- """Inverse transformation going back from coordinates of transformed image to original image. Coords are
98
- supposed to to be passed as like
117
+ """
118
+ Inverse transformation going back from coordinates of transformed image to original image. Coords are
119
+ supposed to be passed as like
120
+
121
+ ```python
122
+ np.array([[ulx_0,uly_0,lrx_0,lry_0],[ulx_1,uly_1,lrx_1,lry_1],...])
123
+ ```
99
124
 
100
- np.array([[ulx_0,uly_0,lrx_0,lry_0],[ulx_1,uly_1,lrx_1,lry_1],...])
125
+ Raises:
126
+ NotImplementedError:
101
127
  """
102
128
  raise NotImplementedError()
103
129
 
@@ -115,6 +141,8 @@ class BaseTransform(ABC):
115
141
  class ResizeTransform(BaseTransform):
116
142
  """
117
143
  Resize the image.
144
+
145
+
118
146
  """
119
147
 
120
148
  def __init__(
@@ -126,12 +154,14 @@ class ResizeTransform(BaseTransform):
126
154
  interp: str,
127
155
  ):
128
156
  """
129
- :param h: height
130
- :param w: width
131
- :param new_h: target height
132
- :param new_w: target width
133
- :param interp: interpolation method, that depends on the image processing library. Currently, it supports
134
- NEAREST, BOX, BILINEAR, BICUBIC and VIZ for PIL or INTER_NEAREST, INTER_LINEAR, INTER_AREA or VIZ for OpenCV
157
+ Args:
158
+ h: Height.
159
+ w: Width.
160
+ new_h: Target height.
161
+ new_w: Target width.
162
+ interp: Interpolation method, that depends on the image processing library. Currently, it supports
163
+ `NEAREST`, `BOX`, `BILINEAR`, `BICUBIC` and `VIZ` for PIL or `INTER_NEAREST`, `INTER_LINEAR`,
164
+ `INTER_AREA` or `VIZ` for OpenCV.
135
165
  """
136
166
  self.h = h
137
167
  self.w = w
@@ -140,6 +170,18 @@ class ResizeTransform(BaseTransform):
140
170
  self.interp = interp
141
171
 
142
172
  def apply_image(self, img: PixelValues) -> PixelValues:
173
+ """
174
+ Apply the resize transformation to the image.
175
+
176
+ Args:
177
+ img: Image to be resized.
178
+
179
+ Returns:
180
+ Resized image.
181
+
182
+ Raises:
183
+ AssertionError: If the input image shape does not match the expected height and width.
184
+ """
143
185
  assert img.shape[:2] == (self.h, self.w)
144
186
  ret = viz_handler.resize(img, self.new_w, self.new_h, self.interp)
145
187
  if img.ndim == 3 and ret.ndim == 2:
@@ -147,14 +189,30 @@ class ResizeTransform(BaseTransform):
147
189
  return ret
148
190
 
149
191
  def apply_coords(self, coords: npt.NDArray[float32]) -> npt.NDArray[float32]:
150
- """Transformation that should be applied to coordinates. Coords are supposed to to be passed as
151
- np array of points"""
192
+ """
193
+ Transformation that should be applied to coordinates. Coords are supposed to be passed as
194
+ numpy array of points.
195
+
196
+ Args:
197
+ coords: Coordinates to be transformed.
198
+
199
+ Returns:
200
+ Transformed coordinates.
201
+ """
152
202
  coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w)
153
203
  coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h)
154
204
  return coords
155
205
 
156
206
  def inverse_apply_coords(self, coords: npt.NDArray[float32]) -> npt.NDArray[float32]:
157
- """Inverse transformation going back from coordinates of resized image to original image"""
207
+ """
208
+ Inverse transformation going back from coordinates of resized image to original image.
209
+
210
+ Args:
211
+ coords: Coordinates to be inversely transformed.
212
+
213
+ Returns:
214
+ Inversely transformed coordinates.
215
+ """
158
216
  coords[:, 0] = coords[:, 0] * (self.w * 1.0 / self.new_w)
159
217
  coords[:, 1] = coords[:, 1] * (self.h * 1.0 / self.new_h)
160
218
  return coords
@@ -172,8 +230,10 @@ class InferenceResize:
172
230
 
173
231
  def __init__(self, short_edge_length: int, max_size: int, interp: str = "VIZ") -> None:
174
232
  """
175
- :param short_edge_length: a [min, max] interval from which to sample the shortest edge length.
176
- :param max_size: maximum allowed longest edge length.
233
+ Args:
234
+ short_edge_length: A [min, max] interval from which to sample the shortest edge length.
235
+ max_size: Maximum allowed longest edge length.
236
+ interp: Interpolation method.
177
237
  """
178
238
  self.short_edge_length = short_edge_length
179
239
  self.max_size = max_size
@@ -181,7 +241,13 @@ class InferenceResize:
181
241
 
182
242
  def get_transform(self, img: PixelValues) -> ResizeTransform:
183
243
  """
184
- get transform
244
+ Get the `ResizeTransform` for the image.
245
+
246
+ Args:
247
+ img: Image to be transformed.
248
+
249
+ Returns:
250
+ `ResizeTransform` object.
185
251
  """
186
252
  h, w = img.shape[:2]
187
253
  new_w: Union[int, float]
@@ -208,22 +274,31 @@ def normalize_image(
208
274
  """
209
275
  Preprocess pixel values of an image by rescaling.
210
276
 
211
- :param image: image as np.array
212
- :param pixel_mean: (3,) array
213
- :param pixel_std: (3,) array
277
+ Args:
278
+ image: Image as numpy array.
279
+ pixel_mean: (3,) array.
280
+ pixel_std: (3,) array.
281
+
282
+ Returns:
283
+ Normalized image.
214
284
  """
215
285
  return (image - pixel_mean) * (1.0 / pixel_std)
216
286
 
217
287
 
218
288
  def pad_image(image: PixelValues, top: int, right: int, bottom: int, left: int) -> PixelValues:
219
- """Pad an image with white color and with given top/bottom/right/left pixel values. Only white padding is
220
- currently supported
221
-
222
- :param image: image as np.array
223
- :param top: Top pixel value to pad
224
- :param right: Right pixel value to pad
225
- :param bottom: Bottom pixel value to pad
226
- :param left: Left pixel value to pad
289
+ """
290
+ Pad an image with white color and with given top/bottom/right/left pixel values. Only white padding is
291
+ currently supported.
292
+
293
+ Args:
294
+ image: Image as numpy array.
295
+ top: Top pixel value to pad.
296
+ right: Right pixel value to pad.
297
+ bottom: Bottom pixel value to pad.
298
+ left: Left pixel value to pad.
299
+
300
+ Returns:
301
+ Padded image.
227
302
  """
228
303
  return np.pad(image, ((top, bottom), (left, right), (0, 0)), "constant", constant_values=255)
229
304
 
@@ -241,11 +316,15 @@ class PadTransform(BaseTransform):
241
316
  pad_left: int,
242
317
  ):
243
318
  """
244
- :param pad_top: padding top image side
245
- :param pad_right: padding right image side
246
- :param pad_bottom: padding bottom image side
247
- :param pad_left: padding left image side
319
+ A transform for padding images left/right/top/bottom-wise.
320
+
321
+ Args:
322
+ pad_top: Padding top image side.
323
+ pad_right: Padding right image side.
324
+ pad_bottom: Padding bottom image side.
325
+ pad_left: Padding left image side.
248
326
  """
327
+
249
328
  self.pad_top = pad_top
250
329
  self.pad_right = pad_right
251
330
  self.pad_bottom = pad_bottom
@@ -254,13 +333,29 @@ class PadTransform(BaseTransform):
254
333
  self.image_height: Optional[int] = None
255
334
 
256
335
  def apply_image(self, img: PixelValues) -> PixelValues:
257
- """Apply padding to image"""
336
+ """
337
+ Apply padding to image.
338
+
339
+ Args:
340
+ img: Image to be padded.
341
+
342
+ Returns:
343
+ Padded image.
344
+ """
258
345
  self.image_width = img.shape[1]
259
346
  self.image_height = img.shape[0]
260
347
  return pad_image(img, self.pad_top, self.pad_right, self.pad_bottom, self.pad_left)
261
348
 
262
349
  def apply_coords(self, coords: npt.NDArray[float32]) -> npt.NDArray[float32]:
263
- """Transformation that should be applied to coordinates"""
350
+ """
351
+ Transformation that should be applied to coordinates.
352
+
353
+ Args:
354
+ coords: Coordinates to be transformed.
355
+
356
+ Returns:
357
+ Transformed coordinates.
358
+ """
264
359
  coords[:, 0] = coords[:, 0] + self.pad_left
265
360
  coords[:, 1] = coords[:, 1] + self.pad_top
266
361
  coords[:, 2] = coords[:, 2] + self.pad_left
@@ -268,7 +363,18 @@ class PadTransform(BaseTransform):
268
363
  return coords
269
364
 
270
365
  def inverse_apply_coords(self, coords: npt.NDArray[float32]) -> npt.NDArray[float32]:
271
- """Inverse transformation going back from coordinates of padded image to original image"""
366
+ """
367
+ Inverse transformation going back from coordinates of padded image to original image.
368
+
369
+ Args:
370
+ coords: Coordinates to be inversely transformed.
371
+
372
+ Returns:
373
+ Inversely transformed coordinates.
374
+
375
+ Raises:
376
+ ValueError: If `image_width` and `image_height` are not initialized.
377
+ """
272
378
  if self.image_height is None or self.image_width is None:
273
379
  raise ValueError("Initialize image_width and image_height first")
274
380
  coords[:, 0] = np.maximum(coords[:, 0] - self.pad_left, np.zeros(coords[:, 0].shape))
@@ -298,21 +404,40 @@ class RotationTransform(BaseTransform):
298
404
 
299
405
  def __init__(self, angle: Literal[90, 180, 270, 360]):
300
406
  """
301
- :param angle: angle to rotate the image. Must be one of 90, 180, 270, or 360 degrees.
302
- :param mode: coordinate format - "xyxy" (bounding box), "xywh" (width/height), "poly" (polygon points)
407
+ Args:
408
+ angle: Angle to rotate the image. Must be one of 90, 180, 270, or 360 degrees.
303
409
  """
304
410
  self.angle = angle
305
411
  self.image_width: Optional[int] = None
306
412
  self.image_height: Optional[int] = None
307
413
 
308
414
  def apply_image(self, img: PixelValues) -> PixelValues:
309
- """Apply rotation to image"""
415
+ """
416
+ Apply rotation to image.
417
+
418
+ Args:
419
+ img: Image to be rotated.
420
+
421
+ Returns:
422
+ Rotated image.
423
+ """
310
424
  self.image_width = img.shape[1]
311
425
  self.image_height = img.shape[0]
312
426
  return viz_handler.rotate_image(img, self.angle)
313
427
 
314
428
  def apply_coords(self, coords: npt.NDArray[float32]) -> npt.NDArray[float32]:
315
- """Transformation that should be applied to coordinates"""
429
+ """
430
+ Transformation that should be applied to coordinates.
431
+
432
+ Args:
433
+ coords: Coordinates to be transformed.
434
+
435
+ Returns:
436
+ Transformed coordinates.
437
+
438
+ Raises:
439
+ ValueError: If `image_width` and `image_height` are not initialized.
440
+ """
316
441
  if self.image_width is None or self.image_height is None:
317
442
  raise ValueError("Initialize image_width and image_height first")
318
443
 
@@ -332,7 +457,18 @@ class RotationTransform(BaseTransform):
332
457
  return coords
333
458
 
334
459
  def inverse_apply_coords(self, coords: npt.NDArray[float32]) -> npt.NDArray[float32]:
335
- """Inverse transformation going back from coordinates of rotated image to original image"""
460
+ """
461
+ Inverse transformation going back from coordinates of rotated image to original image.
462
+
463
+ Args:
464
+ coords: Coordinates to be inversely transformed.
465
+
466
+ Returns:
467
+ Inversely transformed coordinates.
468
+
469
+ Raises:
470
+ ValueError: If `image_width` and `image_height` are not initialized.
471
+ """
336
472
  if self.image_width is None or self.image_height is None:
337
473
  raise ValueError("Initialize image_width and image_height first")
338
474
 
@@ -34,11 +34,14 @@ def delete_keys_from_dict(
34
34
  dictionary: Union[dict[Any, Any], MutableMapping], keys: Union[str, list[str], set[str]] # type: ignore
35
35
  ) -> dict[Any, Any]:
36
36
  """
37
- Removing key/value pairs from dictionary. Works for nested dicts as well.
37
+ Removes key/value pairs from a `dictionary`. Works for nested dictionaries as well.
38
38
 
39
- :param dictionary: A input dictionary
40
- :param keys: A single or list of keys
41
- :return: The modified dictionary with listed keys removed
39
+ Args:
40
+ dictionary: An input dictionary.
41
+ keys: A single key or a list of keys.
42
+
43
+ Returns:
44
+ The modified dictionary with the specified keys removed.
42
45
  """
43
46
 
44
47
  if isinstance(keys, str):
@@ -64,16 +67,26 @@ def delete_keys_from_dict(
64
67
 
65
68
  def split_string(input_string: str) -> list[str]:
66
69
  """
67
- Takes a string, splits between commas and returns a list with split components as list elements
70
+ Splits an `input_string` by commas and returns a list of the split components.
71
+
72
+ Args:
73
+ input_string: The input string.
68
74
 
69
- :param input_string: input
75
+ Returns:
76
+ A list of string components.
70
77
  """
71
78
  return input_string.split(",")
72
79
 
73
80
 
74
81
  def string_to_dict(input_string: str) -> dict[str, str]:
75
82
  """
76
- Takes a string of a form `key1=val1,key2=val2` and returns the corresponding dict
83
+ Converts an `input_string` of the form `key1=val1,key2=val2` into a dictionary.
84
+
85
+ Args:
86
+ input_string: The input string.
87
+
88
+ Returns:
89
+ The corresponding dictionary.
77
90
  """
78
91
  items_list = input_string.split(",")
79
92
  output_dict = {}
@@ -85,10 +98,13 @@ def string_to_dict(input_string: str) -> dict[str, str]:
85
98
 
86
99
  def to_bool(inputs: Union[str, bool, int]) -> bool:
87
100
  """
88
- Convert a string "True" or "False" to its boolean value
101
+ Converts a string "True" or "False" to its boolean value.
102
+
103
+ Args:
104
+ inputs: Input string, boolean, or integer.
89
105
 
90
- :param inputs: Input string
91
- :return: boolean value
106
+ Returns:
107
+ The boolean value.
92
108
  """
93
109
  if isinstance(inputs, bool):
94
110
  return inputs
@@ -103,9 +119,17 @@ def to_bool(inputs: Union[str, bool, int]) -> bool:
103
119
 
104
120
  def call_only_once(func: Callable[..., Any]) -> Callable[..., Any]:
105
121
  """
106
- Decorate a method or property of a class, so that this method can only
107
- be called once for every instance.
108
- Calling it more than once will result in exception.
122
+ Decorates a method or property of a class so that it can only be called once for every instance.
123
+ Calling it more than once will result in an exception.
124
+
125
+ Args:
126
+ func: The method or property to decorate.
127
+
128
+ Returns:
129
+ The decorated function.
130
+
131
+ Note:
132
+ Use `call_only_once` only on methods or properties.
109
133
  """
110
134
 
111
135
  @functools.wraps(func)
@@ -135,10 +159,13 @@ def call_only_once(func: Callable[..., Any]) -> Callable[..., Any]:
135
159
  # taken from https://github.com/tensorpack/dataflow/blob/master/dataflow/utils/utils.py
136
160
  def get_rng(obj: Any = None) -> np.random.RandomState:
137
161
  """
138
- Get a good RNG seeded with time, pid and the object.
162
+ Gets a good random number generator seeded with time, process id, and the object.
163
+
164
+ Args:
165
+ obj: Some object to use to generate the random seed.
139
166
 
140
- :param obj: some object to use to generate random seed.
141
- :return: np.random.RandomState: the RNG.
167
+ Returns:
168
+ The random number generator.
142
169
  """
143
170
  seed = (id(obj) + os.getpid() + int(datetime.now().strftime("%Y%m%d%H%M%S%f"))) % 4294967295
144
171
  return np.random.RandomState(seed)
@@ -146,11 +173,14 @@ def get_rng(obj: Any = None) -> np.random.RandomState:
146
173
 
147
174
  def is_file_extension(file_name: PathLikeOrStr, extension: Union[str, Sequence[str]]) -> bool:
148
175
  """
149
- Check if a given file name has a given extension
176
+ Checks if a given `file_name` has a given `extension`.
150
177
 
151
- :param file_name: the file name, either full along with path or as stand alone
152
- :param extension: the extension of the file. Must add a dot (.)
153
- :return: True/False
178
+ Args:
179
+ file_name: The file name, either full path or standalone.
180
+ extension: The extension of the file. Must include a dot (e.g., `.txt`).
181
+
182
+ Returns:
183
+ True if the file has the given extension, False otherwise.
154
184
  """
155
185
  if isinstance(extension, str):
156
186
  return os.path.splitext(file_name)[-1].lower() == extension
@@ -159,20 +189,23 @@ def is_file_extension(file_name: PathLikeOrStr, extension: Union[str, Sequence[s
159
189
 
160
190
  def partition_list(base_list: list[str], stop_value: str) -> list[list[str]]:
161
191
  """
162
- Partitions a list of strings into sublists, where each sublist starts with the first occurrence of the stop value.
163
- Consecutive stop values are grouped together in the same sublist.
192
+ Partitions a list of strings into sublists, where each sublist starts with the first occurrence of the `stop_value`.
193
+ Consecutive `stop_value` elements are grouped together in the same sublist.
164
194
 
165
- :param base_list: The list of strings to be partitioned.
166
- :param stop_value: The string value that indicates the start of a new partition.
167
- :return: A list of lists, where each sublist is a partition of the original list.
195
+ Args:
196
+ base_list: The list of strings to be partitioned.
197
+ stop_value: The string value that indicates the start of a new partition.
168
198
 
169
- ** Example:**
199
+ Returns:
200
+ A list of lists, where each sublist is a partition of the original list.
170
201
 
202
+ Example:
203
+ ```python
171
204
  strings = ['a', 'a', 'c', 'c', 'b', 'd', 'c', 'c', 'a', 'b', 'a', 'b', 'a', 'a']
172
205
  stop_string = 'a'
173
206
  partition_list(strings, stop_string)
174
-
175
- # Output [['a', 'a', 'c', 'c', 'b', 'd', 'c', 'c'], ['a', 'b'], ['a', 'b'], ['a', 'a']]
207
+ # Output: [['a', 'a', 'c', 'c', 'b', 'd', 'c', 'c'], ['a', 'b'], ['a', 'b'], ['a', 'a']]
208
+ ```
176
209
  """
177
210
 
178
211
  partitions = []