deepdoctection 0.42.1__py3-none-any.whl → 0.43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (124)
  1. deepdoctection/__init__.py +2 -1
  2. deepdoctection/analyzer/__init__.py +2 -1
  3. deepdoctection/analyzer/config.py +904 -0
  4. deepdoctection/analyzer/dd.py +36 -62
  5. deepdoctection/analyzer/factory.py +311 -141
  6. deepdoctection/configs/conf_dd_one.yaml +100 -44
  7. deepdoctection/configs/profiles.jsonl +32 -0
  8. deepdoctection/dataflow/__init__.py +9 -6
  9. deepdoctection/dataflow/base.py +33 -15
  10. deepdoctection/dataflow/common.py +96 -75
  11. deepdoctection/dataflow/custom.py +36 -29
  12. deepdoctection/dataflow/custom_serialize.py +135 -91
  13. deepdoctection/dataflow/parallel_map.py +33 -31
  14. deepdoctection/dataflow/serialize.py +15 -10
  15. deepdoctection/dataflow/stats.py +41 -28
  16. deepdoctection/datapoint/__init__.py +4 -6
  17. deepdoctection/datapoint/annotation.py +104 -66
  18. deepdoctection/datapoint/box.py +190 -130
  19. deepdoctection/datapoint/convert.py +66 -39
  20. deepdoctection/datapoint/image.py +151 -95
  21. deepdoctection/datapoint/view.py +383 -236
  22. deepdoctection/datasets/__init__.py +2 -6
  23. deepdoctection/datasets/adapter.py +11 -11
  24. deepdoctection/datasets/base.py +118 -81
  25. deepdoctection/datasets/dataflow_builder.py +18 -12
  26. deepdoctection/datasets/info.py +76 -57
  27. deepdoctection/datasets/instances/__init__.py +6 -2
  28. deepdoctection/datasets/instances/doclaynet.py +17 -14
  29. deepdoctection/datasets/instances/fintabnet.py +16 -22
  30. deepdoctection/datasets/instances/funsd.py +11 -6
  31. deepdoctection/datasets/instances/iiitar13k.py +9 -9
  32. deepdoctection/datasets/instances/layouttest.py +9 -9
  33. deepdoctection/datasets/instances/publaynet.py +9 -9
  34. deepdoctection/datasets/instances/pubtables1m.py +13 -13
  35. deepdoctection/datasets/instances/pubtabnet.py +13 -15
  36. deepdoctection/datasets/instances/rvlcdip.py +8 -8
  37. deepdoctection/datasets/instances/xfund.py +11 -9
  38. deepdoctection/datasets/registry.py +18 -11
  39. deepdoctection/datasets/save.py +12 -11
  40. deepdoctection/eval/__init__.py +3 -2
  41. deepdoctection/eval/accmetric.py +72 -52
  42. deepdoctection/eval/base.py +29 -10
  43. deepdoctection/eval/cocometric.py +14 -12
  44. deepdoctection/eval/eval.py +56 -41
  45. deepdoctection/eval/registry.py +6 -3
  46. deepdoctection/eval/tedsmetric.py +24 -9
  47. deepdoctection/eval/tp_eval_callback.py +13 -12
  48. deepdoctection/extern/__init__.py +1 -1
  49. deepdoctection/extern/base.py +176 -97
  50. deepdoctection/extern/d2detect.py +127 -92
  51. deepdoctection/extern/deskew.py +19 -10
  52. deepdoctection/extern/doctrocr.py +157 -106
  53. deepdoctection/extern/fastlang.py +25 -17
  54. deepdoctection/extern/hfdetr.py +137 -60
  55. deepdoctection/extern/hflayoutlm.py +329 -248
  56. deepdoctection/extern/hflm.py +67 -33
  57. deepdoctection/extern/model.py +108 -762
  58. deepdoctection/extern/pdftext.py +37 -12
  59. deepdoctection/extern/pt/nms.py +15 -1
  60. deepdoctection/extern/pt/ptutils.py +13 -9
  61. deepdoctection/extern/tessocr.py +87 -54
  62. deepdoctection/extern/texocr.py +29 -14
  63. deepdoctection/extern/tp/tfutils.py +36 -8
  64. deepdoctection/extern/tp/tpcompat.py +54 -16
  65. deepdoctection/extern/tp/tpfrcnn/config/config.py +20 -4
  66. deepdoctection/extern/tpdetect.py +4 -2
  67. deepdoctection/mapper/__init__.py +1 -1
  68. deepdoctection/mapper/cats.py +117 -76
  69. deepdoctection/mapper/cocostruct.py +35 -17
  70. deepdoctection/mapper/d2struct.py +56 -29
  71. deepdoctection/mapper/hfstruct.py +32 -19
  72. deepdoctection/mapper/laylmstruct.py +221 -185
  73. deepdoctection/mapper/maputils.py +71 -35
  74. deepdoctection/mapper/match.py +76 -62
  75. deepdoctection/mapper/misc.py +68 -44
  76. deepdoctection/mapper/pascalstruct.py +13 -12
  77. deepdoctection/mapper/prodigystruct.py +33 -19
  78. deepdoctection/mapper/pubstruct.py +42 -32
  79. deepdoctection/mapper/tpstruct.py +39 -19
  80. deepdoctection/mapper/xfundstruct.py +20 -13
  81. deepdoctection/pipe/__init__.py +1 -2
  82. deepdoctection/pipe/anngen.py +104 -62
  83. deepdoctection/pipe/base.py +226 -107
  84. deepdoctection/pipe/common.py +206 -123
  85. deepdoctection/pipe/concurrency.py +74 -47
  86. deepdoctection/pipe/doctectionpipe.py +108 -47
  87. deepdoctection/pipe/language.py +41 -24
  88. deepdoctection/pipe/layout.py +45 -18
  89. deepdoctection/pipe/lm.py +146 -78
  90. deepdoctection/pipe/order.py +196 -113
  91. deepdoctection/pipe/refine.py +111 -63
  92. deepdoctection/pipe/registry.py +1 -1
  93. deepdoctection/pipe/segment.py +213 -142
  94. deepdoctection/pipe/sub_layout.py +76 -46
  95. deepdoctection/pipe/text.py +52 -33
  96. deepdoctection/pipe/transform.py +8 -6
  97. deepdoctection/train/d2_frcnn_train.py +87 -69
  98. deepdoctection/train/hf_detr_train.py +72 -40
  99. deepdoctection/train/hf_layoutlm_train.py +85 -46
  100. deepdoctection/train/tp_frcnn_train.py +56 -28
  101. deepdoctection/utils/concurrency.py +59 -16
  102. deepdoctection/utils/context.py +40 -19
  103. deepdoctection/utils/develop.py +25 -17
  104. deepdoctection/utils/env_info.py +85 -36
  105. deepdoctection/utils/error.py +16 -10
  106. deepdoctection/utils/file_utils.py +246 -62
  107. deepdoctection/utils/fs.py +162 -43
  108. deepdoctection/utils/identifier.py +29 -16
  109. deepdoctection/utils/logger.py +49 -32
  110. deepdoctection/utils/metacfg.py +83 -21
  111. deepdoctection/utils/pdf_utils.py +119 -62
  112. deepdoctection/utils/settings.py +24 -10
  113. deepdoctection/utils/tqdm.py +10 -5
  114. deepdoctection/utils/transform.py +182 -46
  115. deepdoctection/utils/utils.py +61 -28
  116. deepdoctection/utils/viz.py +150 -104
  117. deepdoctection-0.43.dist-info/METADATA +376 -0
  118. deepdoctection-0.43.dist-info/RECORD +149 -0
  119. deepdoctection/analyzer/_config.py +0 -146
  120. deepdoctection-0.42.1.dist-info/METADATA +0 -431
  121. deepdoctection-0.42.1.dist-info/RECORD +0 -148
  122. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/WHEEL +0 -0
  123. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/licenses/LICENSE +0 -0
  124. {deepdoctection-0.42.1.dist-info → deepdoctection-0.43.dist-info}/top_level.txt +0 -0
deepdoctection/pipe/sub_layout.py

@@ -16,7 +16,7 @@
 # limitations under the License.
 
 """
-Module for cell detection pipeline component
+Sub layout detection pipeline component
 """
 from __future__ import annotations
 
@@ -39,12 +39,12 @@ from .registry import pipeline_component_registry
 
 class DetectResultGenerator:
     """
-    Use: `DetectResultGenerator` to refine raw detection results.
+    Use `DetectResultGenerator` to refine raw detection results.
 
     Certain pipeline components depend on, for example, at least one object being detected. If this is not the
-    case, the generator can generate a DetectResult with a default setting. If no object was discovered for a
-    category, a DetectResult with the dimensions of the original image is generated and added to the remaining
-    DetectResults.
+    case, the generator can generate a `DetectResult` with a default setting. If no object was discovered for a
+    category, a `DetectResult` with the dimensions of the original image is generated and added to the remaining
+    `DetectResults`.
     """
 
     def __init__(
@@ -55,11 +55,13 @@ class DetectResultGenerator:
         absolute_coords: bool = True,
     ) -> None:
         """
-        :param categories_name_as_key: The dict of all possible detection categories
-        :param group_categories: If you only want to generate only one DetectResult for a group of categories, provided
-                                 that the sum of the group is less than one, then you can pass a list of list for
-                                 grouping category ids.
-        :param absolute_coords: 'absolute_coords' value to be set in 'DetectionResult'
+        Args:
+            categories_name_as_key: The dict of all possible detection categories.
+            group_categories: If you only want to generate only one `DetectResult` for a group of categories, provided
+                that the sum of the group is less than one, then you can pass a list of list for grouping category ids.
+            exclude_category_names: List of category names to exclude from result generation.
+            absolute_coords: Value to be set in `DetectionResult` for `absolute_coords`.
+
         """
         self.categories_name_as_key = MappingProxyType(dict(categories_name_as_key.items()))
         self.width: Optional[int] = None
@@ -75,10 +77,16 @@ class DetectResultGenerator:
 
     def create_detection_result(self, detect_result_list: list[DetectionResult]) -> list[DetectionResult]:
         """
-        Adds DetectResults for which no object was detected to the list.
+        Adds `DetectResults` for which no object was detected to the list.
+
+        Args:
+            detect_result_list: `DetectResults` of a previously run `ObjectDetector`.
+
+        Returns:
+            Refined list of `DetectionResult`.
 
-        :param detect_result_list: DetectResults of a previously run ObjectDetector
-        :return: refined list
+        Raises:
+            ValueError: If `width` and `height` are not initialized.
         """
 
         if self.width is None and self.height is None:
@@ -115,10 +123,16 @@ class DetectResultGenerator:
     @staticmethod
    def _detection_result_sanity_check(detect_result_list: list[DetectionResult]) -> list[DetectionResult]:
         """
-        Go through each detect_result in the list and check if the box argument has sensible coordinates:
-        ulx >= 0 and lrx - ulx >= 0 (same for y coordinate). Remove the detection result if this condition is not
+        Go through each `detect_result` in the list and check if the `box` argument has sensible coordinates:
+        `ulx >= 0` and `lrx - ulx >= 0` (same for y coordinate). Remove the detection result if this condition is not
         satisfied. We need this check because if some detection results are not sane, we might end up with some
-        none existing categories.
+        non-existing categories.
+
+        Args:
+            detect_result_list: List of `DetectionResult` to check.
+
+        Returns:
+            List of `DetectionResult` with only valid boxes.
         """
         sane_detect_results = []
         for detect_result in detect_result_list:
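The new `Args:` sections above are easier to follow with a concrete call. A minimal sketch of the generator's fallback behaviour, assuming the import path from the file list; the category names, ids, grouping and page dimensions are illustrative placeholders, not values from this diff:

```python
from deepdoctection.pipe.sub_layout import DetectResultGenerator

# Hypothetical category mapping; the parameter name `categories_name_as_key`
# suggests category names as keys and category ids as values.
categories = {"row": 1, "column": 2}

generator = DetectResultGenerator(
    categories_name_as_key=categories,
    group_categories=[[1], [2]],  # list of lists of category ids: one fallback per group
    absolute_coords=True,
)

# Per the Raises: section, width and height must be set before calling
# create_detection_result, otherwise a ValueError is raised.
generator.width, generator.height = 1654, 2339

# With an empty detector output, the generator adds a DetectResult with the
# dimensions of the original image for each category (or category group).
refined = generator.create_detection_result([])
```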
@@ -143,19 +157,20 @@ class DetectResultGenerator:
 @pipeline_component_registry.register("SubImageLayoutService")
 class SubImageLayoutService(PipelineComponent):
     """
-    Component in which the selected ImageAnnotation can be selected with cropped images and presented to a detector.
+    Component in which the selected `ImageAnnotation` can be selected with cropped images and presented to a detector.
 
-    The detected DetectResults are transformed into ImageAnnotations and stored both in the cache of the parent image
-    and in the cache of the sub image.
+    The detected `DetectResults` are transformed into `ImageAnnotations` and stored both in the cache of the parent
+    image and in the cache of the sub image.
 
     If no objects are discovered, artificial objects can be added by means of a refinement process.
 
-    **Example**
-
-        detect_result_generator = DetectResultGenerator(categories_items)
-        d_items = TPFrcnnDetector(item_config_path, item_weights_path, {1: LayoutType.row,
-                                                                        2: LayoutType.column})
-        item_component = SubImageLayoutService(d_items, LayoutType.table, detect_result_generator)
+    Example:
+        ```python
+        detect_result_generator = DetectResultGenerator(categories_items)
+        d_items = TPFrcnnDetector(item_config_path, item_weights_path, {1: LayoutType.row,
+                                                                        2: LayoutType.column})
+        item_component = SubImageLayoutService(d_items, LayoutType.table, detect_result_generator)
+        ```
     """
 
     def __init__(
@@ -167,18 +182,22 @@ class SubImageLayoutService(PipelineComponent):
         padder: Optional[PadTransform] = None,
     ):
         """
-        :param sub_image_detector: object detector.
-        :param sub_image_names: Category names of ImageAnnotations to be presented to the detector.
-                                Attention: The selected ImageAnnotations must have: attr:`image` and: attr:`image.image`
-                                not None.
-        :param service_ids: List of service ids to be used for filtering the ImageAnnotations. If None, all
-                            ImageAnnotations will be used.
-        :param detect_result_generator: 'DetectResultGenerator' instance. 'categories' attribute has to be the same as
-                                        the 'categories' attribute of the 'sub_image_detector'. The generator will be
-                                        responsible to create 'DetectionResult' for some categories, if they have not
-                                        been detected by 'sub_image_detector'.
-        :param padder: 'PadTransform' to pad an image before passing to a predictor. Will be also responsible for
-                       inverse coordinate transformation.
+        Args:
+            sub_image_detector: `ObjectDetector`.
+            sub_image_names: Category names of `ImageAnnotations` to be presented to the detector.
+                Attention: The selected `ImageAnnotations` must have `image` and `image.image` not None.
+            service_ids: List of service ids to be used for filtering the `ImageAnnotations`. If None, all
+                `ImageAnnotations` will be used.
+            detect_result_generator: `DetectResultGenerator` instance. `categories` attribute has to be the same as
+                the `categories` attribute of the `sub_image_detector`. The generator will be
+                responsible to create `DetectionResult` for some categories, if they have not
+                been detected by `sub_image_detector`.
+            padder: `PadTransform` to pad an image before passing to a predictor. Will be also responsible for
+                inverse coordinate transformation.
+
+        Raises:
+            ValueError: If the categories of the `detect_result_generator` do not match the categories of the
+                `sub_image_detector`.
         """
 
         self.sub_image_name = (
@@ -203,10 +222,13 @@ class SubImageLayoutService(PipelineComponent):
 
     def serve(self, dp: Image) -> None:
         """
-        - Selection of ImageAnnotation to present to the detector.
-        - Invoke the detector
-        - Optionally invoke the DetectResultGenerator
-        - Generate ImageAnnotations and dump to parent image and sub image.
+        - Selection of `ImageAnnotation` to present to the detector.
+        - Invoke the detector.
+        - Optionally invoke the `DetectResultGenerator`.
+        - Generate `ImageAnnotations` and dump to parent image and sub image.
+
+        Args:
+            dp: `Image` to process.
         """
         sub_image_anns = dp.get_annotation(category_names=self.sub_image_name, service_ids=self.service_ids)
         for sub_image_ann in sub_image_anns:
@@ -255,13 +277,21 @@ class SubImageLayoutService(PipelineComponent):
         )
 
     def prepare_np_image(self, sub_image_ann: ImageAnnotation) -> PixelValues:
-        """Maybe crop and pad a np_array before passing it to the predictor.
+        """
+        Maybe crop and pad a `np_array` before passing it to the predictor.
+
+        Note:
+            We currently assume a two level hierarchy of images, e.g. we can crop a sub-image from the base
+            image, e.g. the original input but we cannot crop a sub-image from an image which is itself a sub-image.
+
+        Args:
+            sub_image_ann: `ImageAnnotation` to be processed.
 
-        Note that we currently assume to a two level hierachy of images, e.g. we can crop a sub-image from the base
-        image, e.g. the original input but we cannot crop a sub-image from an image which is itself a sub-image.
+        Returns:
+            Processed `np_image`.
 
-        :param sub_image_ann: ImageAnnotation to be processed
-        :return: processed np_image
+        Raises:
+            ValueError: If `sub_image_ann.image` is `None`.
         """
         if sub_image_ann.image is None:
             raise ValueError("sub_image_ann.image is None, but must be an datapoint.Image")
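The `padder` argument documented in `__init__` above can be wired in as follows. The detector and generator are the ones from the class `Example:` block; the `PadTransform` parameter names and values are assumptions (check `deepdoctection/utils/transform.py` from the file list), not taken from this diff:

```python
from deepdoctection.utils.transform import PadTransform

# Hypothetical padding of 60 pixels on each side; the keyword names are
# illustrative and may differ from the actual PadTransform signature.
padder = PadTransform(top=60, right=60, bottom=60, left=60)

item_component = SubImageLayoutService(
    sub_image_detector=d_items,
    sub_image_names=LayoutType.table,
    detect_result_generator=detect_result_generator,
    padder=padder,  # pads before prediction, inverts the coordinate transform afterwards
)
```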
deepdoctection/pipe/text.py

@@ -16,7 +16,7 @@
 # limitations under the License.
 
 """
-Module for text extraction pipeline component
+Text extraction pipeline component
 """
 
 from __future__ import annotations
@@ -40,29 +40,30 @@ __all__ = ["TextExtractionService"]
 @pipeline_component_registry.register("TextExtractionService")
 class TextExtractionService(PipelineComponent):
     """
-    Pipeline component for extracting text. Any detector can be selected, provided that it can evaluate a
-    numpy array as an image.
+    Text extraction pipeline component.
 
-    Text extraction can either be carried out over the entire image or over selected regions of interests (ROIs).
-    ROIs are layout components that have been determined by means of a pipeline component that has been run through
-    beforehand. ROI extraction is particularly suitable when an OCR component is selected as the detector and the
-    document has a complex structure. Instead of transferring the entire image, only the ROIs are transferred to
-    the detector. Since the ROI has a simpler structure than the entire document page, it can significantly improve
-    the OCR results.
+    This component is responsible for extracting text from images or selected regions of interest (ROIs) using a
+    specified detector. The detector must be able to evaluate a numpy array as an image.
 
-    Text components (currently only words) are attached to the image as image annotations. A relation is assigned in
-    relation to text and ROI or in relation to text and the entire image. When selecting ROIs, only the selected
-    categories are taken into account during processing. ROIs that are not selected are not presented to the
-    detector.
+    Text extraction can be performed on the entire image or on selected ROIs, which are layout components determined by
+    a previously run pipeline component. ROI extraction is particularly useful when using an OCR component as the
+    detector and the document has a complex structure. By transferring only the ROIs to the detector, OCR results can
+    be significantly improved due to the simpler structure of the ROI compared to the entire document page.
 
-        textract_predictor = TextractOcrDetector()
-        text_extract = TextExtractionService(textract_predictor)
+    Text components (currently only words) are attached to the image as image annotations. A relation is assigned
+    between text and ROI or between text and the entire image. When selecting ROIs, only the selected categories are
+    processed. ROIs that are not selected are not presented to the detector.
 
-        pipe = DoctectionPipe([text_extract])
-        df = pipe.analyze(path="path/to/document.pdf")
+    Example:
+        ```python
+        textract_predictor = TextractOcrDetector()
+        text_extract = TextExtractionService(textract_predictor)
 
-        for dp in df:
-            ...
+        pipe = DoctectionPipe([text_extract])
+        df = pipe.analyze(path="path/to/document.pdf")
+
+        for dp in df:
+            ...
+        ```
     """
 
     def __init__(
@@ -72,12 +73,18 @@ class TextExtractionService(PipelineComponent):
         run_time_ocr_language_selection: bool = False,
     ):
         """
-        :param text_extract_detector: ObjectDetector
-        :param extract_from_roi: one or more category names for roi selection
-        :param run_time_ocr_language_selection: Only available for `TesseractOcrDetector` as this framework has
-                                                multiple language selections. Also requires that a language detection
-                                                pipeline component ran before. It will select the expert language OCR
-                                                model based on the determined language.
+        Args:
+            text_extract_detector: The detector used for text extraction.
+            extract_from_roi: One or more category names for ROI selection.
+            run_time_ocr_language_selection: If True, enables runtime OCR language selection. Only available for
+                `TesseractOcrDetector` as this framework supports multiple languages.
+                Requires a language detection pipeline component to have run before.
+                Selects the expert language OCR model based on the determined language.
+
+        Raises:
+            TypeError: If predicting from a cropped image and the detector is not an `ObjectDetector` or
+                `TextRecognizer`.
+            TypeError: If `run_time_ocr_language_selection` is True and the detector is not a `TesseractOcrDetector`.
         """
 
         if extract_from_roi is None:
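The `run_time_ocr_language_selection` flag documented above only makes sense in a pipeline where a language detection component (see `deepdoctection/pipe/language.py` in the file list) has already run. A hedged sketch; the Tesseract config path is a placeholder and the `TesseractOcrDetector` constructor signature from `deepdoctection/extern/tessocr.py` is an assumption:

```python
from deepdoctection.extern.tessocr import TesseractOcrDetector
from deepdoctection.pipe.text import TextExtractionService

# Placeholder config path. Tesseract ships one expert model per language,
# which is why runtime language selection is Tesseract-only.
tesseract = TesseractOcrDetector("path/to/conf_tesseract.yaml")

text_extract = TextExtractionService(
    text_extract_detector=tesseract,
    run_time_ocr_language_selection=True,  # raises TypeError for non-Tesseract detectors
)
```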
@@ -140,11 +147,17 @@ class TextExtractionService(PipelineComponent):
 
     def get_text_rois(self, dp: Image) -> Sequence[Union[Image, ImageAnnotation, list[ImageAnnotation]]]:
         """
-        Return image rois based on selected categories. As this selection makes only sense for specific text extractors
-        (e.g. those who do proper OCR and do not mine from text from native pdfs) it will do some sanity checks.
-        It is possible that a preceding text extractor dumped text before. If the predictor must not extract text as
-        well `get_text_rois` will return an empty list.
-        :return: list of ImageAnnotation or Image
+        Returns image ROIs based on selected categories.
+
+        This selection is only meaningful for specific text extractors (e.g., those performing OCR and not mining text
+        from native PDFs). Performs sanity checks. If a preceding text extractor has already dumped text, and the
+        predictor should not extract text as well, returns an empty list.
+
+        Args:
+            dp: The `Image` to process.
+
+        Returns:
+            A list of `ImageAnnotation` or `Image`.
         """
 
         if self.extract_from_category:
@@ -157,11 +170,17 @@ class TextExtractionService(PipelineComponent):
         self, text_roi: Union[Image, ImageAnnotation, list[ImageAnnotation]]
     ) -> Optional[Union[bytes, PixelValues, list[tuple[str, PixelValues]], int]]:
         """
-        Return raw input for a given `text_roi`. This can be a numpy array or pdf bytes and depends on the chosen
+        Returns raw input for a given `text_roi`. The input can be a numpy array or PDF bytes, depending on the chosen
         predictor.
 
-        :param text_roi: `Image` or `ImageAnnotation`
-        :return: pdf bytes or numpy array
+        Args:
+            text_roi: The `Image`, `ImageAnnotation`, or list of `ImageAnnotation` to process.
+
+        Returns:
+            PDF bytes, numpy array, or other predictor-specific input.
+
+        Raises:
+            ImageError: If required image data is missing or if `text_roi` is not an `Image` when required.
         """
 
         if isinstance(text_roi, ImageAnnotation):
deepdoctection/pipe/transform.py

@@ -16,8 +16,7 @@
 # limitations under the License.
 
 """
-Module for transform style pipeline components. These pipeline components are used for various transforming operations
-on images (e.g. deskew, de-noising or more general GAN like operations.
+Transform style pipeline components.
 """
 
 from __future__ import annotations
@@ -32,9 +31,10 @@ from .registry import pipeline_component_registry
 @pipeline_component_registry.register("SimpleTransformService")
 class SimpleTransformService(PipelineComponent):
     """
-    Pipeline component for transforming an image. The service is designed for applying transform predictors that
-    take an image as numpy array as input and return the same. The service itself will change the underlying metadata
-    like height and width of the returned transform.
+    Pipeline component for transforming an image.
+
+    The service is designed for applying transform predictors that take an image as numpy array as input and return
+    the same. The service itself will change the underlying metadata like height and width of the returned transform.
 
     This component is meant to be used at the very first stage of a pipeline. If components have already returned image
     annotations then this component will currently not re-calculate bounding boxes in terms of the transformed image.
@@ -43,8 +43,10 @@ class SimpleTransformService(PipelineComponent):
 
     def __init__(self, transform_predictor: ImageTransformer):
         """
+        Initializes a `SimpleTransformService`.
 
-        :param transform_predictor: image transformer
+        Args:
+            transform_predictor: Image transformer.
         """
         self.transform_predictor = transform_predictor
         super().__init__(self._get_name(transform_predictor.name), self.transform_predictor.model_id)
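As the class docstring above notes, `SimpleTransformService` belongs at the very first stage of a pipeline, before any component that produces bounding boxes. A minimal sketch using a deskew transformer as the `ImageTransformer`; the `Jdeskewer` class name and its no-argument construction are assumptions based on `deepdoctection/extern/deskew.py` in the file list:

```python
from deepdoctection.extern.deskew import Jdeskewer
from deepdoctection.pipe.transform import SimpleTransformService

# Deskewing runs first, since the service does not re-calculate bounding
# boxes of annotations produced by earlier components.
transform_service = SimpleTransformService(transform_predictor=Jdeskewer())
```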
deepdoctection/train/d2_frcnn_train.py

@@ -16,7 +16,7 @@
 # limitations under the License.
 
 """
-Module for training Detectron2 `GeneralizedRCNN`
+Training Detectron2 `GeneralizedRCNN`
 """
 from __future__ import annotations
 
@@ -111,10 +111,12 @@ class WandbWriter(EventWriter):
         **kwargs: Any,
     ):
         """
-        :param project: W&B Project name
-        :param config: the project level configuration object
-        :param window_size: the scalars will be median-smoothed by this window size
-        :param kwargs: other arguments passed to `wandb.init(...)`
+        Args:
+            project: W&B Project name.
+            repo: Repository name.
+            config: The project level configuration object.
+            window_size: The scalars will be median-smoothed by this window size.
+            **kwargs: Other arguments passed to `wandb.init(...)`.
         """
         if config is None:
             config = {}
@@ -137,8 +139,10 @@
 
 class D2Trainer(DefaultTrainer):
     """
-    Detectron2 `DefaultTrainer` with some custom method for handling datasets and running evaluation. The setting is
-    made to train standard models in detectron2.
+    Detectron2 `DefaultTrainer` with some custom method for handling datasets and running evaluation.
+
+    Info:
+        The setting is made to train standard models in Detectron2.
     """
 
     def __init__(self, cfg: CfgNode, torch_dataset: IterableDataset[Any], mapper: DatasetMapper) -> None:
@@ -150,10 +154,16 @@ class D2Trainer(DefaultTrainer):
 
     def build_hooks(self) -> list[HookBase]:
         """
-        Overwritten from DefaultTrainer. This ensures that the EvalHook is being called before the writer and
-        all metrics are being written to JSON, Tensorboard etc.
+        Builds the list of hooks for training.
+
+        Note:
+            This ensures that the `EvalHook` is being called before the writer and all metrics are being written to
+            JSON, Tensorboard etc.
+
+        Returns:
+            List of `HookBase` objects.
+
 
-        :return: list[HookBase]
         """
         cfg = self.cfg.clone()
         cfg.defrost()
@@ -203,10 +213,12 @@ class D2Trainer(DefaultTrainer):
     def build_writers(self) -> list[EventWriter]:
         """
         Build a list of writers to be using `default_writers()`.
-        If you'd like a different list of writers, you can overwrite it in
-        your trainer.
 
-        :return: A list of `EventWriter` objects.
+        Note:
+            If you'd like a different list of writers, you can overwrite it in your trainer.
+
+        Returns:
+            A list of `EventWriter` objects.
         """
         writers_list = default_writers(self.cfg.OUTPUT_DIR, self.max_iter)
         if self.cfg.WANDB.USE_WANDB:
@@ -220,10 +232,13 @@ class D2Trainer(DefaultTrainer):
     def build_train_loader(self, cfg: CfgNode) -> DataLoader[Any]:  # pylint: disable=W0221
         """
-        Overwritten method from `DefaultTrainer`.
+        Builds the data loader for training.
+
+        Args:
+            cfg: Configuration.
 
-        :param cfg: Configuration
-        :return: The data loader for a given dataset adapter, mapper.
+        Returns:
+            The data loader for a given dataset adapter and mapper.
         """
         return build_detection_train_loader(
             dataset=self.dataset, mapper=self.mapper, total_batch_size=cfg.SOLVER.IMS_PER_BATCH
         )
@@ -231,10 +246,13 @@ class D2Trainer(DefaultTrainer):
 
     def eval_with_dd_evaluator(self, **build_eval_kwargs: str) -> Union[list[dict[str, Any]], dict[str, Any]]:
         """
-        Running the Evaluator. This method will be called from the `EvalHook`
+        Runs the evaluator. This method will be called from the `EvalHook`.
 
-        :param build_eval_kwargs: dataflow eval config kwargs of the underlying dataset
-        :return: A dict of evaluation results
+        Args:
+            **build_eval_kwargs: Dataflow eval config kwargs of the underlying dataset.
+
+        Returns:
+            A dict or list of dicts with evaluation results.
         """
         assert self.evaluator is not None
         assert self.evaluator.pipe_component is not None
@@ -251,13 +269,16 @@ class D2Trainer(DefaultTrainer):
         build_val_dict: Optional[Mapping[str, str]] = None,
     ) -> None:
         """
-        Setup of evaluator before starting training. During training, predictors will be replaced by current
-        checkpoints.
+        Setup of evaluator before starting training.
+
+        Note:
+            During training, predictors will be replaced by current checkpoints.
 
-        :param dataset_val: dataset on which to run evaluation
-        :param pipeline_component: pipeline component to plug into the evaluator
-        :param metric: A metric class
-        :param build_val_dict: evaluation dataflow build config
+        Args:
+            dataset_val: Dataset on which to run evaluation.
+            pipeline_component: Pipeline component to plug into the evaluator.
+            metric: A metric class or instance.
+            build_val_dict: Evaluation dataflow build config.
         """
         if wandb_available():
             run = wandb.run if wandb.run is not None else None
@@ -295,50 +316,47 @@ def train_d2_faster_rcnn(
     pipeline_component_name: Optional[str] = None,
 ) -> None:
     """
-    Adaptation of <https://github.com/facebookresearch/detectron2/blob/main/tools/train_net.py> for training Detectron2
-    standard models
-
-    Train Detectron2 from scratch or fine-tune a model using this API. Compared to Tensorpack this framework trains much
-    faster, e.g. <https://detectron2.readthedocs.io/en/latest/notes/benchmarks.html> .
-
-    This training script is devoted to the case where one cluster with one GPU is available. To run on several machines
-    with more than one GPU use `detectron2.engine.launch` .
-
-        if __name__ == "__main__":
-
-            launch(train_d2_faster_rcnn,
-                   num_gpus,
-                   num_machines,
-                   machine_rank,
-                   dist_url,
-                   args=(path_config_yaml,
-                         path_weights,
-                         config_overwrite,
-                         log_dir,
-                         build_train_config,
-                         dataset_val,
-                         build_val_config,
-                         metric_name,
-                         metric,
-                         pipeline_component_name),)
-
-
-    :param path_config_yaml: path to a D2 config file. Check
-                             https://github.com/facebookresearch/detectron2/blob/main/detectron2/config/defaults.py
-                             for various settings.
-    :param dataset_train: the dataset to use for training.
-    :param path_weights: path to a checkpoint, if you want to continue training or fine-tune. Will train from scratch if
-                         an empty string is passed
-    :param config_overwrite: Pass a list of arguments if some configs from the .yaml file should be replaced. Use the
-                             list convention, e.g. ['TRAIN.STEPS_PER_EPOCH=500', 'OUTPUT.RESULT_SCORE_THRESH=0.4']
-    :param log_dir: Path to log dir. Will default to `train_log/frcnn`
-    :param build_train_config: dataflow build setting. Again, use list convention setting, e.g. ['max_datapoints=1000']
-    :param dataset_val: the dataset to use for validation.
-    :param build_val_config: same as `build_train_config` but for validation
-    :param metric_name: A metric name to choose for validation. Will use the default setting. If you want a custom
-                        metric setting, pass a metric explicitly.
-    :param metric: A metric to choose for validation.
-    :param pipeline_component_name: A pipeline component name to use for validation.
+    Adaptation of https://github.com/facebookresearch/detectron2/blob/main/tools/train_net.py for training Detectron2
+    standard models.
+
+    Trains Detectron2 from scratch or fine-tunes a model using this API.
+
+    Info:
+        This training script is devoted to the case where one cluster with one GPU is available. To run on several
+        machines with more than one GPU use `detectron2.engine.launch`.
+
+    Example:
+        ```python
+        launch(train_d2_faster_rcnn,
+               num_gpus,
+               num_machines,
+               machine_rank,
+               dist_url,
+               args=(path_config_yaml,
+                     path_weights,
+                     config_overwrite,
+                     log_dir,
+                     build_train_config,
+                     dataset_val,
+                     build_val_config,
+                     metric_name,
+                     metric,
+                     pipeline_component_name),)
+        ```
+
+    Args:
+        path_config_yaml: Path to a Detectron2 config file.
+        dataset_train: The dataset to use for training.
+        path_weights: Path to a checkpoint, if you want to continue training or fine-tune. Will train from scratch if
+            an empty string is passed.
+        config_overwrite: List of arguments if some configs from the .yaml file should be replaced.
+        log_dir: Path to log dir. Will default to `train_log/frcnn`.
+        build_train_config: Dataflow build setting.
+        dataset_val: The dataset to use for validation.
+        build_val_config: Same as `build_train_config` but for validation.
+        metric_name: A metric name to choose for validation.
+        metric: A metric to choose for validation.
+        pipeline_component_name: A pipeline component name to use for validation.
     """
 
     assert cuda.device_count() > 0, "Has to train with GPU!"
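For the single-machine, single-GPU case described in the `Info:` block, the function can also be called directly instead of going through `detectron2.engine.launch`. A sketch with placeholder paths; the `config_overwrite` values are the list-convention examples carried over from the 0.42.1 docstring, and `get_dataset` is assumed to resolve a registered dataset:

```python
from deepdoctection.datasets import get_dataset
from deepdoctection.train.d2_frcnn_train import train_d2_faster_rcnn

dataset_train = get_dataset("publaynet")  # any registered dataset

train_d2_faster_rcnn(
    path_config_yaml="path/to/d2_config.yaml",  # placeholder path
    dataset_train=dataset_train,
    path_weights="",  # empty string trains from scratch
    config_overwrite=["TRAIN.STEPS_PER_EPOCH=500", "OUTPUT.RESULT_SCORE_THRESH=0.4"],
    log_dir="train_log/frcnn",
)
```

Note that the function asserts `cuda.device_count() > 0`, so a GPU is required.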