megadetector 5.0.12__py3-none-any.whl → 5.0.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (45) hide show
  1. megadetector/api/batch_processing/api_core/server.py +1 -1
  2. megadetector/api/batch_processing/api_core/server_api_config.py +0 -1
  3. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -3
  4. megadetector/api/batch_processing/api_core/server_utils.py +0 -4
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  6. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -3
  7. megadetector/classification/efficientnet/utils.py +0 -3
  8. megadetector/data_management/camtrap_dp_to_coco.py +0 -2
  9. megadetector/data_management/cct_json_utils.py +15 -6
  10. megadetector/data_management/coco_to_labelme.py +12 -1
  11. megadetector/data_management/databases/integrity_check_json_db.py +43 -27
  12. megadetector/data_management/importers/cacophony-thermal-importer.py +1 -4
  13. megadetector/data_management/ocr_tools.py +0 -4
  14. megadetector/data_management/read_exif.py +178 -44
  15. megadetector/data_management/rename_images.py +187 -0
  16. megadetector/data_management/wi_download_csv_to_coco.py +3 -2
  17. megadetector/data_management/yolo_output_to_md_output.py +7 -2
  18. megadetector/detection/process_video.py +548 -244
  19. megadetector/detection/pytorch_detector.py +33 -14
  20. megadetector/detection/run_detector.py +17 -5
  21. megadetector/detection/run_detector_batch.py +179 -65
  22. megadetector/detection/run_inference_with_yolov5_val.py +527 -357
  23. megadetector/detection/tf_detector.py +14 -3
  24. megadetector/detection/video_utils.py +284 -61
  25. megadetector/postprocessing/categorize_detections_by_size.py +16 -14
  26. megadetector/postprocessing/classification_postprocessing.py +716 -0
  27. megadetector/postprocessing/compare_batch_results.py +101 -93
  28. megadetector/postprocessing/convert_output_format.py +12 -5
  29. megadetector/postprocessing/merge_detections.py +18 -7
  30. megadetector/postprocessing/postprocess_batch_results.py +133 -127
  31. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +236 -232
  32. megadetector/postprocessing/subset_json_detector_output.py +66 -62
  33. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +0 -2
  34. megadetector/utils/ct_utils.py +5 -4
  35. megadetector/utils/md_tests.py +380 -128
  36. megadetector/utils/path_utils.py +39 -6
  37. megadetector/utils/process_utils.py +13 -4
  38. megadetector/visualization/visualization_utils.py +7 -2
  39. megadetector/visualization/visualize_db.py +79 -77
  40. megadetector/visualization/visualize_detector_output.py +0 -1
  41. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/LICENSE +0 -0
  42. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/METADATA +2 -2
  43. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/RECORD +45 -43
  44. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/top_level.txt +0 -0
  45. {megadetector-5.0.12.dist-info → megadetector-5.0.14.dist-info}/WHEEL +0 -0
@@ -66,247 +66,251 @@ class RepeatDetectionOptions:
66
66
  """
67
67
  Options that control the behavior of repeat detection elimination
68
68
  """
69
+
70
+ def __init__(self):
71
+
72
+ #: Folder where images live; filenames in the MD results .json file should
73
+ #: be relative to this folder.
74
+ #:
75
+ #: imageBase can also be a SAS URL, in which case some error-checking is
76
+ #: disabled.
77
+ self.imageBase = ''
78
+
79
+ #: Folder where we should write temporary output.
80
+ self.outputBase = ''
69
81
 
70
- #: Folder where images live; filenames in the MD results .json file should
71
- #: be relative to this folder.
72
- #:
73
- #: imageBase can also be a SAS URL, in which case some error-checking is
74
- #: disabled.
75
- imageBase = ''
82
+ #: Don't consider detections with confidence lower than this as suspicious
83
+ self.confidenceMin = 0.1
84
+
85
+ #: Don't consider detections with confidence higher than this as suspicious
86
+ self.confidenceMax = 1.0
76
87
 
77
- #: Folder where we should write temporary output.
78
- outputBase = ''
79
-
80
- #: Don't consider detections with confidence lower than this as suspicious
81
- confidenceMin = 0.1
82
-
83
- #: Don't consider detections with confidence higher than this as suspicious
84
- confidenceMax = 1.0
85
-
86
- #: What's the IOU threshold for considering two boxes the same?
87
- iouThreshold = 0.9
88
-
89
- #: How many occurrences of a single location (as defined by the IOU threshold)
90
- #: are required before we declare it suspicious?
91
- occurrenceThreshold = 20
92
-
93
- #: Ignore "suspicious" detections smaller than some size
94
- minSuspiciousDetectionSize = 0.0
95
-
96
- #: Ignore "suspicious" detections larger than some size; these are often animals
97
- #: taking up the whole image. This is expressed as a fraction of the image size.
98
- maxSuspiciousDetectionSize = 0.2
99
-
100
- #: Ignore folders with more than this many images in them
101
- maxImagesPerFolder = None
102
-
103
- #: A list of category IDs (ints) that we don't want consider as candidate repeat detections.
104
- #:
105
- #: Typically used to say, e.g., "don't bother analyzing people or vehicles for repeat
106
- #: detections", which you could do by saying excludeClasses = [2,3].
107
- excludeClasses = []
108
-
109
- #: For very large sets of results, passing chunks of results to and from workers as
110
- #: parameters ('memory') can be memory-intensive, so we can serialize to intermediate
111
- #: files instead ('file').
112
- #:
113
- #: The use of 'file' here is still experimental.
114
- pass_detections_to_processes_method = 'memory'
115
-
116
- #: Number of workers to use for parallel operations
117
- nWorkers = 10
118
-
119
- #: Should we use threads (True) or processes (False) for parallelization?
120
- #:
121
- #: Not relevant if nWorkers <= 1, or if bParallelizeComparisons and
122
- #: bParallelizeRendering are both False.
123
- parallelizationUsesThreads = True
124
-
125
- #: If this is not empty, we'll load detections from a filter file rather than finding them
126
- #: from the detector output. This should be a .json file containing detections, generally this
127
- #: is the detectionIndex.json file in the filtering_* folder produced by find_repeat_detections().
128
- filterFileToLoad = ''
129
-
130
- #: (optional) List of filenames remaining after deletion of identified
131
- #: repeated detections that are actually animals. This should be a flat
132
- #: text file, one relative filename per line.
133
- #:
134
- #: This is a pretty esoteric code path and a candidate for removal.
135
- #:
136
- #: The scenario where I see it being most useful is the very hypothetical one
137
- #: where we use an external tool for image handling that allows us to do something
138
- #: smarter and less destructive than deleting images to mark them as non-false-positives.
139
- filteredFileListToLoad = None
140
-
141
- #: Should we write the folder of images used to manually review repeat detections?
142
- bWriteFilteringFolder = True
143
-
144
- #: For debugging: limit comparisons to a specific number of folders
145
- debugMaxDir = -1
146
-
147
- #: For debugging: limit rendering to a specific number of folders
148
- debugMaxRenderDir = -1
149
-
150
- #: For debugging: limit comparisons to a specific number of detections
151
- debugMaxRenderDetection = -1
152
-
153
- #: For debugging: limit comparisons to a specific number of instances
154
- debugMaxRenderInstance = -1
155
-
156
- #: Should we parallelize (across cameras) comparisons to find repeat detections?
157
- bParallelizeComparisons = True
158
-
159
- #: Should we parallelize image rendering?
160
- bParallelizeRendering = True
161
-
162
- #: If this is False (default), a detection from class A is *not* considered to be "the same"
163
- #: as a detection from class B, even if they're at the same location.
164
- categoryAgnosticComparisons = False
165
-
166
- #: Determines whether bounding-box rendering errors (typically network errors) should
167
- #: be treated as failures
168
- bFailOnRenderError = False
169
-
170
- #: Should we print a warning if images referred to in the MD results file are missing?
171
- bPrintMissingImageWarnings = True
172
-
173
- #: If bPrintMissingImageWarnings is True, should we print a warning about missing images
174
- #: just once ('once') or every time ('all')?
175
- missingImageWarningType = 'once' # 'all'
176
-
177
- #: Image width for rendered images (it's called "max" because we don't resize smaller images).
178
- #:
179
- #: Original size is preserved if this is None.
180
- #:
181
- #: This does *not* include the tile image grid.
182
- maxOutputImageWidth = None
183
-
184
- #: Line thickness (in pixels) for box rendering
185
- lineThickness = 10
186
-
187
- #: Box expansion (in pixels)
188
- boxExpansion = 2
189
-
190
- #: Progress bar used during comparisons and rendering. Do not set externally.
191
- #:
192
- #: :meta private:
193
- pbar = None
194
-
195
- #: Replace filename tokens after reading, useful when the directory structure
196
- #: has changed relative to the structure the detector saw.
197
- filenameReplacements = {}
198
-
199
- #: How many folders up from the leaf nodes should we be going to aggregate images into
200
- #: cameras?
201
- #:
202
- #: If this is zero, each leaf folder is treated as a camera.
203
- nDirLevelsFromLeaf = 0
204
-
205
- #: An optional function that takes a string (an image file name) and returns
206
- #: a string (the corresponding folder ID), typically used when multiple folders
207
- #: actually correspond to the same camera in a manufacturer-specific way (e.g.
208
- #: a/b/c/RECONYX100 and a/b/c/RECONYX101 may really be the same camera).
209
- #:
210
- #: See ct_utils for a common replacement function that handles most common
211
- #: manufacturer folder names.
212
- customDirNameFunction = None
213
-
214
- #: Include only specific folders, mutually exclusive with [excludeFolders]
215
- includeFolders = None
216
-
217
- #: Exclude specific folders, mutually exclusive with [includeFolders]
218
- excludeFolders = None
219
-
220
- #: Optionally show *other* detections (i.e., detections other than the
221
- #: one the user is evaluating), typically in a light gray.
222
- bRenderOtherDetections = False
223
-
224
- #: Threshold to use for *other* detections
225
- otherDetectionsThreshold = 0.2
226
-
227
- #: Line width (in pixels) for *other* detections
228
- otherDetectionsLineWidth = 1
229
-
230
- #: Optionally show a grid that includes a sample image for the detection, plus
231
- #: the top N additional detections
232
- bRenderDetectionTiles = True
233
-
234
- #: Width of the original image (within the larger output image) when bRenderDetectionTiles
235
- #: is True.
236
- #:
237
- #: If this is None, we'll render the original image in the detection tile image
238
- #: at its original width.
239
- detectionTilesPrimaryImageWidth = None
240
-
241
- #: Width to use for the grid of detection instances.
242
- #:
243
- #: Can be a width in pixels, or a number from 0 to 1 representing a fraction
244
- #: of the primary image width.
245
- #:
246
- #: If you want to render the grid at exactly 1 pixel wide, I guess you're out
247
- #: of luck.
248
- detectionTilesCroppedGridWidth = 0.6
249
-
250
- #: Location of the primary image within the mosaic ('right' or 'left)
251
- detectionTilesPrimaryImageLocation = 'right'
252
-
253
- #: Maximum number of individual detection instances to include in the mosaic
254
- detectionTilesMaxCrops = 250
255
-
256
- #: If bRenderOtherDetections is True, what color should we use to render the
257
- #: (hopefully pretty subtle) non-target detections?
258
- #:
259
- #: In theory I'd like these "other detection" rectangles to be partially
260
- #: transparent, but this is not straightforward, and the alpha is ignored
261
- #: here. But maybe if I leave it here and wish hard enough, someday it
262
- #: will work.
263
- #:
264
- #: otherDetectionsColors = ['dimgray']
265
- otherDetectionsColors = [(105,105,105,100)]
266
-
267
- #: Sort detections within a directory so nearby detections are adjacent
268
- #: in the list, for faster review.
269
- #:
270
- #: Can be None, 'xsort', or 'clustersort'
271
- #:
272
- #: * None sorts detections chronologically by first occurrence
273
- #: * 'xsort' sorts detections from left to right
274
- #: * 'clustersort' clusters detections and sorts by cluster
275
- smartSort = 'xsort'
276
-
277
- #: Only relevant if smartSort == 'clustersort'
278
- smartSortDistanceThreshold = 0.1
88
+ #: What's the IOU threshold for considering two boxes the same?
89
+ self.iouThreshold = 0.9
279
90
 
91
+ #: How many occurrences of a single location (as defined by the IOU threshold)
92
+ #: are required before we declare it suspicious?
93
+ self.occurrenceThreshold = 20
94
+
95
+ #: Ignore "suspicious" detections smaller than some size
96
+ self.minSuspiciousDetectionSize = 0.0
97
+
98
+ #: Ignore "suspicious" detections larger than some size; these are often animals
99
+ #: taking up the whole image. This is expressed as a fraction of the image size.
100
+ self.maxSuspiciousDetectionSize = 0.2
101
+
102
+ #: Ignore folders with more than this many images in them
103
+ self.maxImagesPerFolder = None
104
+
105
+ #: A list of category IDs (ints) that we don't want to consider as candidate repeat detections.
106
+ #:
107
+ #: Typically used to say, e.g., "don't bother analyzing people or vehicles for repeat
108
+ #: detections", which you could do by saying excludeClasses = [2,3].
109
+ self.excludeClasses = []
110
+
111
+ #: For very large sets of results, passing chunks of results to and from workers as
112
+ #: parameters ('memory') can be memory-intensive, so we can serialize to intermediate
113
+ #: files instead ('file').
114
+ #:
115
+ #: The use of 'file' here is still experimental.
116
+ self.pass_detections_to_processes_method = 'memory'
117
+
118
+ #: Number of workers to use for parallel operations
119
+ self.nWorkers = 10
120
+
121
+ #: Should we use threads (True) or processes (False) for parallelization?
122
+ #:
123
+ #: Not relevant if nWorkers <= 1, or if bParallelizeComparisons and
124
+ #: bParallelizeRendering are both False.
125
+ self.parallelizationUsesThreads = True
126
+
127
+ #: If this is not empty, we'll load detections from a filter file rather than finding them
128
+ #: from the detector output. This should be a .json file containing detections, generally this
129
+ #: is the detectionIndex.json file in the filtering_* folder produced by find_repeat_detections().
130
+ self.filterFileToLoad = ''
131
+
132
+ #: (optional) List of filenames remaining after deletion of identified
133
+ #: repeated detections that are actually animals. This should be a flat
134
+ #: text file, one relative filename per line.
135
+ #:
136
+ #: This is a pretty esoteric code path and a candidate for removal.
137
+ #:
138
+ #: The scenario where I see it being most useful is the very hypothetical one
139
+ #: where we use an external tool for image handling that allows us to do something
140
+ #: smarter and less destructive than deleting images to mark them as non-false-positives.
141
+ self.filteredFileListToLoad = None
142
+
143
+ #: Should we write the folder of images used to manually review repeat detections?
144
+ self.bWriteFilteringFolder = True
145
+
146
+ #: For debugging: limit comparisons to a specific number of folders
147
+ self.debugMaxDir = -1
148
+
149
+ #: For debugging: limit rendering to a specific number of folders
150
+ self.debugMaxRenderDir = -1
151
+
152
+ #: For debugging: limit comparisons to a specific number of detections
153
+ self.debugMaxRenderDetection = -1
154
+
155
+ #: For debugging: limit comparisons to a specific number of instances
156
+ self.debugMaxRenderInstance = -1
157
+
158
+ #: Should we parallelize (across cameras) comparisons to find repeat detections?
159
+ self.bParallelizeComparisons = True
160
+
161
+ #: Should we parallelize image rendering?
162
+ self.bParallelizeRendering = True
163
+
164
+ #: If this is False (default), a detection from class A is *not* considered to be "the same"
165
+ #: as a detection from class B, even if they're at the same location.
166
+ self.categoryAgnosticComparisons = False
167
+
168
+ #: Determines whether bounding-box rendering errors (typically network errors) should
169
+ #: be treated as failures
170
+ self.bFailOnRenderError = False
171
+
172
+ #: Should we print a warning if images referred to in the MD results file are missing?
173
+ self.bPrintMissingImageWarnings = True
174
+
175
+ #: If bPrintMissingImageWarnings is True, should we print a warning about missing images
176
+ #: just once ('once') or every time ('all')?
177
+ self.missingImageWarningType = 'once' # 'all'
178
+
179
+ #: Image width for rendered images (it's called "max" because we don't resize smaller images).
180
+ #:
181
+ #: Original size is preserved if this is None.
182
+ #:
183
+ #: This does *not* include the tile image grid.
184
+ self.maxOutputImageWidth = None
185
+
186
+ #: Line thickness (in pixels) for box rendering
187
+ self.lineThickness = 10
188
+
189
+ #: Box expansion (in pixels)
190
+ self.boxExpansion = 2
191
+
192
+ #: Progress bar used during comparisons and rendering. Do not set externally.
193
+ #:
194
+ #: :meta private:
195
+ self.pbar = None
196
+
197
+ #: Replace filename tokens after reading, useful when the directory structure
198
+ #: has changed relative to the structure the detector saw.
199
+ self.filenameReplacements = {}
200
+
201
+ #: How many folders up from the leaf nodes should we be going to aggregate images into
202
+ #: cameras?
203
+ #:
204
+ #: If this is zero, each leaf folder is treated as a camera.
205
+ self.nDirLevelsFromLeaf = 0
206
+
207
+ #: An optional function that takes a string (an image file name) and returns
208
+ #: a string (the corresponding folder ID), typically used when multiple folders
209
+ #: actually correspond to the same camera in a manufacturer-specific way (e.g.
210
+ #: a/b/c/RECONYX100 and a/b/c/RECONYX101 may really be the same camera).
211
+ #:
212
+ #: See ct_utils for a common replacement function that handles most common
213
+ #: manufacturer folder names.
214
+ self.customDirNameFunction = None
215
+
216
+ #: Include only specific folders, mutually exclusive with [excludeFolders]
217
+ self.includeFolders = None
218
+
219
+ #: Exclude specific folders, mutually exclusive with [includeFolders]
220
+ self.excludeFolders = None
221
+
222
+ #: Optionally show *other* detections (i.e., detections other than the
223
+ #: one the user is evaluating), typically in a light gray.
224
+ self.bRenderOtherDetections = False
225
+
226
+ #: Threshold to use for *other* detections
227
+ self.otherDetectionsThreshold = 0.2
228
+
229
+ #: Line width (in pixels) for *other* detections
230
+ self.otherDetectionsLineWidth = 1
231
+
232
+ #: Optionally show a grid that includes a sample image for the detection, plus
233
+ #: the top N additional detections
234
+ self.bRenderDetectionTiles = True
235
+
236
+ #: Width of the original image (within the larger output image) when bRenderDetectionTiles
237
+ #: is True.
238
+ #:
239
+ #: If this is None, we'll render the original image in the detection tile image
240
+ #: at its original width.
241
+ self.detectionTilesPrimaryImageWidth = None
242
+
243
+ #: Width to use for the grid of detection instances.
244
+ #:
245
+ #: Can be a width in pixels, or a number from 0 to 1 representing a fraction
246
+ #: of the primary image width.
247
+ #:
248
+ #: If you want to render the grid at exactly 1 pixel wide, I guess you're out
249
+ #: of luck.
250
+ self.detectionTilesCroppedGridWidth = 0.6
251
+
252
+ #: Location of the primary image within the mosaic ('right' or 'left')
253
+ self.detectionTilesPrimaryImageLocation = 'right'
254
+
255
+ #: Maximum number of individual detection instances to include in the mosaic
256
+ self.detectionTilesMaxCrops = 250
257
+
258
+ #: If bRenderOtherDetections is True, what color should we use to render the
259
+ #: (hopefully pretty subtle) non-target detections?
260
+ #:
261
+ #: In theory I'd like these "other detection" rectangles to be partially
262
+ #: transparent, but this is not straightforward, and the alpha is ignored
263
+ #: here. But maybe if I leave it here and wish hard enough, someday it
264
+ #: will work.
265
+ #:
266
+ #: otherDetectionsColors = ['dimgray']
267
+ self.otherDetectionsColors = [(105,105,105,100)]
268
+
269
+ #: Sort detections within a directory so nearby detections are adjacent
270
+ #: in the list, for faster review.
271
+ #:
272
+ #: Can be None, 'xsort', or 'clustersort'
273
+ #:
274
+ #: * None sorts detections chronologically by first occurrence
275
+ #: * 'xsort' sorts detections from left to right
276
+ #: * 'clustersort' clusters detections and sorts by cluster
277
+ self.smartSort = 'xsort'
278
+
279
+ #: Only relevant if smartSort == 'clustersort'
280
+ self.smartSortDistanceThreshold = 0.1
281
+
280
282
 
281
283
  class RepeatDetectionResults:
282
284
  """
283
285
  The results of an entire repeat detection analysis
284
286
  """
285
287
 
286
- #: The data table (Pandas DataFrame), as loaded from the input json file via
287
- #: load_api_results(). Has columns ['file', 'detections','failure'].
288
- detectionResults = None
289
-
290
- #: The other fields in the input json file, loaded via load_api_results()
291
- otherFields = None
292
-
293
- #: The data table after modification
294
- detectionResultsFiltered = None
295
-
296
- #: dict mapping folder names to whole rows from the data table
297
- rowsByDirectory = None
298
-
299
- #: dict mapping filenames to rows in the master table
300
- filenameToRow = None
301
-
302
- #: An array of length nDirs, where each element is a list of DetectionLocation
303
- #: objects for that directory that have been flagged as suspicious
304
- suspiciousDetections = None
305
-
306
- #: The location of the .json file written with information about the RDE
307
- #: review images (typically detectionIndex.json)
308
- filterFile = None
309
-
288
+ def __init__(self):
289
+
290
+ #: The data table (Pandas DataFrame), as loaded from the input json file via
291
+ #: load_api_results(). Has columns ['file', 'detections','failure'].
292
+ self.detectionResults = None
293
+
294
+ #: The other fields in the input json file, loaded via load_api_results()
295
+ self.otherFields = None
296
+
297
+ #: The data table after modification
298
+ self.detectionResultsFiltered = None
299
+
300
+ #: dict mapping folder names to whole rows from the data table
301
+ self.rowsByDirectory = None
302
+
303
+ #: dict mapping filenames to rows in the master table
304
+ self.filenameToRow = None
305
+
306
+ #: An array of length nDirs, where each element is a list of DetectionLocation
307
+ #: objects for that directory that have been flagged as suspicious
308
+ self.suspiciousDetections = None
309
+
310
+ #: The location of the .json file written with information about the RDE
311
+ #: review images (typically detectionIndex.json)
312
+ self.filterFile = None
313
+
310
314
 
311
315
  class IndexedDetection:
312
316
  """
@@ -72,68 +72,70 @@ class SubsetJsonDetectorOutputOptions:
72
72
  """
73
73
  Options used to parameterize subset_json_detector_output()
74
74
  """
75
-
76
- #: Only process files containing the token 'query'
77
- query = None
78
-
79
- #: Replace 'query' with 'replacement' if 'replacement' is not None. If 'query' is None,
80
- #: prepend 'replacement'
81
- replacement = None
82
-
83
- #: Should we split output into individual .json files for each folder?
84
- split_folders = False
85
-
86
- #: Folder level to use for splitting ['bottom','top','n_from_bottom','n_from_top','dict']
87
- #:
88
- #: 'dict' requires 'split_folder_param' to be a dictionary mapping each filename
89
- #: to a token.
90
- split_folder_mode = 'bottom' # 'top'
91
-
92
- #: When using the 'n_from_bottom' parameter to define folder splitting, this
93
- #: defines the number of directories from the bottom. 'n_from_bottom' with
94
- #: a parameter of zero is the same as 'bottom'.
95
- #:
96
- #: Same story with 'n_from_top'.
97
- #:
98
- #: When 'split_folder_mode' is 'dict', this should be a dictionary mapping each filename
99
- #: to a token.
100
- split_folder_param = 0
101
-
102
- #: Only meaningful if split_folders is True: should we convert pathnames to be relative
103
- #: the folder for each .json file?
104
- make_folder_relative = False
105
-
106
- #: Only meaningful if split_folders and make_folder_relative are True: if not None,
107
- #: will copy .json files to their corresponding output directories, relative to
108
- #: output_filename
109
- copy_jsons_to_folders = False
110
-
111
- #: Should we over-write .json files?
112
- overwrite_json_files = False
113
-
114
- #: If copy_jsons_to_folders is true, do we require that directories already exist?
115
- copy_jsons_to_folders_directories_must_exist = True
116
-
117
- #: Optional confidence threshold; if not None, detections below this confidence won't be
118
- #: included in the output.
119
- confidence_threshold = None
120
-
121
- #: Should we remove failed images?
122
- remove_failed_images = False
123
-
124
- #: Either a list of category IDs (as string-ints) (not names), or a dictionary mapping category *IDs*
125
- #: (as string-ints) (not names) to thresholds. Removes non-matching detections, does not
126
- #: remove images. Not technically mutually exclusize with category_names_to_keep, but it's an esoteric
127
- #: scenario indeed where you would want to specify both.
128
- categories_to_keep = None
129
-
130
- #: Either a list of category names (not IDs), or a dictionary mapping category *names* (not IDs) to thresholds.
131
- #: Removes non-matching detections, does not remove images. Not technically mutually exclusize with
132
- #: category_ids_to_keep, but it's an esoteric scenario indeed where you would want to specify both.
133
- category_names_to_keep = None
134
-
135
- #: Set to >0 during testing to limit the number of images that get processed.
136
- debug_max_images = -1
75
+
76
+ def __init__(self):
77
+
78
+ #: Only process files containing the token 'query'
79
+ self.query = None
80
+
81
+ #: Replace 'query' with 'replacement' if 'replacement' is not None. If 'query' is None,
82
+ #: prepend 'replacement'
83
+ self.replacement = None
84
+
85
+ #: Should we split output into individual .json files for each folder?
86
+ self.split_folders = False
87
+
88
+ #: Folder level to use for splitting ['bottom','top','n_from_bottom','n_from_top','dict']
89
+ #:
90
+ #: 'dict' requires 'split_folder_param' to be a dictionary mapping each filename
91
+ #: to a token.
92
+ self.split_folder_mode = 'bottom' # 'top'
93
+
94
+ #: When using the 'n_from_bottom' parameter to define folder splitting, this
95
+ #: defines the number of directories from the bottom. 'n_from_bottom' with
96
+ #: a parameter of zero is the same as 'bottom'.
97
+ #:
98
+ #: Same story with 'n_from_top'.
99
+ #:
100
+ #: When 'split_folder_mode' is 'dict', this should be a dictionary mapping each filename
101
+ #: to a token.
102
+ self.split_folder_param = 0
103
+
104
+ #: Only meaningful if split_folders is True: should we convert pathnames to be relative
105
+ #: to the folder for each .json file?
106
+ self.make_folder_relative = False
107
+
108
+ #: Only meaningful if split_folders and make_folder_relative are True: if not None,
109
+ #: will copy .json files to their corresponding output directories, relative to
110
+ #: output_filename
111
+ self.copy_jsons_to_folders = False
112
+
113
+ #: Should we over-write .json files?
114
+ self.overwrite_json_files = False
115
+
116
+ #: If copy_jsons_to_folders is true, do we require that directories already exist?
117
+ self.copy_jsons_to_folders_directories_must_exist = True
118
+
119
+ #: Optional confidence threshold; if not None, detections below this confidence won't be
120
+ #: included in the output.
121
+ self.confidence_threshold = None
122
+
123
+ #: Should we remove failed images?
124
+ self.remove_failed_images = False
125
+
126
+ #: Either a list of category IDs (as string-ints) (not names), or a dictionary mapping category *IDs*
127
+ #: (as string-ints) (not names) to thresholds. Removes non-matching detections, does not
128
+ #: remove images. Not technically mutually exclusive with category_names_to_keep, but it's an esoteric
129
+ #: scenario indeed where you would want to specify both.
130
+ self.categories_to_keep = None
131
+
132
+ #: Either a list of category names (not IDs), or a dictionary mapping category *names* (not IDs) to thresholds.
133
+ #: Removes non-matching detections, does not remove images. Not technically mutually exclusive with
134
+ #: categories_to_keep, but it's an esoteric scenario indeed where you would want to specify both.
135
+ self.category_names_to_keep = None
136
+
137
+ #: Set to >0 during testing to limit the number of images that get processed.
138
+ self.debug_max_images = -1
137
139
 
138
140
 
139
141
  #%% Main function
@@ -466,6 +468,8 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
466
468
 
467
469
  if options is None:
468
470
  options = SubsetJsonDetectorOutputOptions()
471
+ else:
472
+ options = copy.deepcopy(options)
469
473
 
470
474
  # Input validation
471
475
  if options.copy_jsons_to_folders:
@@ -397,8 +397,6 @@ images_per_query = 15
397
397
  min_valid_images_per_query = 3
398
398
  min_valid_image_size = 3000
399
399
 
400
- # TODO: trivially prallelizable
401
- #
402
400
  # i_row = 0; row = df.iloc[i_row]
403
401
  for i_row,row in df.iterrows():
404
402
 
@@ -117,7 +117,8 @@ def pretty_print_object(obj, b_print=True):
117
117
 
118
118
  # _ = pretty_print_object(obj)
119
119
 
120
- # TODO: it's sloppy that I'm making a module-wide change here.
120
+ # TODO: it's sloppy that I'm making a module-wide change here, consider at least
121
+ # recording these operations and re-setting them at the end of this function.
121
122
  jsonpickle.set_encoder_options('json', sort_keys=True, indent=2)
122
123
  a = jsonpickle.encode(obj)
123
124
  s = '{}'.format(a)
@@ -360,9 +361,9 @@ def rect_distance(r1, r2, format='x0y0x1y1'):
360
361
 
361
362
  def split_list_into_fixed_size_chunks(L,n):
362
363
  """
363
- Split the list or tuple L into chunks of size n (allowing chunks of size n-1 if necessary,
364
- i.e. len(L) does not have to be a multiple of n).
365
-
364
+ Split the list or tuple L into chunks of size n (allowing at most one chunk with size
365
+ less than n, i.e. len(L) does not have to be a multiple of n).
366
+
366
367
  Args:
367
368
  L (list): list to split into chunks
368
369
  n (int): preferred chunk size