megadetector 5.0.28__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +231 -224
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +340 -337
  65. megadetector/detection/pytorch_detector.py +304 -262
  66. megadetector/detection/run_detector.py +177 -164
  67. megadetector/detection/run_detector_batch.py +364 -363
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +256 -249
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +290 -282
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +415 -415
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +219 -146
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -499
  81. megadetector/postprocessing/load_api_results.py +23 -20
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +313 -298
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1018 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1457 -398
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +61 -61
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2526
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +401 -397
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +79 -73
  124. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  128. {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
  129. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  130. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  131. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  132. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  133. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  134. megadetector/data_management/importers/awc_to_json.py +0 -191
  135. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  136. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  137. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  138. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  139. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  140. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  141. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  142. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  143. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  144. megadetector/data_management/importers/ena24_to_json.py +0 -276
  145. megadetector/data_management/importers/filenames_to_json.py +0 -386
  146. megadetector/data_management/importers/helena_to_cct.py +0 -283
  147. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  148. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  149. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  150. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  151. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  152. megadetector/data_management/importers/missouri_to_json.py +0 -490
  153. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  154. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  155. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  156. megadetector/data_management/importers/pc_to_json.py +0 -365
  157. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  158. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  159. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  160. megadetector/data_management/importers/rspb_to_json.py +0 -356
  161. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  162. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  163. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  164. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  165. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  166. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  167. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  168. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  169. megadetector/data_management/importers/ubc_to_json.py +0 -399
  170. megadetector/data_management/importers/umn_to_json.py +0 -507
  171. megadetector/data_management/importers/wellington_to_json.py +0 -263
  172. megadetector/data_management/importers/wi_to_json.py +0 -442
  173. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  174. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  175. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  176. megadetector-5.0.28.dist-info/RECORD +0 -209
@@ -5,16 +5,16 @@ subset_json_detector_output.py
  Creates one or more subsets of a detector results file (.json), doing either
  or both of the following (if both are requested, they happen in this order):

- 1) Retrieve all elements where filenames contain a specified query string,
- optionally replacing that query with a replacement token. If the query is blank,
+ 1) Retrieve all elements where filenames contain a specified query string,
+ optionally replacing that query with a replacement token. If the query is blank,
  can also be used to prepend content to all filenames.

  Does not support regex's, but supports a special case of ^string to indicate "must start with
  to match".

- 2) Create separate .jsons for each unique path, optionally making the filenames
- in those .json's relative paths. In this case, you specify an output directory,
- rather than an output path. All images in the folder blah/foo/bar will end up
+ 2) Create separate .jsons for each unique path, optionally making the filenames
+ in those .json's relative paths. In this case, you specify an output directory,
+ rather than an output path. All images in the folder blah/foo/bar will end up
  in a .json file called blah_foo_bar.json.

  Can also apply a confidence threshold.
@@ -26,16 +26,20 @@ To subset a COCO Camera Traps .json database, see subset_json_db.py

  **Sample invocation (splitting into multiple json's)**

- Read from "1800_idfg_statewide_wolf_detections_w_classifications.json", split up into
+ Read from "1800_idfg_statewide_wolf_detections_w_classifications.json", split up into
  individual .jsons in 'd:/temp/idfg/output', making filenames relative to their individual
  folders:

- python subset_json_detector_output.py "d:/temp/idfg/1800_idfg_statewide_wolf_detections_w_classifications.json" "d:/temp/idfg/output" --split_folders --make_folder_relative
+ python subset_json_detector_output.py ^
+ "d:/temp/idfg/1800_idfg_statewide_wolf_detections_w_classifications.json" "d:/temp/idfg/output" ^
+ --split_folders --make_folder_relative

  Now do the same thing, but instead of writing .json's to d:/temp/idfg/output, write them to *subfolders*
  corresponding to the subfolders for each .json file.

- python subset_json_detector_output.py "d:/temp/idfg/1800_detections_S2.json" "d:/temp/idfg/output_to_folders" --split_folders --make_folder_relative --copy_jsons_to_folders
+ python subset_json_detector_output.py ^
+ "d:/temp/idfg/1800_detections_S2.json" "d:/temp/idfg/output_to_folders" ^
+ --split_folders --make_folder_relative --copy_jsons_to_folders

  **Sample invocation (creating a single subset matching a query)**

@@ -43,11 +47,13 @@ Read from "1800_detections.json", write to "1800_detections_2017.json"

  Include only images matching "2017", and change "2017" to "blah"

- python subset_json_detector_output.py "d:/temp/1800_detections.json" "d:/temp/1800_detections_2017_blah.json" --query 2017 --replacement blah
+ python subset_json_detector_output.py "d:/temp/1800_detections.json" "d:/temp/1800_detections_2017_blah.json" ^
+ --query 2017 --replacement blah

  Include all images, prepend with "prefix/"

- python subset_json_detector_output.py "d:/temp/1800_detections.json" "d:/temp/1800_detections_prefix.json" --replacement "prefix/"
+ python subset_json_detector_output.py "d:/temp/1800_detections.json" "d:/temp/1800_detections_prefix.json" ^
+ --replacement "prefix/"

  """

@@ -61,10 +67,9 @@ import os
  import re

  from tqdm import tqdm
- from collections import defaultdict

+ from megadetector.utils import ct_utils
  from megadetector.utils.ct_utils import args_to_object, get_max_conf, invert_dictionary
- from megadetector.utils.path_utils import top_level_folder
  from megadetector.utils.path_utils import recursive_file_list


@@ -76,23 +81,23 @@ class SubsetJsonDetectorOutputOptions:
  """

  def __init__(self):
-
+
  #: Only process files containing the token 'query'
  self.query = None
-
+
  #: Replace 'query' with 'replacement' if 'replacement' is not None. If 'query' is None,
  #: prepend 'replacement'
  self.replacement = None
-
+
  #: Should we split output into individual .json files for each folder?
  self.split_folders = False
-
- #: Folder level to use for splitting ['bottom','top','n_from_bottom','n_from_top','dict']
+
+ #: Folder level to use for splitting ['bottom','n_from_bottom','n_from_top','dict']
  #:
  #: 'dict' requires 'split_folder_param' to be a dictionary mapping each filename
  #: to a token.
- self.split_folder_mode = 'bottom' # 'top'
-
+ self.split_folder_mode = 'bottom'
+
  #: When using the 'n_from_bottom' parameter to define folder splitting, this
  #: defines the number of directories from the bottom. 'n_from_bottom' with
  #: a parameter of zero is the same as 'bottom'.
@@ -102,78 +107,77 @@ class SubsetJsonDetectorOutputOptions:
  #: When 'split_folder_mode' is 'dict', this should be a dictionary mapping each filename
  #: to a token.
  self.split_folder_param = 0
-
+
  #: Only meaningful if split_folders is True: should we convert pathnames to be relative
  #: the folder for each .json file?
  self.make_folder_relative = False
-
- #: Only meaningful if split_folders and make_folder_relative are True: if not None,
- #: will copy .json files to their corresponding output directories, relative to
+
+ #: Only meaningful if split_folders and make_folder_relative are True: if not None,
+ #: will copy .json files to their corresponding output directories, relative to
  #: output_filename
  self.copy_jsons_to_folders = False
-
+
  #: Should we over-write .json files?
  self.overwrite_json_files = False
-
+
  #: If copy_jsons_to_folders is true, do we require that directories already exist?
  self.copy_jsons_to_folders_directories_must_exist = True
-
+
  #: Optional confidence threshold; if not None, detections below this confidence won't be
  #: included in the output.
  self.confidence_threshold = None
-
+
  #: Should we remove failed images?
  self.remove_failed_images = False
-
- #: Either a list of category IDs (as string-ints) (not names), or a dictionary mapping category *IDs*
- #: (as string-ints) (not names) to thresholds. Removes non-matching detections, does not
- #: remove images. Not technically mutually exclusize with category_names_to_keep, but it's an esoteric
+
+ #: Either a list of category IDs (as string-ints) (not names), or a dictionary mapping category *IDs*
+ #: (as string-ints) (not names) to thresholds. Removes non-matching detections, does not
+ #: remove images. Not technically mutually exclusize with category_names_to_keep, but it's an esoteric
  #: scenario indeed where you would want to specify both.
  self.categories_to_keep = None
-
- #: Either a list of category names (not IDs), or a dictionary mapping category *names* (not IDs) to thresholds.
- #: Removes non-matching detections, does not remove images. Not technically mutually exclusize with
+
+ #: Either a list of category names (not IDs), or a dictionary mapping category *names* (not IDs) to thresholds.
+ #: Removes non-matching detections, does not remove images. Not technically mutually exclusize with
  #: category_ids_to_keep, but it's an esoteric scenario indeed where you would want to specify both.
  self.category_names_to_keep = None
-
+
  #: Set to >0 during testing to limit the number of images that get processed.
  self.debug_max_images = -1
-
+
  #: Keep only files in this list, which can be a .json results file or a folder.
  #
  #: Assumes that the input .json file contains relative paths when comparing to a folder.
  self.keep_files_in_list = None
-
- #: Remove classification with <= N instances. Does not re-map categories
+
+ #: Remove classification with <= N instances. Does not re-map categories
  #: to be contiguous. Set to 1 to remove empty categories only.
  self.remove_classification_categories_below_count = None
-
+
  # ...class SubsetJsonDetectorOutputOptions

-
+
  #%% Main function

  def _write_detection_results(data, output_filename, options):
  """
  Writes the detector-output-formatted dict *data* to *output_filename*.
  """
-
+
  if (not options.overwrite_json_files) and os.path.isfile(output_filename):
  raise ValueError('File {} exists'.format(output_filename))
-
+
  basedir = os.path.dirname(output_filename)
-
+
  if options.copy_jsons_to_folders and options.copy_jsons_to_folders_directories_must_exist:
  if not os.path.isdir(basedir):
  raise ValueError('Directory {} does not exist'.format(basedir))
  else:
  os.makedirs(basedir, exist_ok=True)
-
+
  n_images = len(data['images'])
-
+
  print('Writing detection output (with {} images) to {}'.format(n_images,output_filename))
- with open(output_filename, 'w', newline='\n') as f:
- json.dump(data,f,indent=1)
+ ct_utils.write_json(output_filename, data)

  # ...def _write_detection_results(...)

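For orientation, the options class and writer above are driven from Python roughly as follows. This is a minimal sketch assuming the module path shown in the file list; the filenames and the 0.2 threshold are illustrative placeholders, not values taken from this release:

    # Minimal usage sketch; field names come from SubsetJsonDetectorOutputOptions
    # above, but the filenames and the 0.2 threshold are hypothetical.
    from megadetector.postprocessing.subset_json_detector_output import (
        SubsetJsonDetectorOutputOptions, subset_json_detector_output)

    options = SubsetJsonDetectorOutputOptions()
    options.confidence_threshold = 0.2   # drop detections below 0.2 confidence
    options.overwrite_json_files = True  # don't error if the output file exists

    # With split_folders left at False, this writes a single subset file; per
    # the hunk above, the write now goes through ct_utils.write_json() rather
    # than a direct json.dump().
    data = subset_json_detector_output('md_results.json', 'md_results_subset.json',
                                       options)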
@@ -182,25 +186,25 @@ def remove_classification_categories_below_count(data, options):
  """
  Removes all classification categories below a threshold count. Does not re-map
  classification category IDs.
-
+
  Args:
  data (dict): data loaded from a MD results file
  options (SubsetJsonDetectorOutputOptions): parameters for subsetting
-
+
  Returns:
  dict: Possibly-modified version of [data] (also modifies in place)
  """
-
+
  if options.remove_classification_categories_below_count is None:
  return data
  if 'classification_categories' not in data:
  return data
-
+
  classification_category_id_to_count = {}
-
+
  for classification_category_id in data['classification_categories']:
  classification_category_id_to_count[classification_category_id] = 0
-
+
  # Count the number of occurrences of each classification category
  for im in data['images']:
  if 'detections' not in im or im['detections'] is None:
@@ -211,8 +215,8 @@ def remove_classification_categories_below_count(data, options):
  for classification in det['classifications']:
  classification_category_id_to_count[classification[0]] = \
  classification_category_id_to_count[classification[0]] + 1
-
-
+
+
  # Which categories have above-threshold counts?
  classification_category_ids_to_keep = set()

@@ -220,18 +224,18 @@
  if classification_category_id_to_count[classification_category_id] > \
  options.remove_classification_categories_below_count:
  classification_category_ids_to_keep.add(classification_category_id)
-
+
  n_categories_removed = \
  len(classification_category_id_to_count) - \
  len(classification_category_ids_to_keep)
-
+
  print('Removing {} of {} classification categories'.format(
  n_categories_removed,len(classification_category_id_to_count)))
-
+
  if n_categories_removed == 0:
  return data
-
-
+
+
  # Filter the category list
  output_classification_categories = {}
  for category_id in data['classification_categories']:
@@ -240,8 +244,8 @@ def remove_classification_categories_below_count(data, options):
  data['classification_categories'][category_id]
  data['classification_categories'] = output_classification_categories
  assert len(data['classification_categories']) == len(classification_category_ids_to_keep)
-
-
+
+
  # If necessary, filter the category descriptions
  if 'classification_category_descriptions' in data:
  output_classification_category_descriptions = {}
@@ -249,8 +253,8 @@ def remove_classification_categories_below_count(data, options):
  if category_id in classification_category_ids_to_keep:
  output_classification_category_descriptions[category_id] = \
  data['classification_category_descriptions'][category_id]
- data['classification_category_descriptions'] = output_classification_category_descriptions
-
+ data['classification_category_descriptions'] = output_classification_category_descriptions
+
  # Filter images
  for im in data['images']:
  if 'detections' not in im or im['detections'] is None:
@@ -263,7 +267,7 @@ def remove_classification_categories_below_count(data, options):
  if classification[0] in classification_category_ids_to_keep:
  classifications_to_keep.append(classification)
  det['classifications'] = classifications_to_keep
-
+
  return data

  # ...def remove_classification_categories_below_count(...)
@@ -272,34 +276,34 @@ def remove_classification_categories_below_count(data, options):
  def subset_json_detector_output_by_confidence(data, options):
  """
  Removes all detections below options.confidence_threshold.
-
+
  Args:
  data (dict): data loaded from a MD results file
  options (SubsetJsonDetectorOutputOptions): parameters for subsetting
-
+
  Returns:
  dict: Possibly-modified version of [data] (also modifies in place)
  """
-
+
  if options.confidence_threshold is None:
  return data
-
+
  images_in = data['images']
- images_out = []
-
+ images_out = []
+
  print('Subsetting by confidence >= {}'.format(options.confidence_threshold))
-
+
  n_max_changes = 0
-
+
  # im = images_in[0]
  for i_image, im in tqdm(enumerate(images_in), total=len(images_in)):
-
+
  # Always keep failed images; if the caller wants to remove these, they
  # will use remove_failed_images
  if ('detections' not in im) or (im['detections'] is None):
  images_out.append(im)
  continue
-
+
  p_orig = get_max_conf(im)

  # Find all detections above threshold for this image
@@ -308,7 +312,7 @@ def subset_json_detector_output_by_confidence(data, options):
  # If there are no detections above threshold, set the max probability
  # to -1, unless it already had a negative probability.
  if len(detections) == 0:
- if p_orig <= 0:
+ if p_orig <= 0:
  p = p_orig
  else:
  p = -1
@@ -316,7 +320,7 @@ def subset_json_detector_output_by_confidence(data, options):
  # Otherwise find the max confidence
  else:
  p = max([d['conf'] for d in detections])
-
+
  im['detections'] = detections

  # Did this thresholding result in a max-confidence change?
@@ -326,18 +330,18 @@ def subset_json_detector_output_by_confidence(data, options):
  assert (p_orig <= 0) or (p < p_orig), \
  'Confidence changed from {} to {}'.format(p_orig, p)
  n_max_changes += 1
-
+
  if 'max_detection_conf' in im:
  im['max_detection_conf'] = p
-
+
  images_out.append(im)
-
- # ...for each image
-
- data['images'] = images_out
+
+ # ...for each image
+
+ data['images'] = images_out
  print('done, found {} matches (of {}), {} max conf changes'.format(
  len(data['images']),len(images_in),n_max_changes))
-
+
  return data

  # ...def subset_json_detector_output_by_confidence(...)
@@ -347,20 +351,20 @@ def subset_json_detector_output_by_list(data, options):
  """
  Keeps only files in options.keep_files_in_list, which can be a .json results file or a folder.
  Assumes that the input .json file contains relative paths when comparing to a folder.
-
+
  Args:
  data (dict): data loaded from a MD results file
  options (SubsetJsonDetectorOutputOptions): parameters for subsetting
-
+
  Returns:
  dict: Possibly-modified version of [data] (also modifies in place)
  """
-
+
  if options.keep_files_in_list is None:
  return
-
+
  files_to_keep = None
-
+
  if os.path.isfile(options.keep_files_in_list):
  with open(options.keep_files_in_list,'r') as f:
  d = json.load(f)
@@ -371,36 +375,36 @@ def subset_json_detector_output_by_list(data, options):
  else:
  raise ValueError('Subsetting .json file by list: {} is neither a .json results file nor a folder'.format(
  options.keep_files_in_list))
-
+
  files_to_keep = [fn.replace('\\','/') for fn in files_to_keep]
  files_to_keep_set = set(files_to_keep)
-
+
  images_to_keep = []
-
+
  for im in data['images']:
  fn = im['file'].replace('\\','/')
  if fn in files_to_keep_set:
  images_to_keep.append(im)
-
+
  data['images'] = images_to_keep
-
+
  return data

  # ...def subset_json_detector_output_by_list(...)

-
+
  def subset_json_detector_output_by_categories(data, options):
  """
  Removes all detections without detections above a threshold for specific categories.
-
+
  Args:
  data (dict): data loaded from a MD results file
  options (SubsetJsonDetectorOutputOptions): parameters for subsetting
-
+
  Returns:
  dict: Possibly-modified version of [data] (also modifies in place)
  """
-
+
  # If categories_to_keep is supplied as a list, convert to a dict
  if options.categories_to_keep is not None:
  if not isinstance(options.categories_to_keep, dict):
@@ -409,7 +413,7 @@ def subset_json_detector_output_by_categories(data, options):
  # Set unspecified thresholds to a silly negative value
  dict_categories_to_keep[category_id] = -100000.0
  options.categories_to_keep = dict_categories_to_keep
-
+
  # If category_names_to_keep is supplied as a list, convert to a dict
  if options.category_names_to_keep is not None:
  if not isinstance(options.category_names_to_keep, dict):
@@ -418,9 +422,9 @@ def subset_json_detector_output_by_categories(data, options):
  # Set unspecified thresholds to a silly negative value
  dict_category_names_to_keep[category_name] = -100000.0
  options.category_names_to_keep = dict_category_names_to_keep
-
+
  category_name_to_category_id = invert_dictionary(data['detection_categories'])
-
+
  # If some categories are supplied as names, convert all to IDs and add to "categories_to_keep"
  if options.category_names_to_keep is not None:
  if options.categories_to_keep is None:
@@ -433,16 +437,16 @@ def subset_json_detector_output_by_categories(data, options):
  'Category {} ({}) specified as both a name and an ID'.format(
  category_name,category_id)
  options.categories_to_keep[category_id] = options.category_names_to_keep[category_name]
-
+
  if options.categories_to_keep is None:
  return data
-
+
  images_in = data['images']
- images_out = []
-
+ images_out = []
+
  print('Subsetting by categories (keeping {} categories):'.format(
  len(options.categories_to_keep)))
-
+
  for category_id in sorted(list(options.categories_to_keep.keys())):
  if category_id not in data['detection_categories']:
  print('Warning: category ID {} not in category map in this file'.format(category_id))
@@ -451,28 +455,28 @@ def subset_json_detector_output_by_categories(data, options):
  category_id,
  data['detection_categories'][category_id],
  options.categories_to_keep[category_id]))
-
+
  n_detections_in = 0
  n_detections_kept = 0
-
+
  # im = images_in[0]
  for i_image, im in tqdm(enumerate(images_in), total=len(images_in)):
-
+
  # Always keep failed images; if the caller wants to remove these, they
- # will use remove_failed_images
+ # will use remove_failed_images
  if ('detections' not in im) or (im['detections'] is None):
  images_out.append(im)
  continue
-
+
  n_detections_in += len(im['detections'])
-
+
  # Find all matching detections for this image
  detections = []
  for d in im['detections']:
  if (d['category'] in options.categories_to_keep) and \
  (d['conf'] > options.categories_to_keep[d['category']]):
  detections.append(d)
-
+
  im['detections'] = detections

  if 'max_detection_conf' in im:
@@ -481,17 +485,17 @@
  else:
  p = max([d['conf'] for d in detections])
  im['max_detection_conf'] = p
-
+
  n_detections_kept += len(im['detections'])
-
+
  images_out.append(im)
-
- # ...for each image
-
- data['images'] = images_out
+
+ # ...for each image
+
+ data['images'] = images_out
  print('done, kept {} detections (of {})'.format(
  n_detections_kept,n_detections_in))
-
+
  return data

  # ...def subset_json_detector_output_by_categories(...)
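As the hunks above show, category filters can be supplied either as a plain list (each entry is then assigned a large negative sentinel threshold, so every detection in that category is kept) or as a dictionary mapping categories to per-category confidence thresholds; a detection survives when its conf is strictly greater than the threshold. A sketch of both forms, where the 'animal' name and the 0.25 threshold are hypothetical examples:

    # Both forms accepted by subset_json_detector_output_by_categories();
    # 'animal' and 0.25 are illustrative values, not taken from this release.
    options = SubsetJsonDetectorOutputOptions()

    # List form: keep every 'animal' detection regardless of confidence
    options.category_names_to_keep = ['animal']

    # Dict form: keep 'animal' detections only when conf > 0.25
    options.category_names_to_keep = {'animal': 0.25}

    # IDs work the same way, keyed as string-ints into 'detection_categories'
    options.categories_to_keep = {'1': 0.25}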
@@ -500,37 +504,37 @@ def subset_json_detector_output_by_categories(data, options):
  def remove_failed_images(data,options):
  """
  Removed failed images from [data]
-
+
  Args:
  data (dict): data loaded from a MD results file
  options (SubsetJsonDetectorOutputOptions): parameters for subsetting
-
+
  Returns:
  dict: Possibly-modified version of [data] (also modifies in place)
  """
-
+
  images_in = data['images']
- images_out = []
-
+ images_out = []
+
  if not options.remove_failed_images:
  return data
-
+
  print('Removing failed images...', end='')
-
+
  # i_image = 0; im = images_in[0]
  for i_image, im in tqdm(enumerate(images_in), total=len(images_in)):
-
+
  if 'failure' in im and isinstance(im['failure'],str):
  continue
  else:
  images_out.append(im)
-
- # ...for each image
-
- data['images'] = images_out
+
+ # ...for each image
+
+ data['images'] = images_out
  n_removed = len(images_in) - len(data['images'])
  print('Done, removed {} of {}'.format(n_removed, len(images_in)))
-
+
  return data

  # ...def remove_failed_images(...)
@@ -538,35 +542,35 @@ def remove_failed_images(data,options):

  def subset_json_detector_output_by_query(data, options):
  """
- Subsets to images whose filename matches options.query; replace all instances of
+ Subsets to images whose filename matches options.query; replace all instances of
  options.query with options.replacement. No-op if options.query_string is None or ''.
-
+
  Args:
  data (dict): data loaded from a MD results file
  options (SubsetJsonDetectorOutputOptions): parameters for subsetting
-
+
  Returns:
  dict: Possibly-modified version of [data] (also modifies in place)
  """
-
+
  images_in = data['images']
- images_out = []
-
+ images_out = []
+
  print('Subsetting by query {}, replacement {}...'.format(options.query, options.replacement), end='')
-
+
  query_string = options.query
  query_starts_with = False
-
+
  # Support a special case regex-like notation for "starts with"
  if query_string is not None and query_string.startswith('^'):
  query_string = query_string[1:]
  query_starts_with = True
-
+
  # i_image = 0; im = images_in[0]
  for i_image, im in tqdm(enumerate(images_in), total=len(images_in)):
-
+
  fn = im['file']
-
+
  # Only take images that match the query
  if query_string is not None:
  if query_starts_with:
@@ -575,34 +579,34 @@ def subset_json_detector_output_by_query(data, options):
  else:
  if query_string not in fn:
  continue
-
+
  if options.replacement is not None:
  if query_string is not None:
  fn = fn.replace(query_string, options.replacement)
  else:
  fn = options.replacement + fn
-
+
  im['file'] = fn
-
+
  images_out.append(im)
-
- # ...for each image
-
- data['images'] = images_out
+
+ # ...for each image
+
+ data['images'] = images_out
  print('done, found {} matches (of {})'.format(len(data['images']), len(images_in)))
-
+
  return data

  # ...def subset_json_detector_output_by_query(...)

-
+
  def subset_json_detector_output(input_filename, output_filename, options, data=None):
  """
- Main entry point; creates one or more subsets of a detector results file. See the
+ Main entry point; creates one or more subsets of a detector results file. See the
  module header comment for more information about the available subsetting approaches.
-
+
  Makes a copy of [data] before modifying if a data dictionary is supplied.
-
+
  Args:
  input_filename (str): filename to load and subset; can be None if [data] is supplied
  output_filename (str): file or folder name (depending on [options]) to which we should
@@ -611,27 +615,27 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
  see SubsetJsonDetectorOutputOptions for details.
  data (dict, optional): data loaded from a .json file; if this is not None, [input_filename]
  will be ignored. If supplied, this will be copied before it's modified.
-
+
  Returns:
  dict: Results that are either loaded from [input_filename] and processed, or copied
- from [data] and processed.
-
+ from [data] and processed.
  """
-
- if options is None:
+
+ if options is None:
  options = SubsetJsonDetectorOutputOptions()
  else:
  options = copy.deepcopy(options)
-
- # Input validation
+
+ # Input validation
  if options.copy_jsons_to_folders:
  assert options.split_folders and options.make_folder_relative, \
  'copy_jsons_to_folders set without make_folder_relative and split_folders'
-
+
  if options.split_folders:
  if os.path.isfile(output_filename):
- raise ValueError('When splitting by folders, output must be a valid directory name, you specified an existing file')
-
+ raise ValueError('When splitting by folders, output must be a valid directory name, ' + \
+ 'you specified an existing file')
+
  if data is None:
  print('Reading file {}'.format(input_filename))
  with open(input_filename) as f:
@@ -644,232 +648,231 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
  print('Copying data')
  data = copy.deepcopy(data)
  print('...done')
-
+
  if options.query is not None:
-
+
  data = subset_json_detector_output_by_query(data, options)
-
+
  if options.remove_failed_images:
-
+
  data = remove_failed_images(data, options)
-
+
  if options.confidence_threshold is not None:
-
+
  data = subset_json_detector_output_by_confidence(data, options)
-
+
  if (options.categories_to_keep is not None) or (options.category_names_to_keep is not None):
-
+
  data = subset_json_detector_output_by_categories(data, options)
-
+
  if options.remove_classification_categories_below_count is not None:
-
+
  data = remove_classification_categories_below_count(data, options)
-
+
  if options.keep_files_in_list is not None:
-
+
  data = subset_json_detector_output_by_list(data, options)
-
+
  if not options.split_folders:
-
+
  _write_detection_results(data, output_filename, options)
  return data
-
+
  else:
-
+
  # Map images to unique folders
  print('Finding unique folders')
-
+
  folders_to_images = {}
-
+
  # im = data['images'][0]
  for im in tqdm(data['images']):
-
+
  fn = im['file']
-
+
  if options.split_folder_mode == 'bottom':
-
+
  dirname = os.path.dirname(fn)
-
+
  elif options.split_folder_mode == 'n_from_bottom':
-
+
  dirname = os.path.dirname(fn)
  for n in range(0, options.split_folder_param):
  dirname = os.path.dirname(dirname)
-
+
  elif options.split_folder_mode == 'n_from_top':
-
+
  # Split string into folders, keeping delimiters
-
+
  # Don't use this, it removes delimiters
  # tokens = _split_path(fn)
  tokens = re.split(r'([\\/])',fn)
-
- n_tokens_to_keep = ((options.split_folder_param + 1) * 2) - 1;
-
+
+ n_tokens_to_keep = ((options.split_folder_param + 1) * 2) - 1
+
  if n_tokens_to_keep > len(tokens):
  raise ValueError('Cannot walk {} folders from the top in path {}'.format(
  options.split_folder_param, fn))
  dirname = ''.join(tokens[0:n_tokens_to_keep])
-
- elif options.split_folder_mode == 'top':
-
- dirname = top_level_folder(fn)
-
+
  elif options.split_folder_mode == 'dict':
-
+
  assert isinstance(options.split_folder_param, dict)
  dirname = options.split_folder_param[fn]
-
+
  else:
-
+
  raise ValueError('Unrecognized folder split mode {}'.format(options.split_folder_mode))
-
+
  folders_to_images.setdefault(dirname, []).append(im)
-
+
  # ...for each image
-
+
  print('Found {} unique folders'.format(len(folders_to_images)))
-
+
  # Optionally make paths relative
  # dirname = list(folders_to_images.keys())[0]
  if options.make_folder_relative:
-
+
  print('Converting database-relative paths to individual-json-relative paths...')
-
+
  for dirname in tqdm(folders_to_images):
  # im = folders_to_images[dirname][0]
  for im in folders_to_images[dirname]:
  fn = im['file']
  relfn = os.path.relpath(fn, dirname).replace('\\', '/')
  im['file'] = relfn
-
+
  # ...if we need to convert paths to be folder-relative
-
+
  print('Finished converting to json-relative paths, writing output')
-
+
  os.makedirs(output_filename, exist_ok=True)
  all_images = data['images']
-
+
  # dirname = list(folders_to_images.keys())[0]
  for dirname in tqdm(folders_to_images):
-
+
  json_fn = dirname.replace('/', '_').replace('\\', '_') + '.json'
-
+
  if options.copy_jsons_to_folders:
  json_fn = os.path.join(output_filename, dirname, json_fn)
  else:
  json_fn = os.path.join(output_filename, json_fn)
-
- # Recycle the 'data' struct, replacing 'images' every time... medium-hacky, but
+
+ # Recycle the 'data' struct, replacing 'images' every time... medium-hacky, but
  # forward-compatible in that I don't take dependencies on the other fields
  dir_data = data
  dir_data['images'] = folders_to_images[dirname]
  _write_detection_results(dir_data, json_fn, options)
  print('Wrote {} images to {}'.format(len(dir_data['images']), json_fn))
-
+
  # ...for each directory
-
+
  data['images'] = all_images
-
+
  return data
-
+
  # ...if we're splitting folders

  # ...def subset_json_detector_output(...)

-
+
  #%% Interactive driver
-
+
  if False:

  #%%
-
+
  #%% Subset a file without splitting
-
+
  input_filename = r"c:\temp\sample.json"
  output_filename = r"c:\temp\output.json"
-
+
  options = SubsetJsonDetectorOutputOptions()
  options.replacement = None
  options.query = 'S2'
-
+
  data = subset_json_detector_output(input_filename,output_filename,options,None)
-
+

  #%% Subset and split, but don't copy to individual folders

- input_filename = r"C:\temp\xxx-20201028_detections.filtered_rde_0.60_0.85_10_0.05_r2_export\xxx-20201028_detections.filtered_rde_0.60_0.85_10_0.05_r2_export.json"
+ input_filename = r"C:\temp\xxx-export.json"
  output_filename = r"c:\temp\out"
-
+
  options = SubsetJsonDetectorOutputOptions()
- options.split_folders = True
+ options.split_folders = True
  options.make_folder_relative = True
  options.split_folder_mode = 'n_from_top'
  options.split_folder_param = 1
-
+
  data = subset_json_detector_output(input_filename,output_filename,options,None)
-
-
+
+
  #%% Subset and split, copying to individual folders
-
+
  input_filename = r"c:\temp\sample.json"
  output_filename = r"c:\temp\out"
-
+
  options = SubsetJsonDetectorOutputOptions()
- options.split_folders = True
+ options.split_folders = True
  options.make_folder_relative = True
  options.copy_jsons_to_folders = True
-
+
  data = subset_json_detector_output(input_filename,output_filename,options,data)
-
+

  #%% Command-line driver

- def main():
-
+ def main(): # noqa
+
  parser = argparse.ArgumentParser()
  parser.add_argument('input_file', type=str, help='Input .json filename')
  parser.add_argument('output_file', type=str, help='Output .json filename')
- parser.add_argument('--query', type=str, default=None,
+ parser.add_argument('--query', type=str, default=None,
  help='Query string to search for (omitting this matches all)')
- parser.add_argument('--replacement', type=str, default=None,
+ parser.add_argument('--replacement', type=str, default=None,
  help='Replace [query] with this')
- parser.add_argument('--confidence_threshold', type=float, default=None,
+ parser.add_argument('--confidence_threshold', type=float, default=None,
  help='Remove detections below this confidence level')
- parser.add_argument('--keep_files_in_list', type=str, default=None,
+ parser.add_argument('--keep_files_in_list', type=str, default=None,
  help='Keep only files in this list, which can be a .json results file or a folder.' + \
  ' Assumes that the input .json file contains relative paths when comparing to a folder.')
- parser.add_argument('--split_folders', action='store_true',
+ parser.add_argument('--split_folders', action='store_true',
  help='Split .json files by leaf-node folder')
  parser.add_argument('--split_folder_param', type=int,
  help='Directory level count for n_from_bottom and n_from_top splitting')
  parser.add_argument('--split_folder_mode', type=str,
- help='Folder level to use for splitting ("top" or "bottom")')
- parser.add_argument('--make_folder_relative', action='store_true',
- help='Make image paths relative to their containing folder (only meaningful with split_folders)')
- parser.add_argument('--overwrite_json_files', action='store_true',
+ help='Folder level to use for splitting ("bottom", "n_from_bottom", or "n_from_top")')
+ parser.add_argument('--make_folder_relative', action='store_true',
+ help='Make image paths relative to their containing folder ' + \
+ '(only meaningful with split_folders)')
+ parser.add_argument('--overwrite_json_files', action='store_true',
  help='Overwrite output files')
- parser.add_argument('--copy_jsons_to_folders', action='store_true',
- help='When using split_folders and make_folder_relative, copy jsons to their corresponding folders (relative to output_file)')
+ parser.add_argument('--copy_jsons_to_folders', action='store_true',
+ help='When using split_folders and make_folder_relative, copy jsons to their ' + \
+ 'corresponding folders (relative to output_file)')
  parser.add_argument('--create_folders', action='store_true',
- help='When using copy_jsons_to_folders, create folders that don''t exist')
+ help='When using copy_jsons_to_folders, create folders that don''t exist')
  parser.add_argument('--remove_classification_categories_below_count', type=int, default=None,
- help='Remove classification categories with less than this many instances (no removal by default)')
-
+ help='Remove classification categories with less than this many instances ' + \
+ '(no removal by default)')
+
  if len(sys.argv[1:]) == 0:
  parser.print_help()
  parser.exit()
-
- args = parser.parse_args()
-
+
+ args = parser.parse_args()
+
  # Convert to an options object
  options = SubsetJsonDetectorOutputOptions()
  if args.create_folders:
  options.copy_jsons_to_folders_directories_must_exist = False
-
+
  args_to_object(args, options)
-
+
  subset_json_detector_output(args.input_file, args.output_file, options)
-
- if __name__ == '__main__':
+
+ if __name__ == '__main__':
  main()
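Putting the pieces together, here is a hedged end-to-end sketch of the folder-splitting path exercised by the interactive driver above; the filenames are placeholders:

    # End-to-end sketch of the split_folders path, mirroring the interactive
    # driver in this diff; 'md_results.json' and 'output_dir' are placeholders.
    from megadetector.postprocessing.subset_json_detector_output import (
        SubsetJsonDetectorOutputOptions, subset_json_detector_output)

    options = SubsetJsonDetectorOutputOptions()
    options.split_folders = True           # write one .json per folder
    options.split_folder_mode = 'bottom'   # note that 'top' is removed in this version
    options.make_folder_relative = True    # store paths relative to each .json
    options.overwrite_json_files = True

    # When splitting, the output argument is a directory; images in
    # blah/foo/bar land in a file named blah_foo_bar.json.
    data = subset_json_detector_output('md_results.json', 'output_dir', options)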