megadetector-5.0.7-py3-none-any.whl → megadetector-5.0.9-py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries; it is provided for informational purposes only.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (191)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,61 +1,55 @@
- ########
- #
- # subset_json_detector_output.py
- #
- # Creates one or more subsets of a detector API output file (.json), doing either
- # or both of the following (if both are requested, they happen in this order):
- #
- # 1) Retrieve all elements where filenames contain a specified query string,
- # optionally replacing that query with a replacement token. If the query is blank,
- # can also be used to prepend content to all filenames.
- #
- # Does not support regex's, but supports a special case of ^string to indicate "must start with
- # to match".
- #
- # 2) Create separate .jsons for each unique path, optionally making the filenames
- # in those .json's relative paths. In this case, you specify an output directory,
- # rather than an output path. All images in the folder blah\foo\bar will end up
- # in a .json file called blah_foo_bar.json.
- #
- # Can also apply a confidence threshold.
- #
- # Can also subset by categories above a threshold (programmatic invocation only, this is
- # not supported at the command line yet).
- #
- ###
- #
- # Sample invocations (splitting into multiple json's):
- #
- # Read from "1800_idfg_statewide_wolf_detections_w_classifications.json", split up into
- # individual .jsons in 'd:\temp\idfg\output', making filenames relative to their individual
- # folders:
- #
- # python subset_json_detector_output.py "d:\temp\idfg\1800_idfg_statewide_wolf_detections_w_classifications.json" "d:\temp\idfg\output" --split_folders --make_folder_relative
- #
- # Now do the same thing, but instead of writing .json's to d:\temp\idfg\output, write them to *subfolders*
- # corresponding to the subfolders for each .json file.
- #
- # python subset_json_detector_output.py "d:\temp\idfg\1800_detections_S2.json" "d:\temp\idfg\output_to_folders" --split_folders --make_folder_relative --copy_jsons_to_folders
- #
- ###
- #
- # Sample invocations (creating a single subset matching a query):
- #
- # Read from "1800_detections.json", write to "1800_detections_2017.json"
- #
- # Include only images matching "2017", and change "2017" to "blah"
- #
- # python subset_json_detector_output.py "d:\temp\1800_detections.json" "d:\temp\1800_detections_2017_blah.json" --query 2017 --replacement blah
- #
- # Include all images, prepend with "prefix/"
- #
- # python subset_json_detector_output.py "d:\temp\1800_detections.json" "d:\temp\1800_detections_prefix.json" --replacement "prefix/"
- #
- ###
- #
- # To subset a COCO Camera Traps .json database, see subset_json_db.py
- #
- ########
+ r"""
+
+ subset_json_detector_output.py
+
+ Creates one or more subsets of a detector results file (.json), doing either
+ or both of the following (if both are requested, they happen in this order):
+
+ 1) Retrieve all elements where filenames contain a specified query string,
+ optionally replacing that query with a replacement token. If the query is blank,
+ can also be used to prepend content to all filenames.
+
+ Does not support regex's, but supports a special case of ^string to indicate "must start with
+ to match".
+
+ 2) Create separate .jsons for each unique path, optionally making the filenames
+ in those .json's relative paths. In this case, you specify an output directory,
+ rather than an output path. All images in the folder blah/foo/bar will end up
+ in a .json file called blah_foo_bar.json.
+
+ Can also apply a confidence threshold.
+
+ Can also subset by categories above a threshold (programmatic invocation only, this is
+ not supported at the command line yet).
+
+ To subset a COCO Camera Traps .json database, see subset_json_db.py
+
+ **Sample invocation (splitting into multiple json's)**
+
+ Read from "1800_idfg_statewide_wolf_detections_w_classifications.json", split up into
+ individual .jsons in 'd:/temp/idfg/output', making filenames relative to their individual
+ folders:
+
+ python subset_json_detector_output.py "d:/temp/idfg/1800_idfg_statewide_wolf_detections_w_classifications.json" "d:/temp/idfg/output" --split_folders --make_folder_relative
+
+ Now do the same thing, but instead of writing .json's to d:/temp/idfg/output, write them to *subfolders*
+ corresponding to the subfolders for each .json file.
+
+ python subset_json_detector_output.py "d:/temp/idfg/1800_detections_S2.json" "d:/temp/idfg/output_to_folders" --split_folders --make_folder_relative --copy_jsons_to_folders
+
+ **Sample invocation (creating a single subset matching a query)**
+
+ Read from "1800_detections.json", write to "1800_detections_2017.json"
+
+ Include only images matching "2017", and change "2017" to "blah"
+
+ python subset_json_detector_output.py "d:/temp/1800_detections.json" "d:/temp/1800_detections_2017_blah.json" --query 2017 --replacement blah
+
+ Include all images, prepend with "prefix/"
+
+ python subset_json_detector_output.py "d:/temp/1800_detections.json" "d:/temp/1800_detections_prefix.json" --replacement "prefix/"
+
+ """

  #%% Constants and imports

@@ -68,81 +62,85 @@ import re

  from tqdm import tqdm

- from md_utils.ct_utils import args_to_object
- from md_utils.ct_utils import get_max_conf
- from md_utils.ct_utils import invert_dictionary
+ from md_utils.ct_utils import args_to_object, get_max_conf, invert_dictionary
+ from md_utils.path_utils import top_level_folder


  #%% Helper classes

  class SubsetJsonDetectorOutputOptions:
+ """
+ Options used to parameterize subset_json_detector_output()
+ """

- # Only process files containing the token 'query'
+ #: Only process files containing the token 'query'
  query = None

- # Replace 'query' with 'replacement' if 'replacement' is not None. If 'query' is None,
- # prepend 'replacement'
+ #: Replace 'query' with 'replacement' if 'replacement' is not None. If 'query' is None,
+ #: prepend 'replacement'
  replacement = None

- # Should we split output into individual .json files for each folder?
+ #: Should we split output into individual .json files for each folder?
  split_folders = False

- # Folder level to use for splitting ['bottom','top','n_from_bottom','n_from_top','dict']
- #
- # 'dict' requires 'split_folder_param' to be a dictionary mapping each filename
- # to a token.
+ #: Folder level to use for splitting ['bottom','top','n_from_bottom','n_from_top','dict']
+ #:
+ #: 'dict' requires 'split_folder_param' to be a dictionary mapping each filename
+ #: to a token.
  split_folder_mode = 'bottom' # 'top'

- # When using the 'n_from_bottom' parameter to define folder splitting, this
- # defines the number of directories from the bottom. 'n_from_bottom' with
- # a parameter of zero is the same as 'bottom'.
- #
- # Same story with 'n_from_top'.
- #
- # When 'split_folder_mode' is 'dict', this should be a dictionary mapping each filename
- # to a token.
+ #: When using the 'n_from_bottom' parameter to define folder splitting, this
+ #: defines the number of directories from the bottom. 'n_from_bottom' with
+ #: a parameter of zero is the same as 'bottom'.
+ #:
+ #: Same story with 'n_from_top'.
+ #:
+ #: When 'split_folder_mode' is 'dict', this should be a dictionary mapping each filename
+ #: to a token.
  split_folder_param = 0

- # Only meaningful if split_folders is True: should we convert pathnames to be relative
- # the folder for each .json file?
+ #: Only meaningful if split_folders is True: should we convert pathnames to be relative
+ #: the folder for each .json file?
  make_folder_relative = False

- # Only meaningful if split_folders and make_folder_relative are True: if not None,
- # will copy .json files to their corresponding output directories, relative to
- # output_filename
+ #: Only meaningful if split_folders and make_folder_relative are True: if not None,
+ #: will copy .json files to their corresponding output directories, relative to
+ #: output_filename
  copy_jsons_to_folders = False

- # Should we over-write .json files?
+ #: Should we over-write .json files?
  overwrite_json_files = False

- # If copy_jsons_to_folders is true, do we require that directories already exist?
+ #: If copy_jsons_to_folders is true, do we require that directories already exist?
  copy_jsons_to_folders_directories_must_exist = True

- # Threshold on confidence
+ #: Optional confidence threshold; if not None, detections below this confidence won't be
+ #: included in the output.
  confidence_threshold = None

- # Should we remove failed images?
+ #: Should we remove failed images?
  remove_failed_images = False

- # Either a list of category IDs (as string-ints) (not names), or a dictionary mapping category *IDs*
- # (as string-ints) (not names) to thresholds. Removes non-matching detections, does not
- # remove images. Not technically mutually exclusize with category_names_to_keep, but it's an esoteric
- # scenario indeed where you would want to specify both.
+ #: Either a list of category IDs (as string-ints) (not names), or a dictionary mapping category *IDs*
+ #: (as string-ints) (not names) to thresholds. Removes non-matching detections, does not
+ #: remove images. Not technically mutually exclusize with category_names_to_keep, but it's an esoteric
+ #: scenario indeed where you would want to specify both.
  categories_to_keep = None

- # Either a list of category names (not IDs), or a dictionary mapping category *names* (not IDs) to thresholds.
- # Removes non-matching detections, does not remove images. Not technically mutually exclusize with
- # category_ids_to_keep, but it's an esoteric scenario indeed where you would want to specify both.
+ #: Either a list of category names (not IDs), or a dictionary mapping category *names* (not IDs) to thresholds.
+ #: Removes non-matching detections, does not remove images. Not technically mutually exclusize with
+ #: category_ids_to_keep, but it's an esoteric scenario indeed where you would want to specify both.
  category_names_to_keep = None

+ #: Set to >0 during testing to limit the number of images that get processed.
  debug_max_images = -1


  #%% Main function

- def write_detection_results(data, output_filename, options):
+ def _write_detection_results(data, output_filename, options):
  """
- Write the detector-output-formatted dict *data* to *output_filename*.
+ Writes the detector-output-formatted dict *data* to *output_filename*.
  """

  if (not options.overwrite_json_files) and os.path.isfile(output_filename):
@@ -160,12 +158,19 @@ def write_detection_results(data, output_filename, options):
  with open(output_filename, 'w') as f:
  json.dump(data,f,indent=1)

- # ...write_detection_results()
+ # ..._write_detection_results()


  def subset_json_detector_output_by_confidence(data, options):
  """
- Remove all detections below options.confidence_threshold, update max confidences accordingly.
+ Removes all detections below options.confidence_threshold.
+
+ Args:
+ data (dict): data loaded from a MD results file
+ options (SubsetJsonDetectorOutputOptions): parameters for subsetting
+
+ Returns:
+ dict: Possibly-modified version of data (also modifies in place)
  """

  if options.confidence_threshold is None:
@@ -232,7 +237,14 @@ def subset_json_detector_output_by_confidence(data, options):

  def subset_json_detector_output_by_categories(data, options):
  """
- Remove all detections without detections above a threshold for specific categories.
+ Removes all detections without detections above a threshold for specific categories.
+
+ Args:
+ data (dict): data loaded from a MD results file
+ options (SubsetJsonDetectorOutputOptions): parameters for subsetting
+
+ Returns:
+ dict: Possibly-modified version of data (also modifies in place)
  """

  # If categories_to_keep is supplied as a list, convert to a dict
@@ -334,6 +346,13 @@ def subset_json_detector_output_by_categories(data, options):
  def remove_failed_images(data,options):
  """
  Removed failed images from [data]
+
+ Args:
+ data (dict): data loaded from a MD results file
+ options (SubsetJsonDetectorOutputOptions): parameters for subsetting
+
+ Returns:
+ dict: Possibly-modified version of data (also modifies in place)
  """

  images_in = data['images']
@@ -365,8 +384,15 @@ def remove_failed_images(data,options):

  def subset_json_detector_output_by_query(data, options):
  """
- Subset to images whose filename matches options.query; replace all instances of
- options.query with options.replacement.
+ Subsets to images whose filename matches options.query; replace all instances of
+ options.query with options.replacement. No-op if options.query_string is None or ''.
+
+ Args:
+ data (dict): data loaded from a MD results file
+ options (SubsetJsonDetectorOutputOptions): parameters for subsetting
+
+ Returns:
+ dict: Possibly-modified version of data (also modifies in place)
  """

  images_in = data['images']
@@ -415,74 +441,27 @@ def subset_json_detector_output_by_query(data, options):

  # ...subset_json_detector_output_by_query()

-
- def split_path(path, maxdepth=100):
- """
- Splits [path] into all its constituent tokens, e.g.:
-
- c:\blah\boo\goo.txt
-
- ...becomes:
-
- ['c:\\', 'blah', 'boo', 'goo.txt']
-
- http://nicks-liquid-soapbox.blogspot.com/2011/03/splitting-path-to-list-in-python.html
- """
-
- (head, tail) = os.path.split(path)
- return split_path(head, maxdepth - 1) + [tail] \
- if maxdepth and head and head != path \
- else [head or tail]
-
- # ...split_path()
-
-
- def top_level_folder(p):
- """
- Gets the top-level folder from the path *p*; on Windows, will use the top-level folder
- that isn't the drive. E.g., top_level_folder(r"c:\blah\foo") returns "c:\blah". Does not
- include the leaf node, i.e. top_level_folder('/blah/foo') returns '/blah'.
- """
-
- if p == '':
- return ''
-
- # Path('/blah').parts is ('/','blah')
- parts = split_path(p)
-
- if len(parts) == 1:
- return parts[0]
-
- # Handle paths like:
- #
- # /, \, /stuff, c:, c:\stuff
- drive = os.path.splitdrive(p)[0]
- if parts[0] == drive or parts[0] == drive + '/' or parts[0] == drive + '\\' or parts[0] in ['\\', '/']:
- return os.path.join(parts[0], parts[1])
- else:
- return parts[0]
-
- # ...top_level_folder()
-
-
- if False:
-
- p = 'blah/foo/bar'; s = top_level_folder(p); print(s); assert s == 'blah'
- p = '/blah/foo/bar'; s = top_level_folder(p); print(s); assert s == '/blah'
- p = 'bar'; s = top_level_folder(p); print(s); assert s == 'bar'
- p = ''; s = top_level_folder(p); print(s); assert s == ''
- p = 'c:\\'; s = top_level_folder(p); print(s); assert s == 'c:\\'
- p = r'c:\blah'; s = top_level_folder(p); print(s); assert s == 'c:\\blah'
- p = r'c:\foo'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
- p = r'c:/foo'; s = top_level_folder(p); print(s); assert s == 'c:/foo'
- p = r'c:\foo/bar'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
-

  def subset_json_detector_output(input_filename, output_filename, options, data=None):
  """
- Main internal entry point
+ Main entry point; creates one or more subsets of a detector results file. See the
+ module header comment for more information about the available subsetting approaches.

  Makes a copy of [data] before modifying if a data dictionary is supplied.
+
+ Args:
+ input_filename (str): filename to load and subset; can be None if [data] is supplied
+ output_filename (str): file or folder name (depending on [options]) to which we should
+ write subset results.
+ options (SubsetJsonDetectorOutputOptions): parameters for .json splitting/subsetting;
+ see SubsetJsonDetectorOutputOptions for details.
+ data (dict, optional): data loaded from a .json file; if this is not None, [input_filename]
+ will be ignored. If supplied, this will be copied before it's modified.
+
+ Returns:
+ dict: Results that are either loaded from [input_filename] and processed, or copied
+ from [data] and processed.
+
  """

  if options is None:
@@ -528,7 +507,7 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N

  if not options.split_folders:

- write_detection_results(data, output_filename, options)
+ _write_detection_results(data, output_filename, options)
  return data

  else:
@@ -558,7 +537,7 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
  # Split string into folders, keeping delimiters

  # Don't use this, it removes delimiters
- # tokens = split_path(fn)
+ # tokens = _split_path(fn)
  tokens = re.split(r'([\\/])',fn)

  n_tokens_to_keep = ((options.split_folder_param + 1) * 2) - 1;
@@ -621,7 +600,7 @@ def subset_json_detector_output(input_filename, output_filename, options, data=N
  # forward-compatible in that I don't take dependencies on the other fields
  dir_data = data
  dir_data['images'] = folders_to_images[dirname]
- write_detection_results(dir_data, json_fn, options)
+ _write_detection_results(dir_data, json_fn, options)
  print('Wrote {} images to {}'.format(len(dir_data['images']), json_fn))

  # ...for each directory
@@ -713,7 +692,5 @@ def main():

  subset_json_detector_output(args.input_file, args.output_file, options)

-
- if __name__ == '__main__':
-
+ if __name__ == '__main__':
  main()
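
Since category-based subsetting is flagged above as programmatic-only, a short usage sketch may be useful. The class, function, and option names below come from the diff itself; the file paths, category names, and thresholds are hypothetical, and the import path assumes the wheel's package layout.

    from api.batch_processing.postprocessing.subset_json_detector_output import (
        SubsetJsonDetectorOutputOptions, subset_json_detector_output)

    # Keep only detections at or above these per-category confidence thresholds
    # (category *names*, not IDs); drop all other detections.
    options = SubsetJsonDetectorOutputOptions()
    options.confidence_threshold = 0.2
    options.category_names_to_keep = {'animal': 0.2, 'person': 0.5}

    # Writes the subset to the output file and returns the (modified) dict
    data = subset_json_detector_output('md_results.json',
                                       'md_results_animals_people.json',
                                       options)
    print('Kept {} images'.format(len(data['images'])))

    # Alternatively, split results into one .json per top-level folder, with
    # filenames made relative to each folder; per the module header, the second
    # argument is then treated as an output *directory*.
    split_options = SubsetJsonDetectorOutputOptions()
    split_options.split_folders = True
    split_options.split_folder_mode = 'top'
    split_options.make_folder_relative = True
    subset_json_detector_output('md_results.json', 'output_folder', split_options)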
@@ -1,26 +1,26 @@
- ########
- #
- # top_folders_to_bottom.py
- #
- # Given a base folder with files like:
- #
- # A/1/2/a.jpg
- # B/3/4/b.jpg
- #
- # ...moves the top-level folders to the bottom in a new output folder, i.e., creates:
- #
- # 1/2/A/a.jpg
- # 3/4/B/b.jpg
- #
- # In practice, this is used to make this:
- #
- # animal/camera01/image01.jpg
- #
- # ...look like:
- #
- # camera01/animal/image01.jpg
- #
- ########
+ """
+
+ top_folders_to_bottom.py
+
+ Given a base folder with files like:
+
+ * A/1/2/a.jpg
+ * B/3/4/b.jpg
+
+ ...moves the top-level folders to the bottom in a new output folder, i.e., creates:
+
+ * 1/2/A/a.jpg
+ * 3/4/B/b.jpg
+
+ In practice, this is used to make this:
+
+ animal/camera01/image01.jpg
+
+ ...look like:
+
+ camera01/animal/image01.jpg
+
+ """

  #%% Constants and imports

@@ -35,31 +35,46 @@ from tqdm import tqdm
  from functools import partial
  from multiprocessing.pool import ThreadPool

+ from md_utils.path_utils import path_is_abs
+
+
+ #%% Classes
+
  class TopFoldersToBottomOptions:
+ """
+ Options used to parameterize top_folders_to_bottom()
+ """

  def __init__(self,input_folder,output_folder,copy=True,n_threads=1):
+
+ #: Whether to copy (True) vs. move (False) false when re-organizing
  self.copy = copy
+
+ #: Number of worker threads to use, or <1 to disable parallelization
  self.n_threads = n_threads
+
+ #: Input folder
  self.input_folder = input_folder
+
+ #: Output folder
  self.output_folder = output_folder
- self.overwrite = False

+ #: If this is False and an output file exists, throw an error
+ self.overwrite = False

- #%% Support functions
-
- def path_is_abs(p): return (len(p) > 1) and (p[0] == '/' or p[1] == ':')
-

  #%% Main functions

- def process_file(relative_filename,options,execute=True):
+ def _process_file(relative_filename,options,execute=True):

- assert ('/' in relative_filename) and ('\\' not in relative_filename) and (not path_is_abs(relative_filename))
+ assert ('/' in relative_filename) and \
+ ('\\' not in relative_filename) and \
+ (not path_is_abs(relative_filename))

  # Find top-level folder
  tokens = relative_filename.split('/')
- top_level_folder = tokens.pop(0)
- tokens.insert(len(tokens)-1,top_level_folder)
+ topmost_folder = tokens.pop(0)
+ tokens.insert(len(tokens)-1,topmost_folder)

  # Find file/folder names
  output_relative_path = '/'.join(tokens)
@@ -86,11 +101,35 @@ def process_file(relative_filename,options,execute=True):

  return output_absolute_path

- # ...def process_file()
+ # ...def _process_file()


  def top_folders_to_bottom(options):
-
+ """
+ top_folders_to_bottom.py
+
+ Given a base folder with files like:
+
+ * A/1/2/a.jpg
+ * B/3/4/b.jpg
+
+ ...moves the top-level folders to the bottom in a new output folder, i.e., creates:
+
+ * 1/2/A/a.jpg
+ * 3/4/B/b.jpg
+
+ In practice, this is used to make this:
+
+ animal/camera01/image01.jpg
+
+ ...look like:
+
+ camera01/animal/image01.jpg
+
+ Args:
+ options (TopFoldersToBottomOptions): See TopFoldersToBottomOptions for parameter details.
+
+ """
  os.makedirs(options.output_folder,exist_ok=True)

  # Enumerate input folder
@@ -112,7 +151,7 @@ def top_folders_to_bottom(options):
  relative_files = [s for s in relative_files if '/' in s]

  # Make sure each input file maps to a unique output file
- absolute_output_files = [process_file(s, options, execute=False) for s in relative_files]
+ absolute_output_files = [_process_file(s, options, execute=False) for s in relative_files]
  assert len(absolute_output_files) == len(set(absolute_output_files)),\
  "Error: input filenames don't map to unique output filenames"

@@ -122,13 +161,13 @@ def top_folders_to_bottom(options):
  if options.n_threads <= 1:

  for relative_filename in tqdm(relative_files):
- process_file(relative_filename,options)
+ _process_file(relative_filename,options)

  else:

  print('Starting a pool with {} threads'.format(options.n_threads))
  pool = ThreadPool(options.n_threads)
- process_file_with_options = partial(process_file, options=options)
+ process_file_with_options = partial(_process_file, options=options)
  _ = list(tqdm(pool.imap(process_file_with_options, relative_files), total=len(relative_files)))

  # ...def top_folders_to_bottom()
@@ -180,10 +219,5 @@ def main():

  top_folders_to_bottom(options)

-
- if __name__ == '__main__':
-
+ if __name__ == '__main__':
  main()
-
-
-
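
The re-organized top_folders_to_bottom module follows the same options-object pattern; a minimal sketch of programmatic use, with hypothetical folder names (the constructor signature is taken from the diff above, and the import path assumes the wheel's package layout):

    from api.batch_processing.postprocessing.top_folders_to_bottom import (
        TopFoldersToBottomOptions, top_folders_to_bottom)

    # Copy (rather than move) files, turning animal/camera01/image01.jpg
    # into camera01/animal/image01.jpg, using four worker threads
    options = TopFoldersToBottomOptions('labeled-images',
                                        'labeled-images-by-camera',
                                        copy=True,
                                        n_threads=4)
    top_folders_to_bottom(options)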
File without changes
@@ -150,5 +150,3 @@ if __name__ == '__main__':
  print('\n')

  detect_process()
-
-