megadetector 5.0.27__py3-none-any.whl → 5.0.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of megadetector might be problematic.

Files changed (176)
  1. megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
  2. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
  3. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
  4. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  5. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  6. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  7. megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
  8. megadetector/classification/aggregate_classifier_probs.py +3 -3
  9. megadetector/classification/analyze_failed_images.py +5 -5
  10. megadetector/classification/cache_batchapi_outputs.py +5 -5
  11. megadetector/classification/create_classification_dataset.py +11 -12
  12. megadetector/classification/crop_detections.py +10 -10
  13. megadetector/classification/csv_to_json.py +8 -8
  14. megadetector/classification/detect_and_crop.py +13 -15
  15. megadetector/classification/evaluate_model.py +7 -7
  16. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  17. megadetector/classification/json_to_azcopy_list.py +1 -1
  18. megadetector/classification/json_validator.py +29 -32
  19. megadetector/classification/map_classification_categories.py +9 -9
  20. megadetector/classification/merge_classification_detection_output.py +12 -9
  21. megadetector/classification/prepare_classification_script.py +19 -19
  22. megadetector/classification/prepare_classification_script_mc.py +23 -23
  23. megadetector/classification/run_classifier.py +4 -4
  24. megadetector/classification/save_mislabeled.py +6 -6
  25. megadetector/classification/train_classifier.py +1 -1
  26. megadetector/classification/train_classifier_tf.py +9 -9
  27. megadetector/classification/train_utils.py +10 -10
  28. megadetector/data_management/annotations/annotation_constants.py +1 -1
  29. megadetector/data_management/camtrap_dp_to_coco.py +45 -45
  30. megadetector/data_management/cct_json_utils.py +101 -101
  31. megadetector/data_management/cct_to_md.py +49 -49
  32. megadetector/data_management/cct_to_wi.py +33 -33
  33. megadetector/data_management/coco_to_labelme.py +75 -75
  34. megadetector/data_management/coco_to_yolo.py +189 -189
  35. megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
  36. megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
  37. megadetector/data_management/databases/integrity_check_json_db.py +202 -188
  38. megadetector/data_management/databases/subset_json_db.py +33 -33
  39. megadetector/data_management/generate_crops_from_cct.py +38 -38
  40. megadetector/data_management/get_image_sizes.py +54 -49
  41. megadetector/data_management/labelme_to_coco.py +130 -124
  42. megadetector/data_management/labelme_to_yolo.py +78 -72
  43. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  44. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  45. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  46. megadetector/data_management/lila/download_lila_subset.py +21 -24
  47. megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
  48. megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
  49. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  50. megadetector/data_management/lila/lila_common.py +70 -70
  51. megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
  52. megadetector/data_management/mewc_to_md.py +339 -340
  53. megadetector/data_management/ocr_tools.py +258 -252
  54. megadetector/data_management/read_exif.py +232 -223
  55. megadetector/data_management/remap_coco_categories.py +26 -26
  56. megadetector/data_management/remove_exif.py +31 -20
  57. megadetector/data_management/rename_images.py +187 -187
  58. megadetector/data_management/resize_coco_dataset.py +41 -41
  59. megadetector/data_management/speciesnet_to_md.py +41 -41
  60. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  61. megadetector/data_management/yolo_output_to_md_output.py +117 -120
  62. megadetector/data_management/yolo_to_coco.py +195 -188
  63. megadetector/detection/change_detection.py +831 -0
  64. megadetector/detection/process_video.py +341 -338
  65. megadetector/detection/pytorch_detector.py +308 -266
  66. megadetector/detection/run_detector.py +186 -166
  67. megadetector/detection/run_detector_batch.py +366 -364
  68. megadetector/detection/run_inference_with_yolov5_val.py +328 -325
  69. megadetector/detection/run_tiled_inference.py +312 -253
  70. megadetector/detection/tf_detector.py +24 -24
  71. megadetector/detection/video_utils.py +291 -283
  72. megadetector/postprocessing/add_max_conf.py +15 -11
  73. megadetector/postprocessing/categorize_detections_by_size.py +44 -44
  74. megadetector/postprocessing/classification_postprocessing.py +808 -311
  75. megadetector/postprocessing/combine_batch_outputs.py +20 -21
  76. megadetector/postprocessing/compare_batch_results.py +528 -517
  77. megadetector/postprocessing/convert_output_format.py +97 -97
  78. megadetector/postprocessing/create_crop_folder.py +220 -147
  79. megadetector/postprocessing/detector_calibration.py +173 -168
  80. megadetector/postprocessing/generate_csv_report.py +508 -0
  81. megadetector/postprocessing/load_api_results.py +25 -22
  82. megadetector/postprocessing/md_to_coco.py +129 -98
  83. megadetector/postprocessing/md_to_labelme.py +89 -83
  84. megadetector/postprocessing/md_to_wi.py +40 -40
  85. megadetector/postprocessing/merge_detections.py +87 -114
  86. megadetector/postprocessing/postprocess_batch_results.py +319 -302
  87. megadetector/postprocessing/remap_detection_categories.py +36 -36
  88. megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
  89. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  90. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  91. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
  92. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  93. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  94. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  95. megadetector/postprocessing/validate_batch_results.py +70 -70
  96. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  97. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
  98. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
  99. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -69
  100. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  101. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  102. megadetector/taxonomy_mapping/species_lookup.py +33 -33
  103. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  104. megadetector/taxonomy_mapping/taxonomy_graph.py +11 -11
  105. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  106. megadetector/utils/azure_utils.py +22 -22
  107. megadetector/utils/ct_utils.py +1019 -200
  108. megadetector/utils/directory_listing.py +21 -77
  109. megadetector/utils/gpu_test.py +22 -22
  110. megadetector/utils/md_tests.py +541 -518
  111. megadetector/utils/path_utils.py +1511 -406
  112. megadetector/utils/process_utils.py +41 -41
  113. megadetector/utils/sas_blob_utils.py +53 -49
  114. megadetector/utils/split_locations_into_train_val.py +73 -60
  115. megadetector/utils/string_utils.py +147 -26
  116. megadetector/utils/url_utils.py +463 -173
  117. megadetector/utils/wi_utils.py +2629 -2868
  118. megadetector/utils/write_html_image_list.py +137 -137
  119. megadetector/visualization/plot_utils.py +21 -21
  120. megadetector/visualization/render_images_with_thumbnails.py +37 -73
  121. megadetector/visualization/visualization_utils.py +424 -404
  122. megadetector/visualization/visualize_db.py +197 -190
  123. megadetector/visualization/visualize_detector_output.py +126 -98
  124. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/METADATA +6 -3
  125. megadetector-5.0.29.dist-info/RECORD +163 -0
  126. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
  127. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  128. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  129. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  130. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  131. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  132. megadetector/data_management/importers/awc_to_json.py +0 -191
  133. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  134. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  135. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  136. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  137. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  138. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  139. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  140. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  141. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  142. megadetector/data_management/importers/ena24_to_json.py +0 -276
  143. megadetector/data_management/importers/filenames_to_json.py +0 -386
  144. megadetector/data_management/importers/helena_to_cct.py +0 -283
  145. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  146. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  147. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  148. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  149. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  150. megadetector/data_management/importers/missouri_to_json.py +0 -490
  151. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  152. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  153. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  154. megadetector/data_management/importers/pc_to_json.py +0 -365
  155. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  156. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  157. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  158. megadetector/data_management/importers/rspb_to_json.py +0 -356
  159. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  160. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  161. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  162. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  163. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  164. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  165. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  166. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  167. megadetector/data_management/importers/ubc_to_json.py +0 -399
  168. megadetector/data_management/importers/umn_to_json.py +0 -507
  169. megadetector/data_management/importers/wellington_to_json.py +0 -263
  170. megadetector/data_management/importers/wi_to_json.py +0 -442
  171. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  172. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  173. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  174. megadetector-5.0.27.dist-info/RECORD +0 -208
  175. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
  176. {megadetector-5.0.27.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
megadetector/taxonomy_mapping/preview_lila_taxonomy.py

(Most changed line pairs in the hunks below appear identical because the edits only strip trailing whitespace.)

@@ -27,20 +27,20 @@ html_output_file = os.path.join(preview_base,'index.html')
 
 def parse_taxonomy_string(taxonomy_string):
 
-    taxonomic_match = eval(taxonomy_string)
+    taxonomic_match = eval(taxonomy_string)
     matched_entity = taxonomic_match[0]
     assert len(matched_entity) == 4
-
+
     level = matched_entity[1]
-
+
     scientific_name = matched_entity[2]
-
+
     common_names = matched_entity[3]
     if len(common_names) == 1:
         common_name = common_names[0]
     else:
         common_name = str(common_names)
-
+
     return scientific_name,common_name,level,taxonomic_match
 
 def taxonomy_string_to_common_name(taxonomy_string):
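
The only functional content in this hunk is parse_taxonomy_string, which re-hydrates taxonomy_string with eval(); the string is evidently the repr of a list of matched entities, each a 4-tuple whose elements 1-3 are the taxonomic level, the scientific name, and a list of common names. As an illustration only (not code from the package), the same parse can be written with ast.literal_eval, which accepts plain Python literals without executing arbitrary code; the sample string below, including the leading taxon ID, is a hypothetical guess at the format:

import ast

def parse_taxonomy_string_safe(taxonomy_string):
    # Same indices as the function above; literal_eval works as long as the
    # serialized match contains only lists/tuples/strings/numbers
    taxonomic_match = ast.literal_eval(taxonomy_string)
    matched_entity = taxonomic_match[0]
    assert len(matched_entity) == 4
    level = matched_entity[1]            # e.g. 'species'
    scientific_name = matched_entity[2]  # e.g. 'Lynx rufus'
    common_names = matched_entity[3]     # list of common names
    common_name = common_names[0] if len(common_names) == 1 else str(common_names)
    return scientific_name, common_name, level, taxonomic_match

# Hypothetical input; the leading 47219 (presumably a taxon ID) is an assumption
print(parse_taxonomy_string_safe("[(47219, 'species', 'Lynx rufus', ['bobcat'])]"))
# -> ('Lynx rufus', 'bobcat', 'species', [(47219, 'species', 'Lynx rufus', ['bobcat'])])
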
@@ -66,9 +66,6 @@ df = pd.read_csv(lila_taxonomy_file)
 from megadetector.taxonomy_mapping.species_lookup import \
     initialize_taxonomy_lookup, get_preferred_taxonomic_match
 
-# from taxonomy_mapping.species_lookup import (
-#     get_taxonomic_info, print_taxonomy_matche)
-
 initialize_taxonomy_lookup()
 
 
@@ -82,14 +79,14 @@ n_taxonomy_changes = 0
 
 # Look for internal inconsistency
 for i_row,row in df.iterrows():
-
+
     sn = row['scientific_name']
     if not isinstance(sn,str):
         continue
-
-    ts = row['taxonomy_string']
+
+    ts = row['taxonomy_string']
     assert sn == taxonomy_string_to_scientific(ts)
-
+
     assert row['taxonomy_level'] == taxonomy_string_to_level(ts)
 
 # Look for outdated mappings
@@ -97,18 +94,18 @@ taxonomy_preference = 'inat'
 
 # i_row = 0; row = df.iloc[i_row]
 for i_row,row in tqdm(df.iterrows(),total=len(df)):
-
+
     sn = row['scientific_name']
     if not isinstance(sn,str):
         continue
-
+
     m = get_preferred_taxonomic_match(sn,taxonomy_preference)
     assert m.scientific_name == sn
-
+
     ts = row['taxonomy_string']
     assert m.taxonomy_string[0:50] == ts[0:50], 'Mismatch for {}:\n\n{}\n\n{}\n'.format(
         row['dataset_name'],ts,m.taxonomy_string)
-
+
     if ts != m.taxonomy_string:
         n_taxonomy_changes += 1
         df.loc[i_row,'taxonomy_string'] = m.taxonomy_string
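
Two pandas idioms in the loops above are easy to misread. First, isinstance(sn,str) doubles as a missing-value check: empty cells in a string column come back as NaN (a float) or None, so the type check filters them. Second, edits are written back through df.loc[i_row,...] because iterrows() yields copies, and mutating the yielded row would not change the dataframe. A standalone sketch of both (general pandas behavior, not package code):

import pandas as pd

df = pd.DataFrame({'scientific_name': ['lynx rufus', None],
                   'taxonomy_string': ['old', 'old']})

for i_row, row in df.iterrows():
    sn = row['scientific_name']
    if not isinstance(sn, str):
        continue                              # skips the missing-value row
    row['taxonomy_string'] = 'lost'           # mutates a copy; df unchanged
    df.loc[i_row, 'taxonomy_string'] = 'new'  # persists the edit

print(df['taxonomy_string'].tolist())  # ['new', 'old']
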
@@ -164,45 +161,45 @@ suppress_multiple_matches = [
     ['porcupine','Snapshot Kruger','Idaho Camera Traps'],
     ['porcupine','Snapshot Mountain Zebra','Idaho Camera Traps'],
     ['porcupine','Snapshot Serengeti','Idaho Camera Traps'],
-
+
     ['porcupine','Snapshot Serengeti','Snapshot Mountain Zebra'],
     ['porcupine','Snapshot Serengeti','Snapshot Kruger'],
     ['porcupine','Snapshot Serengeti','Snapshot Kgalagadi'],
     ['porcupine','Snapshot Serengeti','Snapshot Karoo'],
     ['porcupine','Snapshot Serengeti','Snapshot Camdeboo'],
-
+
     ['porcupine','Snapshot Enonkishu','Snapshot Camdeboo'],
     ['porcupine','Snapshot Enonkishu','Snapshot Mountain Zebra'],
     ['porcupine','Snapshot Enonkishu','Snapshot Kruger'],
     ['porcupine','Snapshot Enonkishu','Snapshot Kgalagadi'],
     ['porcupine','Snapshot Enonkishu','Snapshot Karoo'],
-
+
     ['kudu','Snapshot Serengeti','Snapshot Mountain Zebra'],
     ['kudu','Snapshot Serengeti','Snapshot Kruger'],
     ['kudu','Snapshot Serengeti','Snapshot Kgalagadi'],
     ['kudu','Snapshot Serengeti','Snapshot Karoo'],
     ['kudu','Snapshot Serengeti','Snapshot Camdeboo'],
-
+
     ['fox','Caltech Camera Traps','Channel Islands Camera Traps'],
     ['fox','Idaho Camera Traps','Channel Islands Camera Traps'],
     ['fox','Idaho Camera Traps','Caltech Camera Traps'],
-
+
     ['pangolin','Snapshot Serengeti','SWG Camera Traps'],
-
+
     ['deer', 'Wellington Camera Traps', 'Idaho Camera Traps'],
     ['deer', 'Wellington Camera Traps', 'Caltech Camera Traps'],
-
+
     ['unknown cervid', 'WCS Camera Traps', 'Idaho Camera Traps']
-
+
 ]
 
 for i_row,row in df.iterrows():
-
+
     query = row['query']
     taxonomy_string = row['taxonomy_string']
-
+
     for previous_i_row in query_to_rows[query]:
-
+
         previous_row = df.iloc[previous_i_row]
         assert previous_row['query'] == query
         query_match = False
@@ -212,11 +209,11 @@ for i_row,row in df.iterrows():
             query_match = isnan(row['taxonomy_string'])
         else:
             query_match = previous_row['taxonomy_string'][0:10] == taxonomy_string[0:10]
-
+
         if not query_match:
-
+
             suppress = False
-
+
             # x = suppress_multiple_matches[-1]
             for x in suppress_multiple_matches:
                 if x[0] == query and \
@@ -228,18 +225,18 @@ for i_row,row in df.iterrows():
                     suppress = True
                     n_suppressed += 1
                     break
-
+
             if not suppress:
                 print('Query {} in {} and {}:\n\n{}\n\n{}\n'.format(
                     query, row['dataset_name'], previous_row['dataset_name'],
                     taxonomy_string, previous_row['taxonomy_string']))
-
+
                 queries_with_multiple_mappings.add(query)
-
+
     # ...for each row where we saw this query
-
+
     query_to_rows[query].append(i_row)
-
+
 # ...for each row
 
 print('Found {} queries with multiple mappings ({} occurrences suppressed)'.format(
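
The loop above is a pairwise consistency check: query_to_rows accumulates, per query string, the indices of rows already seen, and each new row's taxonomy string is compared against those rows using a 10-character prefix as a cheap equality test, with known-acceptable conflicts whitelisted in suppress_multiple_matches. A condensed sketch of the bookkeeping with toy data (query_to_rows is assumed here to be a defaultdict(list); its construction isn't visible in this diff):

from collections import defaultdict

rows = [('fox', 'Vulpes vulpes; ...'),
        ('fox', 'Urocyon littoralis; ...'),
        ('kudu', 'Tragelaphus strepsiceros; ...')]

query_to_rows = defaultdict(list)
for i_row, (query, taxonomy_string) in enumerate(rows):
    for previous_i_row in query_to_rows[query]:
        previous_ts = rows[previous_i_row][1]
        # Same prefix trick as above: compare only the first 10 characters
        if previous_ts[0:10] != taxonomy_string[0:10]:
            print('Query {} maps to both:\n  {}\n  {}'.format(
                query, previous_ts, taxonomy_string))
    query_to_rows[query].append(i_row)

# Prints the fox conflict; the single kudu row has nothing to conflict with.
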
@@ -270,9 +267,9 @@ for i_row,row in df.iterrows():
         ) \
         and \
         ('species' in level):
-
+
         if query not in allowable_unknown_species:
-
+
             print('Warning: query {}:{} maps to {} {}'.format(
                 row['dataset_name'],
                 row['query'],
@@ -288,7 +285,7 @@ for i_row,row in df.iterrows():
     if 'source' in row:
         assert isinstance(row['source'],str)
     assert isinstance(row['taxonomy_level'],str)
-
+
 
 #%% Find WCS mappings that aren't species or aren't the same as the input
 
@@ -297,22 +294,22 @@ for i_row,row in df.iterrows():
 
 # row = df.iloc[-500]
 for i_row,row in df.iterrows():
-
+
     if not isinstance(row['scientific_name'],str):
         continue
     if 'WCS' not in row['dataset_name']:
         continue
-
+
     query = row['query']
     scientific_name = row['scientific_name']
     common_name = row['common_name']
-    level = row['taxonomy_level']
+    level = row['taxonomy_level']
     taxonomy_string = row['taxonomy_string']
-
-    common_name_from_taxonomy = taxonomy_string_to_common_name(taxonomy_string)
+
+    common_name_from_taxonomy = taxonomy_string_to_common_name(taxonomy_string)
     query_string = query.replace(' sp','')
     query_string = query_string.replace('unknown ','')
-
+
     # Anything marked "species" or "unknown" by definition doesn't map to a species,
     # so ignore these.
     if (' sp' not in query) and ('unknown' not in query) and \
@@ -320,7 +317,7 @@ for i_row,row in df.iterrows():
         print('WCS query {} ({}) remapped to {} {} ({})'.format(
             query,common_name,level,scientific_name,common_name_from_taxonomy))
 
-    if query_string != scientific_name:
+    if query_string != scientific_name:
         pass
         # print('WCS query {} ({}) remapped to {} ({})'.format(
         #     query,common_name,scientific_name,common_names_from_taxonomy))
@@ -348,20 +345,20 @@ min_valid_image_size = 3000
 #
 # i_row = 0; row = df.iloc[i_row]
 for i_row,row in df.iterrows():
-
+
     s = row['scientific_name']
-
+
     if (not isinstance(s,str)) or (len(s)==0):
         continue
-
+
     query = s.replace(' ','+')
-
+
     if query in remapped_queries:
         query = remapped_queries[query]
-
+
     query_folder = os.path.join(image_base,query)
     os.makedirs(query_folder,exist_ok=True)
-
+
     # Check whether we already have enough images for this query
     image_files = os.listdir(query_folder)
     image_fullpaths = [os.path.join(query_folder,fn) for fn in image_files]
@@ -374,7 +371,7 @@ for i_row,row in df.iterrows():
     # Check whether we've already run this query for a previous row
     if query in scientific_name_to_paths:
         continue
-
+
     print('Processing query {} of {} ({})'.format(i_row,len(df),query))
     paths = retrieve_sample_image.download_images(query=query,
                                                   output_directory=image_base,
@@ -407,40 +404,40 @@ scientific_name_to_preferred_images = {}
 
 # s = list(scientific_name_to_paths.keys())[0]
 for s in list(df.scientific_name):
-
+
     if not isinstance(s,str):
         continue
-
+
     query = s.replace(' ','+')
-
+
     if query in remapped_queries:
         query = remapped_queries[query]
-
+
     query_folder = os.path.join(image_base,query)
     assert os.path.isdir(query_folder)
     image_files = os.listdir(query_folder)
-    image_fullpaths = [os.path.join(query_folder,fn) for fn in image_files]
+    image_fullpaths = [os.path.join(query_folder,fn) for fn in image_files]
     sizes = [os.path.getsize(p) for p in image_fullpaths]
     path_to_size = {}
     for i_fp,fp in enumerate(image_fullpaths):
         path_to_size[fp] = sizes[i_fp]
     paths_by_size = [x for _, x in sorted(zip(sizes, image_fullpaths),reverse=True)]
-
+
     # Be suspicious of duplicate sizes
     b_duplicate_sizes = [False] * len(paths_by_size)
-
+
     for i_path,p in enumerate(paths_by_size):
         if i_path == len(paths_by_size) - 1:
             continue
         if path_to_size[p] == path_to_size[paths_by_size[i_path+1]]:
             b_duplicate_sizes[i_path] = True
-
+
     paths_by_size_non_dup = [i for (i, v) in zip(paths_by_size, b_duplicate_sizes) if not v]
-
+
     preferred_paths = paths_by_size_non_dup[:max_images_per_query]
     scientific_name_to_preferred_images[s] = preferred_paths
 
-# ...for each scientific name
+# ...for each scientific name
 
 
 #%% Delete unused images
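
A note on the "Be suspicious of duplicate sizes" block above: after sorting a query's images largest-first, any file whose byte size exactly equals the next file's is flagged, so from each run of identical sizes only the last file survives; the heuristic is that byte-identical downloads are almost certainly the same image fetched twice. The same logic as a self-contained function (a sketch, not the package's API):

import os

def filter_probable_duplicates(paths):
    # Keep at most one file from each run of equal byte sizes, largest first
    sizes = {p: os.path.getsize(p) for p in paths}
    by_size = sorted(paths, key=lambda p: sizes[p], reverse=True)
    keep = []
    for i, p in enumerate(by_size):
        # Mirrors b_duplicate_sizes[i_path] = True above: drop p when the
        # next-smaller file has exactly the same size
        if i + 1 < len(by_size) and sizes[p] == sizes[by_size[i + 1]]:
            continue
        keep.append(p)
    return keep
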
@@ -448,7 +445,7 @@ for s in list(df.scientific_name):
 used_images = []
 for images in scientific_name_to_preferred_images.values():
     used_images.extend(images)
-
+
 print('Using a total of {} images'.format(len(used_images)))
 used_images_set = set(used_images)
 
@@ -464,18 +461,18 @@ print('{} of {} files unused (diff {})'.format(len(unused_images),len(all_images
     len(all_images) - len(unused_images)))
 
 for fn in tqdm(unused_images):
-    os.remove(fn)
+    os.remove(fn)
 
 
 #%% Produce HTML preview
 
 with open(html_output_file, 'w', encoding='utf-8') as f:
-
+
     f.write('<html><head></head><body>\n')
 
     names = scientific_name_to_preferred_images.keys()
     names = sorted(names)
-
+
     f.write('<p class="speciesinfo_p" style="font-weight:bold;font-size:130%">'
             'dataset_name: <b><u>category</u></b> mapped to taxonomy_level scientific_name (taxonomic_common_name) (manual_common_name)</p>\n'
             '</p>')
@@ -484,10 +481,10 @@ with open(html_output_file, 'w', encoding='utf-8') as f:
     for i_row, row in tqdm(df.iterrows(), total=len(df)):
 
         s = row['scientific_name']
-
+
         taxonomy_string = row['taxonomy_string']
         if isinstance(taxonomy_string,str):
-            taxonomic_match = eval(taxonomy_string)
+            taxonomic_match = eval(taxonomy_string)
             matched_entity = taxonomic_match[0]
             assert len(matched_entity) == 4
             common_names = matched_entity[3]
@@ -502,7 +499,7 @@ with open(html_output_file, 'w', encoding='utf-8') as f:
 
         if isinstance(row.scientific_name,str):
             output_string = '{}: <b><u>{}</u></b> mapped to {} {} ({}) ({})</p>\n'.format(
-                row.dataset_name, row.query,
+                row.dataset_name, row.query,
                 row.taxonomy_level, row.scientific_name, common_name_string,
                 row.common_name)
             f.write(output_string)
megadetector/taxonomy_mapping/retrieve_sample_image.py

@@ -17,21 +17,21 @@ import os
 
 output_folder = os.path.expanduser('~/tmp/image-download-test')
 os.makedirs(output_folder,exist_ok=True)
-
+
 method = 'simple_image_download' # 'google_images_download'
 
 if method == 'simple_image_download':
-
+
     from megadetector.taxonomy_mapping import simple_image_download
     google_image_downloader = simple_image_download.Downloader()
     google_image_downloader.directory = output_folder
-
+
 elif method == 'google_images_download':
-
+
     from google_images_download import google_images_download
 
 else:
-
+
     raise ValueError('Unrecognized method {}'.format(method))
 
 
@@ -39,33 +39,33 @@ else:
 
 def download_images(query,output_directory,limit=100,verbose=False):
 
-    query = query.replace(' ','+')
-
+    query = query.replace(' ','+')
+
     if method == 'simple_image_download':
-
+
         google_image_downloader.directory = output_directory
         paths = google_image_downloader.download(query, limit=limit,
             verbose=verbose, cache=False, download_cache=False)
         return paths
-
+
     elif method == 'google_images_download':
-
-        response = google_images_download.googleimagesdownload()
+
+        response = google_images_download.googleimagesdownload()
         arguments = {'keywords':query,'limit':limit,'print_urls':verbose,
                      'image-directory':output_directory}
         response.download(arguments)
         return None
 
     else:
-
+
         raise ValueError('Unrecognized method {}'.format(method))
-
+
 
 #%% Test driver
 
 if False:
-
+
     #%%
-
+
     paths = download_images(query='redunca',output_directory=output_folder,
-                            limit=20,verbose=True)
+                            limit=20,verbose=True)
megadetector/taxonomy_mapping/simple_image_download.py

@@ -49,7 +49,7 @@ def generate_urls(search):
     """
     Generate Google search URLs for all tokens in the list [search]
     """
-
+
     return [(BASE_URL+quote(word)+GOOGLE_PICTURE_ID) for word in search]
 
 
@@ -60,7 +60,7 @@ def check_webpage(url):
         if 'html' not in str(request.content):
             checked_url = request
     except Exception as err:
-        print(err)
+        print(err)
     return checked_url
 
 
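
For context, check_webpage returns the fetched response only when the body doesn't look like an HTML page (i.e., it is probably a direct image link), and swallows network errors. The file's imports aren't visible in this diff, but the .content and .url attributes used here and in download() match requests.Response, so a self-contained reconstruction of the pattern, under that assumption, would be:

import requests

def check_webpage(url, timeout=10):
    checked_url = None
    try:
        request = requests.get(url, timeout=timeout)
        # Keep the response only if it doesn't look like an HTML error/landing page
        if 'html' not in str(request.content):
            checked_url = request
    except Exception as err:
        print(err)
    return checked_url
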
@@ -68,7 +68,7 @@ def scan_webpage(webpage, extensions, timer):
     """
     Scan for pictures to download based on keywords
     """
-
+
     global SCANNER_COUNTER
     scanner = webpage.find
     found = False
@@ -143,7 +143,7 @@ class Downloader:
         urls_ = generate_urls(search)
         timer = timer if timer else 1000
         # max_progressbar = count * (list(range(limit+1))[-1]+1)
-
+
         # bar = progressbar.ProgressBar(maxval=max_progressbar,
         #     widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()]).start()
         i = 0
@@ -172,7 +172,7 @@ class Downloader:
             print('==='*15 + ' < ' + 'NO PICTURES FOUND' + ' > ' + '==='*15)
         return cache_out
 
-    def download(self, keywords=None, limit=1, verbose=False, cache=True, download_cache=False,
+    def download(self, keywords=None, limit=1, verbose=False, cache=True, download_cache=False,
                  timer=None):
         if not download_cache:
             content = self.search_urls(keywords, limit, verbose, cache, timer)
@@ -180,16 +180,16 @@ class Downloader:
             content = self._cached_urls
         if not content:
             print('Downloader has not URLs saved in Memory yet, run Downloader.search_urls to find pics first')
-        paths = []
+        paths = []
         for name, (path, url) in content.items():
             fullpath = os.path.join(path, name)
             paths.append(fullpath)
             with open(fullpath, 'wb') as file:
                 file.write(url.content)
             if verbose:
-                print(f'File Name={name}, Downloaded from {url.url}')
+                print(f'File Name={name}, Downloaded from {url.url}')
         return paths
-
+
     def _create_directories(self, name):
         dir_path = os.path.join(self._directory, name)
         try:
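
Putting this file's pieces together: Downloader.download resolves image URLs via search_urls (or reuses self._cached_urls when download_cache=True), writes each response body to disk, and returns the file paths. Based on the call sites visible earlier in this diff (retrieve_sample_image.py), usage looks roughly like the following; the query and output folder are hypothetical:

from megadetector.taxonomy_mapping import simple_image_download

downloader = simple_image_download.Downloader()
downloader.directory = '/tmp/image-download-test'   # hypothetical output folder
paths = downloader.download('lynx+rufus', limit=20, verbose=True,
                            cache=False, download_cache=False)
print(paths)  # list of paths to the downloaded files
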