megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (197)
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
@@ -16,7 +16,7 @@ import os
16
16
  import pandas as pd
17
17
 
18
18
  # lila_taxonomy_file = r"c:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
19
- lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2025.03.24.csv')
19
+ lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2025.06.23.csv')
20
20
 
21
21
  preview_base = os.path.expanduser('~/lila/lila_taxonomy_preview')
22
22
  os.makedirs(preview_base,exist_ok=True)
@@ -27,20 +27,20 @@ html_output_file = os.path.join(preview_base,'index.html')
27
27
 
28
28
  def parse_taxonomy_string(taxonomy_string):
29
29
 
30
- taxonomic_match = eval(taxonomy_string)
30
+ taxonomic_match = eval(taxonomy_string)
31
31
  matched_entity = taxonomic_match[0]
32
32
  assert len(matched_entity) == 4
33
-
33
+
34
34
  level = matched_entity[1]
35
-
35
+
36
36
  scientific_name = matched_entity[2]
37
-
37
+
38
38
  common_names = matched_entity[3]
39
39
  if len(common_names) == 1:
40
40
  common_name = common_names[0]
41
41
  else:
42
42
  common_name = str(common_names)
43
-
43
+
44
44
  return scientific_name,common_name,level,taxonomic_match
45
45
 
46
46
  def taxonomy_string_to_common_name(taxonomy_string):
@@ -79,14 +79,14 @@ n_taxonomy_changes = 0
79
79
 
80
80
  # Look for internal inconsistency
81
81
  for i_row,row in df.iterrows():
82
-
82
+
83
83
  sn = row['scientific_name']
84
84
  if not isinstance(sn,str):
85
85
  continue
86
-
87
- ts = row['taxonomy_string']
86
+
87
+ ts = row['taxonomy_string']
88
88
  assert sn == taxonomy_string_to_scientific(ts)
89
-
89
+
90
90
  assert row['taxonomy_level'] == taxonomy_string_to_level(ts)
91
91
 
92
92
  # Look for outdated mappings
@@ -94,18 +94,18 @@ taxonomy_preference = 'inat'
94
94
 
95
95
  # i_row = 0; row = df.iloc[i_row]
96
96
  for i_row,row in tqdm(df.iterrows(),total=len(df)):
97
-
97
+
98
98
  sn = row['scientific_name']
99
99
  if not isinstance(sn,str):
100
100
  continue
101
-
101
+
102
102
  m = get_preferred_taxonomic_match(sn,taxonomy_preference)
103
103
  assert m.scientific_name == sn
104
-
104
+
105
105
  ts = row['taxonomy_string']
106
106
  assert m.taxonomy_string[0:50] == ts[0:50], 'Mismatch for {}:\n\n{}\n\n{}\n'.format(
107
107
  row['dataset_name'],ts,m.taxonomy_string)
108
-
108
+
109
109
  if ts != m.taxonomy_string:
110
110
  n_taxonomy_changes += 1
111
111
  df.loc[i_row,'taxonomy_string'] = m.taxonomy_string
@@ -161,45 +161,45 @@ suppress_multiple_matches = [
161
161
  ['porcupine','Snapshot Kruger','Idaho Camera Traps'],
162
162
  ['porcupine','Snapshot Mountain Zebra','Idaho Camera Traps'],
163
163
  ['porcupine','Snapshot Serengeti','Idaho Camera Traps'],
164
-
164
+
165
165
  ['porcupine','Snapshot Serengeti','Snapshot Mountain Zebra'],
166
166
  ['porcupine','Snapshot Serengeti','Snapshot Kruger'],
167
167
  ['porcupine','Snapshot Serengeti','Snapshot Kgalagadi'],
168
168
  ['porcupine','Snapshot Serengeti','Snapshot Karoo'],
169
169
  ['porcupine','Snapshot Serengeti','Snapshot Camdeboo'],
170
-
170
+
171
171
  ['porcupine','Snapshot Enonkishu','Snapshot Camdeboo'],
172
172
  ['porcupine','Snapshot Enonkishu','Snapshot Mountain Zebra'],
173
173
  ['porcupine','Snapshot Enonkishu','Snapshot Kruger'],
174
174
  ['porcupine','Snapshot Enonkishu','Snapshot Kgalagadi'],
175
175
  ['porcupine','Snapshot Enonkishu','Snapshot Karoo'],
176
-
176
+
177
177
  ['kudu','Snapshot Serengeti','Snapshot Mountain Zebra'],
178
178
  ['kudu','Snapshot Serengeti','Snapshot Kruger'],
179
179
  ['kudu','Snapshot Serengeti','Snapshot Kgalagadi'],
180
180
  ['kudu','Snapshot Serengeti','Snapshot Karoo'],
181
181
  ['kudu','Snapshot Serengeti','Snapshot Camdeboo'],
182
-
182
+
183
183
  ['fox','Caltech Camera Traps','Channel Islands Camera Traps'],
184
184
  ['fox','Idaho Camera Traps','Channel Islands Camera Traps'],
185
185
  ['fox','Idaho Camera Traps','Caltech Camera Traps'],
186
-
186
+
187
187
  ['pangolin','Snapshot Serengeti','SWG Camera Traps'],
188
-
188
+
189
189
  ['deer', 'Wellington Camera Traps', 'Idaho Camera Traps'],
190
190
  ['deer', 'Wellington Camera Traps', 'Caltech Camera Traps'],
191
-
191
+
192
192
  ['unknown cervid', 'WCS Camera Traps', 'Idaho Camera Traps']
193
-
193
+
194
194
  ]
195
195
 
196
196
  for i_row,row in df.iterrows():
197
-
197
+
198
198
  query = row['query']
199
199
  taxonomy_string = row['taxonomy_string']
200
-
200
+
201
201
  for previous_i_row in query_to_rows[query]:
202
-
202
+
203
203
  previous_row = df.iloc[previous_i_row]
204
204
  assert previous_row['query'] == query
205
205
  query_match = False
@@ -209,11 +209,11 @@ for i_row,row in df.iterrows():
209
209
  query_match = isnan(row['taxonomy_string'])
210
210
  else:
211
211
  query_match = previous_row['taxonomy_string'][0:10] == taxonomy_string[0:10]
212
-
212
+
213
213
  if not query_match:
214
-
214
+
215
215
  suppress = False
216
-
216
+
217
217
  # x = suppress_multiple_matches[-1]
218
218
  for x in suppress_multiple_matches:
219
219
  if x[0] == query and \
@@ -225,18 +225,18 @@ for i_row,row in df.iterrows():
225
225
  suppress = True
226
226
  n_suppressed += 1
227
227
  break
228
-
228
+
229
229
  if not suppress:
230
230
  print('Query {} in {} and {}:\n\n{}\n\n{}\n'.format(
231
231
  query, row['dataset_name'], previous_row['dataset_name'],
232
232
  taxonomy_string, previous_row['taxonomy_string']))
233
-
233
+
234
234
  queries_with_multiple_mappings.add(query)
235
-
235
+
236
236
  # ...for each row where we saw this query
237
-
237
+
238
238
  query_to_rows[query].append(i_row)
239
-
239
+
240
240
  # ...for each row
241
241
 
242
242
  print('Found {} queries with multiple mappings ({} occurrences suppressed)'.format(
@@ -267,9 +267,9 @@ for i_row,row in df.iterrows():
267
267
  ) \
268
268
  and \
269
269
  ('species' in level):
270
-
270
+
271
271
  if query not in allowable_unknown_species:
272
-
272
+
273
273
  print('Warning: query {}:{} maps to {} {}'.format(
274
274
  row['dataset_name'],
275
275
  row['query'],
@@ -285,7 +285,7 @@ for i_row,row in df.iterrows():
285
285
  if 'source' in row:
286
286
  assert isinstance(row['source'],str)
287
287
  assert isinstance(row['taxonomy_level'],str)
288
-
288
+
289
289
 
290
290
  #%% Find WCS mappings that aren't species or aren't the same as the input
291
291
 
@@ -294,22 +294,22 @@ for i_row,row in df.iterrows():
294
294
 
295
295
  # row = df.iloc[-500]
296
296
  for i_row,row in df.iterrows():
297
-
297
+
298
298
  if not isinstance(row['scientific_name'],str):
299
299
  continue
300
300
  if 'WCS' not in row['dataset_name']:
301
301
  continue
302
-
302
+
303
303
  query = row['query']
304
304
  scientific_name = row['scientific_name']
305
305
  common_name = row['common_name']
306
- level = row['taxonomy_level']
306
+ level = row['taxonomy_level']
307
307
  taxonomy_string = row['taxonomy_string']
308
-
309
- common_name_from_taxonomy = taxonomy_string_to_common_name(taxonomy_string)
308
+
309
+ common_name_from_taxonomy = taxonomy_string_to_common_name(taxonomy_string)
310
310
  query_string = query.replace(' sp','')
311
311
  query_string = query_string.replace('unknown ','')
312
-
312
+
313
313
  # Anything marked "species" or "unknown" by definition doesn't map to a species,
314
314
  # so ignore these.
315
315
  if (' sp' not in query) and ('unknown' not in query) and \
@@ -317,7 +317,7 @@ for i_row,row in df.iterrows():
317
317
  print('WCS query {} ({}) remapped to {} {} ({})'.format(
318
318
  query,common_name,level,scientific_name,common_name_from_taxonomy))
319
319
 
320
- if query_string != scientific_name:
320
+ if query_string != scientific_name:
321
321
  pass
322
322
  # print('WCS query {} ({}) remapped to {} ({})'.format(
323
323
  # query,common_name,scientific_name,common_names_from_taxonomy))
@@ -345,20 +345,20 @@ min_valid_image_size = 3000
345
345
  #
346
346
  # i_row = 0; row = df.iloc[i_row]
347
347
  for i_row,row in df.iterrows():
348
-
348
+
349
349
  s = row['scientific_name']
350
-
350
+
351
351
  if (not isinstance(s,str)) or (len(s)==0):
352
352
  continue
353
-
353
+
354
354
  query = s.replace(' ','+')
355
-
355
+
356
356
  if query in remapped_queries:
357
357
  query = remapped_queries[query]
358
-
358
+
359
359
  query_folder = os.path.join(image_base,query)
360
360
  os.makedirs(query_folder,exist_ok=True)
361
-
361
+
362
362
  # Check whether we already have enough images for this query
363
363
  image_files = os.listdir(query_folder)
364
364
  image_fullpaths = [os.path.join(query_folder,fn) for fn in image_files]
@@ -371,7 +371,7 @@ for i_row,row in df.iterrows():
371
371
  # Check whether we've already run this query for a previous row
372
372
  if query in scientific_name_to_paths:
373
373
  continue
374
-
374
+
375
375
  print('Processing query {} of {} ({})'.format(i_row,len(df),query))
376
376
  paths = retrieve_sample_image.download_images(query=query,
377
377
  output_directory=image_base,
@@ -404,40 +404,40 @@ scientific_name_to_preferred_images = {}
404
404
 
405
405
  # s = list(scientific_name_to_paths.keys())[0]
406
406
  for s in list(df.scientific_name):
407
-
407
+
408
408
  if not isinstance(s,str):
409
409
  continue
410
-
410
+
411
411
  query = s.replace(' ','+')
412
-
412
+
413
413
  if query in remapped_queries:
414
414
  query = remapped_queries[query]
415
-
415
+
416
416
  query_folder = os.path.join(image_base,query)
417
417
  assert os.path.isdir(query_folder)
418
418
  image_files = os.listdir(query_folder)
419
- image_fullpaths = [os.path.join(query_folder,fn) for fn in image_files]
419
+ image_fullpaths = [os.path.join(query_folder,fn) for fn in image_files]
420
420
  sizes = [os.path.getsize(p) for p in image_fullpaths]
421
421
  path_to_size = {}
422
422
  for i_fp,fp in enumerate(image_fullpaths):
423
423
  path_to_size[fp] = sizes[i_fp]
424
424
  paths_by_size = [x for _, x in sorted(zip(sizes, image_fullpaths),reverse=True)]
425
-
425
+
426
426
  # Be suspicious of duplicate sizes
427
427
  b_duplicate_sizes = [False] * len(paths_by_size)
428
-
428
+
429
429
  for i_path,p in enumerate(paths_by_size):
430
430
  if i_path == len(paths_by_size) - 1:
431
431
  continue
432
432
  if path_to_size[p] == path_to_size[paths_by_size[i_path+1]]:
433
433
  b_duplicate_sizes[i_path] = True
434
-
434
+
435
435
  paths_by_size_non_dup = [i for (i, v) in zip(paths_by_size, b_duplicate_sizes) if not v]
436
-
436
+
437
437
  preferred_paths = paths_by_size_non_dup[:max_images_per_query]
438
438
  scientific_name_to_preferred_images[s] = preferred_paths
439
439
 
440
- # ...for each scientific name
440
+ # ...for each scientific name
441
441
 
442
442
 
443
443
  #%% Delete unused images
@@ -445,7 +445,7 @@ for s in list(df.scientific_name):
445
445
  used_images = []
446
446
  for images in scientific_name_to_preferred_images.values():
447
447
  used_images.extend(images)
448
-
448
+
449
449
  print('Using a total of {} images'.format(len(used_images)))
450
450
  used_images_set = set(used_images)
451
451
 
@@ -461,18 +461,18 @@ print('{} of {} files unused (diff {})'.format(len(unused_images),len(all_images
461
461
  len(all_images) - len(unused_images)))
462
462
 
463
463
  for fn in tqdm(unused_images):
464
- os.remove(fn)
464
+ os.remove(fn)
465
465
 
466
466
 
467
467
  #%% Produce HTML preview
468
468
 
469
469
  with open(html_output_file, 'w', encoding='utf-8') as f:
470
-
470
+
471
471
  f.write('<html><head></head><body>\n')
472
472
 
473
473
  names = scientific_name_to_preferred_images.keys()
474
474
  names = sorted(names)
475
-
475
+
476
476
  f.write('<p class="speciesinfo_p" style="font-weight:bold;font-size:130%">'
477
477
  'dataset_name: <b><u>category</u></b> mapped to taxonomy_level scientific_name (taxonomic_common_name) (manual_common_name)</p>\n'
478
478
  '</p>')
@@ -481,10 +481,10 @@ with open(html_output_file, 'w', encoding='utf-8') as f:
481
481
  for i_row, row in tqdm(df.iterrows(), total=len(df)):
482
482
 
483
483
  s = row['scientific_name']
484
-
484
+
485
485
  taxonomy_string = row['taxonomy_string']
486
486
  if isinstance(taxonomy_string,str):
487
- taxonomic_match = eval(taxonomy_string)
487
+ taxonomic_match = eval(taxonomy_string)
488
488
  matched_entity = taxonomic_match[0]
489
489
  assert len(matched_entity) == 4
490
490
  common_names = matched_entity[3]
@@ -499,7 +499,7 @@ with open(html_output_file, 'w', encoding='utf-8') as f:
499
499
 
500
500
  if isinstance(row.scientific_name,str):
501
501
  output_string = '{}: <b><u>{}</u></b> mapped to {} {} ({}) ({})</p>\n'.format(
502
- row.dataset_name, row.query,
502
+ row.dataset_name, row.query,
503
503
  row.taxonomy_level, row.scientific_name, common_name_string,
504
504
  row.common_name)
505
505
  f.write(output_string)
@@ -17,21 +17,21 @@ import os
17
17
 
18
18
  output_folder = os.path.expanduser('~/tmp/image-download-test')
19
19
  os.makedirs(output_folder,exist_ok=True)
20
-
20
+
21
21
  method = 'simple_image_download' # 'google_images_download'
22
22
 
23
23
  if method == 'simple_image_download':
24
-
24
+
25
25
  from megadetector.taxonomy_mapping import simple_image_download
26
26
  google_image_downloader = simple_image_download.Downloader()
27
27
  google_image_downloader.directory = output_folder
28
-
28
+
29
29
  elif method == 'google_images_download':
30
-
30
+
31
31
  from google_images_download import google_images_download
32
32
 
33
33
  else:
34
-
34
+
35
35
  raise ValueError('Unrecognized method {}'.format(method))
36
36
 
37
37
 
@@ -39,33 +39,33 @@ else:
39
39
 
40
40
  def download_images(query,output_directory,limit=100,verbose=False):
41
41
 
42
- query = query.replace(' ','+')
43
-
42
+ query = query.replace(' ','+')
43
+
44
44
  if method == 'simple_image_download':
45
-
45
+
46
46
  google_image_downloader.directory = output_directory
47
47
  paths = google_image_downloader.download(query, limit=limit,
48
48
  verbose=verbose, cache=False, download_cache=False)
49
49
  return paths
50
-
50
+
51
51
  elif method == 'google_images_download':
52
-
53
- response = google_images_download.googleimagesdownload()
52
+
53
+ response = google_images_download.googleimagesdownload()
54
54
  arguments = {'keywords':query,'limit':limit,'print_urls':verbose,
55
55
  'image-directory':output_directory}
56
56
  response.download(arguments)
57
57
  return None
58
58
 
59
59
  else:
60
-
60
+
61
61
  raise ValueError('Unrecognized method {}'.format(method))
62
-
62
+
63
63
 
64
64
  #%% Test driver
65
65
 
66
66
  if False:
67
-
67
+
68
68
  #%%
69
-
69
+
70
70
  paths = download_images(query='redunca',output_directory=output_folder,
71
- limit=20,verbose=True)
71
+ limit=20,verbose=True)
@@ -49,7 +49,7 @@ def generate_urls(search):
49
49
  """
50
50
  Generate Google search URLs for all tokens in the list [search]
51
51
  """
52
-
52
+
53
53
  return [(BASE_URL+quote(word)+GOOGLE_PICTURE_ID) for word in search]
54
54
 
55
55
 
@@ -60,7 +60,7 @@ def check_webpage(url):
60
60
  if 'html' not in str(request.content):
61
61
  checked_url = request
62
62
  except Exception as err:
63
- print(err)
63
+ print(err)
64
64
  return checked_url
65
65
 
66
66
 
@@ -68,7 +68,7 @@ def scan_webpage(webpage, extensions, timer):
68
68
  """
69
69
  Scan for pictures to download based on keywords
70
70
  """
71
-
71
+
72
72
  global SCANNER_COUNTER
73
73
  scanner = webpage.find
74
74
  found = False
@@ -143,7 +143,7 @@ class Downloader:
143
143
  urls_ = generate_urls(search)
144
144
  timer = timer if timer else 1000
145
145
  # max_progressbar = count * (list(range(limit+1))[-1]+1)
146
-
146
+
147
147
  # bar = progressbar.ProgressBar(maxval=max_progressbar,
148
148
  # widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()]).start()
149
149
  i = 0
@@ -172,7 +172,7 @@ class Downloader:
172
172
  print('==='*15 + ' < ' + 'NO PICTURES FOUND' + ' > ' + '==='*15)
173
173
  return cache_out
174
174
 
175
- def download(self, keywords=None, limit=1, verbose=False, cache=True, download_cache=False,
175
+ def download(self, keywords=None, limit=1, verbose=False, cache=True, download_cache=False,
176
176
  timer=None):
177
177
  if not download_cache:
178
178
  content = self.search_urls(keywords, limit, verbose, cache, timer)
@@ -180,16 +180,16 @@ class Downloader:
180
180
  content = self._cached_urls
181
181
  if not content:
182
182
  print('Downloader has not URLs saved in Memory yet, run Downloader.search_urls to find pics first')
183
- paths = []
183
+ paths = []
184
184
  for name, (path, url) in content.items():
185
185
  fullpath = os.path.join(path, name)
186
186
  paths.append(fullpath)
187
187
  with open(fullpath, 'wb') as file:
188
188
  file.write(url.content)
189
189
  if verbose:
190
- print(f'File Name={name}, Downloaded from {url.url}')
190
+ print(f'File Name={name}, Downloaded from {url.url}')
191
191
  return paths
192
-
192
+
193
193
  def _create_directories(self, name):
194
194
  dir_path = os.path.join(self._directory, name)
195
195
  try: