megadetector 5.0.11__py3-none-any.whl → 5.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector has been flagged as potentially problematic; see the registry's advisory page for more details.

Files changed (201)
  1. megadetector/api/__init__.py +0 -0
  2. megadetector/api/batch_processing/__init__.py +0 -0
  3. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  4. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. megadetector/api/batch_processing/api_core/batch_service/score.py +439 -0
  6. megadetector/api/batch_processing/api_core/server.py +294 -0
  7. megadetector/api/batch_processing/api_core/server_api_config.py +98 -0
  8. megadetector/api/batch_processing/api_core/server_app_config.py +55 -0
  9. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +220 -0
  10. megadetector/api/batch_processing/api_core/server_job_status_table.py +152 -0
  11. megadetector/api/batch_processing/api_core/server_orchestration.py +360 -0
  12. megadetector/api/batch_processing/api_core/server_utils.py +92 -0
  13. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  14. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +46 -0
  15. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  16. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +152 -0
  17. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  18. megadetector/api/batch_processing/integration/digiKam/setup.py +6 -0
  19. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +465 -0
  20. megadetector/api/batch_processing/integration/eMammal/test_scripts/config_template.py +5 -0
  21. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +126 -0
  22. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +55 -0
  23. megadetector/api/synchronous/__init__.py +0 -0
  24. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  25. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +152 -0
  26. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -0
  27. megadetector/api/synchronous/api_core/animal_detection_api/config.py +35 -0
  28. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  29. megadetector/api/synchronous/api_core/tests/load_test.py +110 -0
  30. megadetector/classification/__init__.py +0 -0
  31. megadetector/classification/aggregate_classifier_probs.py +108 -0
  32. megadetector/classification/analyze_failed_images.py +227 -0
  33. megadetector/classification/cache_batchapi_outputs.py +198 -0
  34. megadetector/classification/create_classification_dataset.py +627 -0
  35. megadetector/classification/crop_detections.py +516 -0
  36. megadetector/classification/csv_to_json.py +226 -0
  37. megadetector/classification/detect_and_crop.py +855 -0
  38. megadetector/classification/efficientnet/__init__.py +9 -0
  39. megadetector/classification/efficientnet/model.py +415 -0
  40. megadetector/classification/efficientnet/utils.py +610 -0
  41. megadetector/classification/evaluate_model.py +520 -0
  42. megadetector/classification/identify_mislabeled_candidates.py +152 -0
  43. megadetector/classification/json_to_azcopy_list.py +63 -0
  44. megadetector/classification/json_validator.py +699 -0
  45. megadetector/classification/map_classification_categories.py +276 -0
  46. megadetector/classification/merge_classification_detection_output.py +506 -0
  47. megadetector/classification/prepare_classification_script.py +194 -0
  48. megadetector/classification/prepare_classification_script_mc.py +228 -0
  49. megadetector/classification/run_classifier.py +287 -0
  50. megadetector/classification/save_mislabeled.py +110 -0
  51. megadetector/classification/train_classifier.py +827 -0
  52. megadetector/classification/train_classifier_tf.py +725 -0
  53. megadetector/classification/train_utils.py +323 -0
  54. megadetector/data_management/__init__.py +0 -0
  55. megadetector/data_management/annotations/__init__.py +0 -0
  56. megadetector/data_management/annotations/annotation_constants.py +34 -0
  57. megadetector/data_management/camtrap_dp_to_coco.py +239 -0
  58. megadetector/data_management/cct_json_utils.py +395 -0
  59. megadetector/data_management/cct_to_md.py +176 -0
  60. megadetector/data_management/cct_to_wi.py +289 -0
  61. megadetector/data_management/coco_to_labelme.py +272 -0
  62. megadetector/data_management/coco_to_yolo.py +662 -0
  63. megadetector/data_management/databases/__init__.py +0 -0
  64. megadetector/data_management/databases/add_width_and_height_to_db.py +33 -0
  65. megadetector/data_management/databases/combine_coco_camera_traps_files.py +206 -0
  66. megadetector/data_management/databases/integrity_check_json_db.py +477 -0
  67. megadetector/data_management/databases/subset_json_db.py +115 -0
  68. megadetector/data_management/generate_crops_from_cct.py +149 -0
  69. megadetector/data_management/get_image_sizes.py +189 -0
  70. megadetector/data_management/importers/add_nacti_sizes.py +52 -0
  71. megadetector/data_management/importers/add_timestamps_to_icct.py +79 -0
  72. megadetector/data_management/importers/animl_results_to_md_results.py +158 -0
  73. megadetector/data_management/importers/auckland_doc_test_to_json.py +373 -0
  74. megadetector/data_management/importers/auckland_doc_to_json.py +201 -0
  75. megadetector/data_management/importers/awc_to_json.py +191 -0
  76. megadetector/data_management/importers/bellevue_to_json.py +273 -0
  77. megadetector/data_management/importers/cacophony-thermal-importer.py +796 -0
  78. megadetector/data_management/importers/carrizo_shrubfree_2018.py +269 -0
  79. megadetector/data_management/importers/carrizo_trail_cam_2017.py +289 -0
  80. megadetector/data_management/importers/cct_field_adjustments.py +58 -0
  81. megadetector/data_management/importers/channel_islands_to_cct.py +913 -0
  82. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +180 -0
  83. megadetector/data_management/importers/eMammal/eMammal_helpers.py +249 -0
  84. megadetector/data_management/importers/eMammal/make_eMammal_json.py +223 -0
  85. megadetector/data_management/importers/ena24_to_json.py +276 -0
  86. megadetector/data_management/importers/filenames_to_json.py +386 -0
  87. megadetector/data_management/importers/helena_to_cct.py +283 -0
  88. megadetector/data_management/importers/idaho-camera-traps.py +1407 -0
  89. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +294 -0
  90. megadetector/data_management/importers/jb_csv_to_json.py +150 -0
  91. megadetector/data_management/importers/mcgill_to_json.py +250 -0
  92. megadetector/data_management/importers/missouri_to_json.py +490 -0
  93. megadetector/data_management/importers/nacti_fieldname_adjustments.py +79 -0
  94. megadetector/data_management/importers/noaa_seals_2019.py +181 -0
  95. megadetector/data_management/importers/pc_to_json.py +365 -0
  96. megadetector/data_management/importers/plot_wni_giraffes.py +123 -0
  97. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -0
  98. megadetector/data_management/importers/prepare_zsl_imerit.py +131 -0
  99. megadetector/data_management/importers/rspb_to_json.py +356 -0
  100. megadetector/data_management/importers/save_the_elephants_survey_A.py +320 -0
  101. megadetector/data_management/importers/save_the_elephants_survey_B.py +329 -0
  102. megadetector/data_management/importers/snapshot_safari_importer.py +758 -0
  103. megadetector/data_management/importers/snapshot_safari_importer_reprise.py +665 -0
  104. megadetector/data_management/importers/snapshot_serengeti_lila.py +1067 -0
  105. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +150 -0
  106. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +153 -0
  107. megadetector/data_management/importers/sulross_get_exif.py +65 -0
  108. megadetector/data_management/importers/timelapse_csv_set_to_json.py +490 -0
  109. megadetector/data_management/importers/ubc_to_json.py +399 -0
  110. megadetector/data_management/importers/umn_to_json.py +507 -0
  111. megadetector/data_management/importers/wellington_to_json.py +263 -0
  112. megadetector/data_management/importers/wi_to_json.py +442 -0
  113. megadetector/data_management/importers/zamba_results_to_md_results.py +181 -0
  114. megadetector/data_management/labelme_to_coco.py +547 -0
  115. megadetector/data_management/labelme_to_yolo.py +272 -0
  116. megadetector/data_management/lila/__init__.py +0 -0
  117. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +97 -0
  118. megadetector/data_management/lila/add_locations_to_nacti.py +147 -0
  119. megadetector/data_management/lila/create_lila_blank_set.py +558 -0
  120. megadetector/data_management/lila/create_lila_test_set.py +152 -0
  121. megadetector/data_management/lila/create_links_to_md_results_files.py +106 -0
  122. megadetector/data_management/lila/download_lila_subset.py +178 -0
  123. megadetector/data_management/lila/generate_lila_per_image_labels.py +516 -0
  124. megadetector/data_management/lila/get_lila_annotation_counts.py +170 -0
  125. megadetector/data_management/lila/get_lila_image_counts.py +112 -0
  126. megadetector/data_management/lila/lila_common.py +300 -0
  127. megadetector/data_management/lila/test_lila_metadata_urls.py +132 -0
  128. megadetector/data_management/ocr_tools.py +874 -0
  129. megadetector/data_management/read_exif.py +681 -0
  130. megadetector/data_management/remap_coco_categories.py +84 -0
  131. megadetector/data_management/remove_exif.py +66 -0
  132. megadetector/data_management/resize_coco_dataset.py +189 -0
  133. megadetector/data_management/wi_download_csv_to_coco.py +246 -0
  134. megadetector/data_management/yolo_output_to_md_output.py +441 -0
  135. megadetector/data_management/yolo_to_coco.py +676 -0
  136. megadetector/detection/__init__.py +0 -0
  137. megadetector/detection/detector_training/__init__.py +0 -0
  138. megadetector/detection/detector_training/model_main_tf2.py +114 -0
  139. megadetector/detection/process_video.py +702 -0
  140. megadetector/detection/pytorch_detector.py +341 -0
  141. megadetector/detection/run_detector.py +779 -0
  142. megadetector/detection/run_detector_batch.py +1219 -0
  143. megadetector/detection/run_inference_with_yolov5_val.py +917 -0
  144. megadetector/detection/run_tiled_inference.py +934 -0
  145. megadetector/detection/tf_detector.py +189 -0
  146. megadetector/detection/video_utils.py +606 -0
  147. megadetector/postprocessing/__init__.py +0 -0
  148. megadetector/postprocessing/add_max_conf.py +64 -0
  149. megadetector/postprocessing/categorize_detections_by_size.py +163 -0
  150. megadetector/postprocessing/combine_api_outputs.py +249 -0
  151. megadetector/postprocessing/compare_batch_results.py +958 -0
  152. megadetector/postprocessing/convert_output_format.py +396 -0
  153. megadetector/postprocessing/load_api_results.py +195 -0
  154. megadetector/postprocessing/md_to_coco.py +310 -0
  155. megadetector/postprocessing/md_to_labelme.py +330 -0
  156. megadetector/postprocessing/merge_detections.py +401 -0
  157. megadetector/postprocessing/postprocess_batch_results.py +1902 -0
  158. megadetector/postprocessing/remap_detection_categories.py +170 -0
  159. megadetector/postprocessing/render_detection_confusion_matrix.py +660 -0
  160. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +211 -0
  161. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +83 -0
  162. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +1631 -0
  163. megadetector/postprocessing/separate_detections_into_folders.py +730 -0
  164. megadetector/postprocessing/subset_json_detector_output.py +696 -0
  165. megadetector/postprocessing/top_folders_to_bottom.py +223 -0
  166. megadetector/taxonomy_mapping/__init__.py +0 -0
  167. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +491 -0
  168. megadetector/taxonomy_mapping/map_new_lila_datasets.py +150 -0
  169. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -0
  170. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +590 -0
  171. megadetector/taxonomy_mapping/retrieve_sample_image.py +71 -0
  172. megadetector/taxonomy_mapping/simple_image_download.py +219 -0
  173. megadetector/taxonomy_mapping/species_lookup.py +834 -0
  174. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +159 -0
  175. megadetector/taxonomy_mapping/taxonomy_graph.py +346 -0
  176. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +83 -0
  177. megadetector/utils/__init__.py +0 -0
  178. megadetector/utils/azure_utils.py +178 -0
  179. megadetector/utils/ct_utils.py +612 -0
  180. megadetector/utils/directory_listing.py +246 -0
  181. megadetector/utils/md_tests.py +968 -0
  182. megadetector/utils/path_utils.py +1044 -0
  183. megadetector/utils/process_utils.py +157 -0
  184. megadetector/utils/sas_blob_utils.py +509 -0
  185. megadetector/utils/split_locations_into_train_val.py +228 -0
  186. megadetector/utils/string_utils.py +92 -0
  187. megadetector/utils/url_utils.py +323 -0
  188. megadetector/utils/write_html_image_list.py +225 -0
  189. megadetector/visualization/__init__.py +0 -0
  190. megadetector/visualization/plot_utils.py +293 -0
  191. megadetector/visualization/render_images_with_thumbnails.py +275 -0
  192. megadetector/visualization/visualization_utils.py +1536 -0
  193. megadetector/visualization/visualize_db.py +550 -0
  194. megadetector/visualization/visualize_detector_output.py +405 -0
  195. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/METADATA +1 -1
  196. megadetector-5.0.12.dist-info/RECORD +199 -0
  197. megadetector-5.0.12.dist-info/top_level.txt +1 -0
  198. megadetector-5.0.11.dist-info/RECORD +0 -5
  199. megadetector-5.0.11.dist-info/top_level.txt +0 -1
  200. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/LICENSE +0 -0
  201. {megadetector-5.0.11.dist-info → megadetector-5.0.12.dist-info}/WHEEL +0 -0
@@ -0,0 +1,181 @@
"""

noaa_seals_2019.py

Prepare the NOAA Arctic Seals 2019 metadata for LILA.

Interactive (cell-based) script: joins the raw NOAA detection .csv to the
list of files actually present in the LILA blob container, writes an
augmented .csv with container-relative image paths, then spot-checks one
random image by downloading it and rendering its boxes.

"""

#%% Imports and constants

import os
import random
import shutil

import pandas as pd
from tqdm import tqdm

from megadetector.utils import url_utils
from megadetector.visualization import visualization_utils

# A list of files in the lilablobssc container for this data set
container_file_list = r'C:\temp\seals\seal_files.txt'

# The raw detection files provided by NOAA
detections_fn = r'C:\temp\seals\surv_test_kamera_detections_20210212.csv'

# A version of the above with filename columns added
detections_fn_full_paths = detections_fn.replace('.csv','_full_paths.csv')

base_url = 'https://lilablobssc.blob.core.windows.net/noaa-kotz'


#%% Read input .csv

df = pd.read_csv(detections_fn)

# Filled in by the path-resolution loop below; left as '' for rows we can't resolve
df['rgb_image_path'] = ''
df['ir_image_path'] = ''
print('Read {} rows from {}'.format(len(df),detections_fn))

# Maps single-character camera-view codes in the .csv to container folder names
camera_view_to_path = {}
camera_view_to_path['C'] = 'CENT'
camera_view_to_path['L'] = 'LEFT'

valid_flights = {'fl04','fl05','fl06','fl07'}


#%% Read list of files

with open(container_file_list,'r') as f:
    all_files = f.readlines()
all_files = [s.strip() for s in all_files]

# Set for O(1) membership tests in the loop below
all_files = set(all_files)


#%% Convert paths to full paths

missing_ir_files = []

# i_row = 0; row = df.iloc[i_row]
for i_row,row in tqdm(df.iterrows(),total=len(df)):

    assert row['flight'] in valid_flights
    assert row['camera_view'] in camera_view_to_path

    # Every row must have an RGB image, and it must exist in the container
    assert isinstance(row['rgb_image_name'],str)
    rgb_image_path = 'Images/{}/{}/{}'.format(row['flight'],camera_view_to_path[row['camera_view']],
                                              row['rgb_image_name'])
    assert rgb_image_path in all_files
    df.loc[i_row,'rgb_image_path'] = rgb_image_path

    # IR images are optional; a non-string value (NaN) means "no IR image"
    if not isinstance(row['ir_image_name'],str):
        continue

    ir_image_path = 'Images/{}/{}/{}'.format(row['flight'],camera_view_to_path[row['camera_view']],
                                             row['ir_image_name'])
    # Some referenced IR files are absent from the container; record them for
    # review, but still write the path into the table.
    if ir_image_path not in all_files:
        missing_ir_files.append(ir_image_path)
    df.loc[i_row,'ir_image_path'] = ir_image_path

# ...for each row

missing_ir_files = list(set(missing_ir_files))
missing_ir_files.sort()
print('{} missing IR files (of {})'.format(len(missing_ir_files),len(df)))

for s in missing_ir_files:
    print(s)


#%% Write results

df.to_csv(detections_fn_full_paths,index=False)


#%% Load output file, just to be sure

df = pd.read_csv(detections_fn_full_paths)


#%% Render annotations on an image

# randint() is inclusive on *both* ends, so the upper bound must be len(df)-1;
# the previous upper bound of len(df) could index one past the end of the table.
i_image = random.randint(0,len(df)-1)
# i_image = 2004
row = df.iloc[i_image]
rgb_image_path = row['rgb_image_path']
rgb_image_url = base_url + '/' + rgb_image_path
ir_image_path = row['ir_image_path']
# NOTE(review): rows with no IR image were written as '' and read back from
# .csv as NaN, in which case this concatenation raises TypeError — re-run the
# previous cell to pick a different row if that happens.
ir_image_url = base_url + '/' + ir_image_path


#%% Download the image

rgb_image_fn = url_utils.download_url(rgb_image_url,progress_updater=True)
ir_image_fn = url_utils.download_url(ir_image_url,progress_updater=True)


#%% Find all the rows (detections) associated with this image

# Boxes are stored as [l,r,t,b], in pixel coordinates
rgb_boxes = []
ir_boxes = []

for i_row,row in df.iterrows():

    if row['rgb_image_path'] == rgb_image_path:
        box_l = row['rgb_left']
        box_r = row['rgb_right']
        box_t = row['rgb_top']
        box_b = row['rgb_bottom']
        rgb_boxes.append([box_l,box_r,box_t,box_b])

    if row['ir_image_path'] == ir_image_path:
        box_l = row['ir_left']
        box_r = row['ir_right']
        box_t = row['ir_top']
        box_b = row['ir_bottom']
        ir_boxes.append([box_l,box_r,box_t,box_b])

print('Found {} RGB, {} IR annotations for this image'.format(len(rgb_boxes),
                                                              len(ir_boxes)))


#%% Render the detections on the image(s)

img_rgb = visualization_utils.load_image(rgb_image_fn)
img_ir = visualization_utils.load_image(ir_image_fn)

for b in rgb_boxes:

    # In pixel coordinates; "top" is the larger y value in this data set
    box_left = b[0]; box_right = b[1]; box_top = b[2]; box_bottom = b[3]
    assert box_top > box_bottom; assert box_right > box_left
    ymin = box_bottom; ymax = box_top; xmin = box_left; xmax = box_right

    visualization_utils.draw_bounding_box_on_image(img_rgb,ymin,xmin,ymax,xmax,
                                                   use_normalized_coordinates=False,
                                                   thickness=3)

for b in ir_boxes:

    # In pixel coordinates
    box_left = b[0]; box_right = b[1]; box_top = b[2]; box_bottom = b[3]
    assert box_top > box_bottom; assert box_right > box_left
    ymin = box_bottom; ymax = box_top; xmin = box_left; xmax = box_right

    visualization_utils.draw_bounding_box_on_image(img_ir,ymin,xmin,ymax,xmax,
                                                   use_normalized_coordinates=False,
                                                   thickness=3)

# visualization_utils.show_images_in_a_row([img_rgb,img_ir])


#%% Save images

img_rgb.save(r'c:\temp\seals_rgb.png')
img_ir.save(r'c:\temp\seals_ir.png')


#%% Clean up

# download_url() put the images in a temporary folder; remove it
tmp_dir = os.path.dirname(rgb_image_fn)
shutil.rmtree(tmp_dir)
@@ -0,0 +1,365 @@
"""

pc_to_json.py

Convert a particular collection of .csv files from Parks Canada to CCT format.

Interactive (cell-based) script: reads one .csv per image folder, builds
COCO-Camera-Traps image/annotation/category records, optionally subsamples
unlabeled images, writes the .json database, and renders a preview.

"""

#%% Constants and environment

import pandas as pd
import uuid
import json
import time

import numpy as np
from tqdm import tqdm

import humanfriendly
import os
import PIL

from megadetector.data_management.databases import integrity_check_json_db
from megadetector.data_management.cct_json_utils import IndexedJsonDb
from megadetector.data_management import cct_json_to_filename_json
from megadetector.visualization import visualize_db
from megadetector.utils import path_utils

input_base = r"g:\20190715"
output_file = r"D:\wildlife_data\parks_canada\pc_20190715.json"
preview_base = r"D:\wildlife_data\parks_canada\preview"

filename_replacements = {}
category_mappings = {'':'unlabeled'}

csv_prefix = 'ImageData_Microsoft___'

expected_columns = 'Location,DateImage,TimeImage,Species,Total,Horses,DogsOnLeash,DogsOffLeash,AdultFemale,AdultMale,AdultUnknown,Subadult,YLY,YOY,ImageName'.split(',')

# Maps source .csv column names to the annotation field names we emit
columns_to_copy = {'Total':'count','Horses':'horses','DogsOnLeash':'dogsonleash','DogsOffLeash':'dogsoffleash',
                   'AdultFemale':'adultfemale','AdultMale':'adultmale','AdultUnknown':'adultunknown',
                   'Subadult':'subadult','YLY':'yearling','YOY':'youngofyear'}

retrieve_image_sizes = False

max_num_csvs = -1

db_sampling_scheme = 'preview' # 'labeled','all'
n_unlabeled_to_sample = -1
cap_unlabeled_to_labeled = True


#%% Read and concatenate source data

input_files = os.listdir(input_base)

# One dataframe per .csv file; concatenated below
all_input_metadata = []

for i_file,fn in enumerate(input_files):

    if max_num_csvs > 0 and len(all_input_metadata) >= max_num_csvs:
        break

    # Only process the per-folder metadata .csv files
    if not fn.endswith('.csv'):
        continue
    if not fn.startswith(csv_prefix):
        continue

    # The folder name is the .csv name minus prefix and extension
    dirname = fn.replace(csv_prefix,'').replace('.csv','')
    dirfullpath = os.path.join(input_base,dirname)
    if not os.path.isdir(dirfullpath):
        # NOTE(review): both arguments to this replace render as a single
        # space — presumably whitespace normalization; verify against the
        # original file.
        dirname = fn.replace(csv_prefix,'').replace('.csv','').replace(' ',' ')
        dirfullpath = os.path.join(input_base,dirname)
        assert os.path.isdir(dirfullpath)

    metadata_fullpath = os.path.join(input_base,fn)
    print('Reading {}'.format(metadata_fullpath))
    csv_df = pd.read_csv(metadata_fullpath)
    assert list(csv_df.columns) == expected_columns
    csv_df['DirName'] = dirname
    all_input_metadata.append(csv_df)

# Concatenate into a giant data frame
input_metadata = pd.concat(all_input_metadata)

print('Read {} rows total'.format(len(input_metadata)))


#%% List files

print('Listing images...')
image_full_paths = path_utils.find_images(input_base,bRecursive=True)
print('Finished listing {} images'.format(len(image_full_paths)))

# Relative paths as a set, for fast membership tests
image_relative_paths = {os.path.relpath(p,input_base) for p in image_full_paths}

# Lower-cased copy, used only to diagnose case mismatches
image_relative_paths_lower = {p.lower() for p in image_relative_paths}


#%% Main loop over labels (prep)

start_time = time.time()

relative_path_to_image = {}

images = []
annotations = []
category_name_to_category = {}
missing_files = []

# Force the empty category to be ID 0
empty_category = {'name':'empty','id':0}
category_name_to_category['empty'] = empty_category
next_category_id = 1

labeled_images = []
unlabeled_images = []


#%% Main loop over labels (loop)

for i_row,row in tqdm(input_metadata.iterrows(),total=len(input_metadata)):

    # Generate a fresh ID; only used if this is the first row for this image
    image_id = str(uuid.uuid1())
    relative_path = os.path.normpath(row['ImageName'])

    if relative_path not in image_relative_paths:
        if relative_path.lower() in image_relative_paths_lower:
            print('Warning: lower-case version of {} in path list'.format(relative_path))
        else:
            missing_files.append(relative_path)
            continue

    full_path = os.path.join(input_base,relative_path)

    # assert os.path.isfile(full_path)

    if relative_path in relative_path_to_image:

        # We've already seen this image on a previous row
        im = relative_path_to_image[relative_path]

    else:

        im = {}
        im['id'] = image_id
        im['file_name'] = relative_path
        im['seq_id'] = '-1'
        im['location'] = row['Location']
        im['datetime'] = row['DateImage'] + ' ' + row['TimeImage']

        images.append(im)
        relative_path_to_image[relative_path] = im

        if retrieve_image_sizes:

            # Retrieve image width and height
            pil_im = PIL.Image.open(full_path)
            width, height = pil_im.size
            im['width'] = width
            im['height'] = height

    # A NaN species cell means "unlabeled"
    species = row['Species']
    if isinstance(species,float):
        assert np.isnan(species)
        species = 'unlabeled'

    category_name = species.lower().strip()
    if category_name in category_mappings:
        category_name = category_mappings[category_name]

    if category_name not in category_name_to_category:
        category = {'name':category_name,'id':next_category_id}
        next_category_id += 1
        category_name_to_category[category_name] = category
    else:
        category = category_name_to_category[category_name]
        assert category['name'] == category_name

    category_id = category['id']

    if category_name == 'unlabeled':
        unlabeled_images.append(im)
    else:
        labeled_images.append(im)

    # Create an annotation
    ann = {}

    # The Internet tells me this guarantees uniqueness to a reasonable extent, even
    # beyond the sheer improbability of collisions.
    ann['id'] = str(uuid.uuid1())
    ann['image_id'] = im['id']
    ann['category_id'] = category_id

    # Carry the count columns over, renamed per columns_to_copy
    for src_col,dst_col in columns_to_copy.items():
        ann[dst_col] = row[src_col]

    annotations.append(ann)

# ...for each image

categories = list(category_name_to_category.values())

elapsed = time.time() - start_time
print('Finished verifying file loop in {}, {} matched images, {} missing images, {} unlabeled images'.format(
    humanfriendly.format_timespan(elapsed), len(images), len(missing_files), len(unlabeled_images)))


#%% See what's up with missing files

# All folder names that actually contain images
dirnames = {os.path.dirname(p) for p in image_relative_paths}

missing_images_with_missing_dirs = []
missing_images_with_non_missing_dirs = []

missing_dirs = set()

for missing_path in missing_files:
    assert missing_path not in image_relative_paths
    dirname = os.path.dirname(missing_path)
    if dirname not in dirnames:
        missing_images_with_missing_dirs.append(missing_path)
        missing_dirs.add(dirname)
    else:
        missing_images_with_non_missing_dirs.append(missing_path)

print('Of {} missing files, {} are due to {} missing folders'.format(
    len(missing_files),len(missing_images_with_missing_dirs),len(missing_dirs)))


#%% Check for images that aren't included in the metadata file

unmatched_files = []

for i_image,relative_path in tqdm(enumerate(image_relative_paths),total=len(image_relative_paths)):

    if relative_path not in relative_path_to_image:
        unmatched_files.append(relative_path)

print('Finished checking {} images to make sure they\'re in the metadata, found {} mismatches'.format(
    len(image_relative_paths),len(unmatched_files)))


#%% Sample the database

# Keep the full lists around before (possibly) subsampling
images_all = images
annotations_all = annotations

#%%

if db_sampling_scheme == 'all':

    pass

elif db_sampling_scheme in ('labeled','preview'):

    json_data = {}
    json_data['images'] = images
    json_data['annotations'] = annotations
    json_data['categories'] = categories

    indexed_db = IndexedJsonDb(json_data)

    # Keep every image that has at least one non-"unlabeled" class
    sampled_images = []
    for im in images:
        classes = indexed_db.get_classes_for_image(im)
        if not ('unlabeled' in classes and len(classes) == 1):
            sampled_images.append(im)

    if db_sampling_scheme == 'preview':
        n_sample = n_unlabeled_to_sample
        if n_sample == -1:
            n_sample = len(labeled_images)
        if n_sample > len(labeled_images) and cap_unlabeled_to_labeled:
            n_sample = len(labeled_images)
        if n_sample > len(unlabeled_images):
            n_sample = len(unlabeled_images)
        print('Sampling {} of {} unlabeled images'.format(n_sample,len(unlabeled_images)))
        from random import sample
        sampled_images.extend(sample(unlabeled_images,n_sample))

    sampled_annotations = []
    for im in sampled_images:
        sampled_annotations.extend(indexed_db.get_annotations_for_image(im))

    print('Sampling {} of {} images, {} of {} annotations'.format(
        len(sampled_images),len(images),len(sampled_annotations),len(annotations)))

    images = sampled_images
    annotations = sampled_annotations

else:

    raise ValueError('Unrecognized DB sampling scheme {}'.format(db_sampling_scheme))


#%% Create info struct

info = {
    'year': 2019,
    'version': 1,
    'description': 'COCO style database',
    'secondary_contributor': 'Converted to COCO .json by Dan Morris',
    'contributor': 'Parks Canada'
}


#%% Write output

json_data = {}
json_data['images'] = images
json_data['annotations'] = annotations
json_data['categories'] = categories
json_data['info'] = info
with open(output_file,'w') as out_f:
    json.dump(json_data, out_f, indent=4)

print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
    len(images),len(annotations),len(categories)))


#%% Validate the database's integrity

with open(output_file) as in_f:
    json_data = json.load(in_f)
options = integrity_check_json_db.IntegrityCheckOptions()
sorted_categories,data = integrity_check_json_db.integrity_check_json_db(json_data, options)


#%% Render a bunch of images to make sure the labels got carried along correctly

output_dir = preview_base

options = visualize_db.DbVizOptions()
options.num_to_visualize = 100
options.sort_by_filename = False
# options.classes_to_exclude = ['unlabeled']
options.classes_to_exclude = None

html_output_file,_ = visualize_db.visualize_db(json_data,output_dir,input_base,options)
os.startfile(html_output_file)


#%% Write out a list of files to annotate

_,file_list = cct_json_to_filename_json.convertJsonToStringList(output_file,prepend="20190715/")
os.startfile(file_list)
@@ -0,0 +1,123 @@
"""

plot_wni_giraffes.py

Plot keypoints on a random sample of images from the wni-giraffes data set.

For each sampled annotation, draws the consensus (median) keypoint for every
annotation tool on the image, optionally along with each individual
annotator's click, then writes the result to a preview folder.

"""

#%% Constants and imports

import os
import json
import random

from PIL import Image
from PIL import ImageDraw
from tqdm import tqdm

input_file = r"G:\data_staging\wni-out\wni_giraffes_train.json"
image_base = r"G:\data_staging\wni-out\images"
output_base = r"G:\data_staging\wni-out\test-plots"
os.makedirs(output_base,exist_ok=True)

# One color per annotation tool, indexed by tool position
tool_colors = ['red','green','blue','magenta']
use_fancy_ellipses = True
draw_individual_samples = False

# Ellipse size/stroke for the consensus (median) keypoints
median_radius = 20
median_linewidth = 8

# Ellipse size for individual annotator samples
sample_radius = 10

n_images_to_plot = 100


#%% Load and select data

with open(input_file,'r') as f:
    dataset = json.load(f)
annotations = dataset['annotations']
print(dataset['info'])

short_tool_names = list(dataset['info']['tool_names'].keys())
annotations_to_plot = random.sample(annotations,n_images_to_plot)


#%% Support functions

# https://stackoverflow.com/questions/32504246/draw-ellipse-in-python-pil-with-line-thickness
def draw_fancy_ellipse(image, x, y, radius, width=1, outline='white', antialias=4):
    """
    Draw an antialiased ellipse *outline* (not filled) of the given stroke
    width onto [image], centered at (x,y), by rendering an oversampled mask
    and pasting [outline] color through it.
    """

    bounds = (x-radius,y-radius,x+radius,y+radius)

    # Single-channel oversampled mask; drawing at [antialias]x resolution and
    # downsampling gives smoother edges than drawing directly on the image.
    mask = Image.new(
        size=[int(dim * antialias) for dim in image.size],
        mode='L', color='black')
    mask_draw = ImageDraw.Draw(mask)

    # Outer ellipse in white (opaque), inner ellipse in black (transparent),
    # leaving a ring of the requested stroke width
    for offset, fill in (width/-2.0, 'white'), (width/2.0, 'black'):
        x0 = (bounds[0] + offset) * antialias
        y0 = (bounds[1] + offset) * antialias
        x1 = (bounds[2] - offset) * antialias
        y1 = (bounds[3] - offset) * antialias
        mask_draw.ellipse([x0, y0, x1, y1], fill=fill)

    # Downsample the mask with a high-quality filter
    # NOTE(review): Image.LANCZOS is deprecated in newer Pillow releases in
    # favor of Image.Resampling.LANCZOS — confirm the pinned Pillow version.
    mask = mask.resize(image.size, Image.LANCZOS)

    # Paste the outline color into the image through the mask
    image.paste(outline, mask=mask)


def draw_ellipse(image, x, y, radius, linewidth, color_index, use_imagedraw=False):
    """
    Draw a keypoint marker at (x,y) in tool_colors[color_index]: an
    antialiased outline ring when [use_imagedraw] is True, otherwise a plain
    filled ellipse ([linewidth] is ignored in that case).
    """

    if use_imagedraw:
        draw_fancy_ellipse(image, x, y, radius=radius, width=linewidth, outline=tool_colors[color_index])
    else:
        plain_draw = ImageDraw.Draw(image)
        ellipse_bounds = (x-radius,y-radius,x+radius,y+radius)
        plain_draw.ellipse(ellipse_bounds, fill=tool_colors[color_index])


#%% Plot some images

for ann in tqdm(annotations_to_plot):

    input_path = os.path.join(image_base,ann['filename'])

    # Flatten the relative path into a single output filename
    output_path = os.path.join(output_base,ann['filename'].replace('/','_'))

    im = Image.open(input_path)

    for i_tool,tool_name in enumerate(short_tool_names):

        tool_keypoints = ann['keypoints'][tool_name]

        # Don't plot tools that don't have a consensus annotation
        if tool_keypoints['median_x'] is None:
            continue

        median_x = tool_keypoints['median_x']
        median_y = tool_keypoints['median_y']

        draw_ellipse(im, median_x, median_y, median_radius, median_linewidth, color_index=i_tool,
                     use_imagedraw=use_fancy_ellipses)

        # Optionally plot every individual annotator's click as well
        if draw_individual_samples:
            for i_sample in range(0,len(tool_keypoints['x'])):
                sample_x = tool_keypoints['x'][i_sample]
                sample_y = tool_keypoints['y'][i_sample]
                draw_ellipse(im, sample_x, sample_y, sample_radius, None, color_index=i_tool,
                             use_imagedraw=False)

    # ...for each tool

    im.save(output_path)

# ...for each annotation