megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (191) hide show
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +93 -79
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
  20. api/batch_processing/postprocessing/compare_batch_results.py +114 -44
  21. api/batch_processing/postprocessing/convert_output_format.py +62 -19
  22. api/batch_processing/postprocessing/load_api_results.py +17 -20
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +165 -68
  25. api/batch_processing/postprocessing/merge_detections.py +40 -15
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
  27. api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +107 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -0
  71. data_management/coco_to_yolo.py +86 -62
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +130 -83
  76. data_management/databases/subset_json_db.py +25 -16
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -144
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -160
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +8 -8
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +309 -159
  120. data_management/labelme_to_yolo.py +103 -60
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +114 -31
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +92 -90
  128. data_management/lila/generate_lila_per_image_labels.py +56 -43
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +103 -70
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +161 -99
  135. data_management/remap_coco_categories.py +84 -0
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +32 -44
  138. data_management/wi_download_csv_to_coco.py +246 -0
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +535 -95
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +189 -114
  147. detection/run_inference_with_yolov5_val.py +118 -51
  148. detection/run_tiled_inference.py +113 -42
  149. detection/tf_detector.py +51 -28
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +249 -70
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -862
  157. md_utils/path_utils.py +655 -155
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +208 -27
  163. md_utils/write_html_image_list.py +51 -35
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +908 -311
  168. md_visualization/visualize_db.py +109 -58
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
  171. megadetector-5.0.9.dist-info/RECORD +224 -0
  172. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
  173. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
  174. taxonomy_mapping/__init__.py +0 -0
  175. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  176. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  177. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  178. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  179. taxonomy_mapping/retrieve_sample_image.py +12 -12
  180. taxonomy_mapping/simple_image_download.py +11 -11
  181. taxonomy_mapping/species_lookup.py +10 -10
  182. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  183. taxonomy_mapping/taxonomy_graph.py +47 -47
  184. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  185. data_management/cct_json_to_filename_json.py +0 -89
  186. data_management/cct_to_csv.py +0 -140
  187. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  188. detection/detector_training/copy_checkpoints.py +0 -43
  189. md_visualization/visualize_megadb.py +0 -183
  190. megadetector-5.0.7.dist-info/RECORD +0 -202
  191. {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
@@ -1,17 +1,17 @@
1
- ########
2
- #
3
- # snapshot_serengeti_lila.py
4
- #
5
- # Create zipfiles of Snapshot Serengeti S1-S11.
6
- #
7
- # Create a metadata file for S1-S10, plus separate metadata files
8
- # for S1-S11. At the time this code was written, S11 was under embargo.
9
- #
10
- # Create zip archives of each season without humans.
11
- #
12
- # Create a human zip archive.
13
- #
14
- ########
1
+ """
2
+
3
+ snapshot_serengeti_lila.py
4
+
5
+ Create zipfiles of Snapshot Serengeti S1-S11.
6
+
7
+ Create a metadata file for S1-S10, plus separate metadata files
8
+ for S1-S11. At the time this code was written, S11 was under embargo.
9
+
10
+ Create zip archives of each season without humans.
11
+
12
+ Create a human zip archive.
13
+
14
+ """
15
15
 
16
16
  #%% Constants and imports
17
17
 
@@ -1,11 +1,11 @@
1
- ########
2
- #
3
- # sulross_get_exif.py
4
- #
5
- # For the Sul Ross dataset, species informationw was stored in XMP metadata; pull
6
- # all that metadata out to .json.
7
- #
8
- ########
1
+ """
2
+
3
+ sulross_get_exif.py
4
+
5
+ For the Sul Ross dataset, species information was stored in XMP metadata; pull
6
+ all that metadata out to .json.
7
+
8
+ """
9
9
 
10
10
  import os
11
11
  import json
@@ -61,6 +61,5 @@ def get_metadata():
61
61
  json.dump(image_id_to_metadata, f, indent=1)
62
62
  print('Results saved. Done!')
63
63
 
64
-
65
64
  if __name__ == '__main__':
66
65
  get_metadata()
@@ -1,14 +1,14 @@
1
- ########
2
- #
3
- # timelapse_csv_set_to_json.py
4
- #
5
- # Given a directory full of reasonably-consistent Timelapse-exported
6
- # .csvs, assemble a CCT .json.
7
- #
8
- # Assumes that you have a list of all files in the directory tree, including
9
- # image and .csv files.
10
- #
11
- ########
1
+ """
2
+
3
+ timelapse_csv_set_to_json.py
4
+
5
+ Given a directory full of reasonably-consistent Timelapse-exported
6
+ .csvs, assemble a CCT .json.
7
+
8
+ Assumes that you have a list of all files in the directory tree, including
9
+ image and .csv files.
10
+
11
+ """
12
12
 
13
13
  #%% Constants and imports
14
14
 
@@ -1,16 +1,16 @@
1
- ########
2
- #
3
- # ubc_to_json.py
4
- #
5
- # Convert the .csv file provided for the UBC data set to a
6
- # COCO-camera-traps .json file
7
- #
8
- # Images were provided in eight folders, each of which contained a .csv
9
- # file with annotations. Those annotations came in two slightly different
10
- # formats, the two formats corresponding to folders starting with "SC_" and
11
- # otherwise.
12
- #
13
- ########
1
+ """
2
+
3
+ ubc_to_json.py
4
+
5
+ Convert the .csv file provided for the UBC data set to a
6
+ COCO-camera-traps .json file
7
+
8
+ Images were provided in eight folders, each of which contained a .csv
9
+ file with annotations. Those annotations came in two slightly different
10
+ formats, the two formats corresponding to folders starting with "SC_" and
11
+ otherwise.
12
+
13
+ """
14
14
 
15
15
  #%% Constants and environment
16
16
 
@@ -1,10 +1,10 @@
1
- ########
2
- #
3
- # umn_to_json.py
4
- #
5
- # Prepare images and metadata for the Orinoquía Camera Traps dataset.
6
- #
7
- ########
1
+ """
2
+
3
+ umn_to_json.py
4
+
5
+ Prepare images and metadata for the Orinoquía Camera Traps dataset.
6
+
7
+ """
8
8
 
9
9
  #%% Imports and constants
10
10
 
@@ -1,11 +1,11 @@
1
- ########
2
- #
3
- # wellington_to_json.py
4
- #
5
- # Convert the .csv file provided for the Wellington data set to a
6
- # COCO-camera-traps .json file
7
- #
8
- ########
1
+ """
2
+
3
+ wellington_to_json.py
4
+
5
+ Convert the .csv file provided for the Wellington data set to a
6
+ COCO-camera-traps .json file
7
+
8
+ """
9
9
 
10
10
  #%% Constants and environment
11
11
 
@@ -1,12 +1,12 @@
1
- ########
2
- #
3
- # wi_to_json
4
- #
5
- # Prepares CCT-formatted metadata based on a Wildlife Insights data export.
6
- #
7
- # Mostly assumes you have the images also, for validation/QA.
8
- #
9
- ########
1
+ """
2
+
3
+ wi_to_json
4
+
5
+ Prepares CCT-formatted metadata based on a Wildlife Insights data export.
6
+
7
+ Mostly assumes you have the images also, for validation/QA.
8
+
9
+ """
10
10
 
11
11
  #%% Imports and constants
12
12
 
@@ -1,181 +1,181 @@
1
- ########
2
- #
3
- # zamba_results_to_md_results.py
4
- #
5
- # Convert a labels.csv file produced by Zamba Cloud to a MD results file suitable
6
- # for import into Timelapse.
7
- #
8
- # Columns are expected to be:
9
- #
10
- # video_uuid (not used)
11
- # original_filename (assumed to be a relative path name)
12
- # top_k_label,top_k_probability, for k = 1..N
13
- # [category name 1],[category name 2],...
14
- # corrected_label
15
- #
16
- # Because the MD results file fundamentally stores detections, what we'll
17
- # actually do is created bogus detections that fill the entire image. Detection
18
- # coordinates are not currently used in Timelapse video video anyway.
19
- #
20
- # There is no special handling of empty/blank categories; because these results are
21
- # based on a classifier, rather than a detector (where "blank" would be the absence of
22
- # all other categories), "blank" can be queried in Timelapse just like any other class.
23
- #
24
- ########
25
-
26
- #%% Imports and constants
27
-
28
- import pandas as pd
29
- import json
30
-
31
-
32
- #%% Main function
33
-
34
- def zamba_results_to_md_results(input_file,output_file=None):
35
- """
36
- Converts the .csv file [input_file] to the MD-formatted .json file [output_file].
37
-
38
- If [output_file] is None, '.json' will be appended to the input file.
39
- """
40
-
41
- if output_file is None:
42
- output_file = input_file + '.json'
43
-
44
- df = pd.read_csv(input_file)
45
-
46
- expected_columns = ('video_uuid','corrected_label','original_filename')
47
- for s in expected_columns:
48
- assert s in df.columns,\
49
- 'Expected column {} not found, are you sure this is a Zamba results .csv file?'.format(
50
- s)
51
-
52
- # How many results are included per file?
53
- assert 'top_1_probability' in df.columns and 'top_1_label' in df.columns
54
- top_k = 2
55
- while(True):
56
- p_string = 'top_' + str(top_k) + '_probability'
57
- label_string = 'top_' + str(top_k) + '_label'
58
-
59
- if p_string in df.columns:
60
- assert label_string in df.columns,\
61
- 'Oops, {} is a column but {} is not'.format(
62
- p_string,label_string)
63
- top_k += 1
64
- continue
65
- else:
66
- assert label_string not in df.columns,\
67
- 'Oops, {} is a column but {} is not'.format(
68
- label_string,p_string)
69
- top_k -= 1
70
- break
71
-
72
- print('Found {} probability column pairs'.format(top_k))
73
-
74
- # Category names start after the fixed columns and the probability columns
75
- category_names = []
76
- column_names = list(df.columns)
77
- first_category_name_index = 0
78
- while('top_' in column_names[first_category_name_index] or \
79
- column_names[first_category_name_index] in expected_columns):
80
- first_category_name_index += 1
81
-
82
- i_column = first_category_name_index
83
- while( (i_column < len(column_names)) and (column_names[i_column] != 'corrected_label') ):
84
- category_names.append(column_names[i_column])
85
- i_column += 1
86
-
87
- print('Found {} categories:\n'.format(len(category_names)))
88
-
89
- for s in category_names:
90
- print(s)
91
-
92
- info = {}
93
- info['format_version'] = '1.3'
94
- info['detector'] = 'Zamba Cloud'
95
- info['classifier'] = 'Zamba Cloud'
96
-
97
- detection_category_id_to_name = {}
98
- for category_id,category_name in enumerate(category_names):
99
- detection_category_id_to_name[str(category_id)] = category_name
100
- detection_category_name_to_id = {v: k for k, v in detection_category_id_to_name.items()}
101
-
102
- images = []
103
-
104
- # i_row = 0; row = df.iloc[i_row]
105
- for i_row,row in df.iterrows():
106
-
107
- im = {}
108
- images.append(im)
109
- im['file'] = row['original_filename']
110
-
111
- detections = []
112
-
113
- # k = 1
114
- for k in range(1,top_k+1):
115
- label = row['top_{}_label'.format(k)]
116
- confidence = row['top_{}_probability'.format(k)]
117
- det = {}
118
- det['category'] = detection_category_name_to_id[label]
119
- det['conf'] = confidence
120
- det['bbox'] = [0,0,1.0,1.0]
121
- detections.append(det)
122
-
123
- im['detections'] = detections
124
-
125
- # ...for each row
126
-
127
- results = {}
128
- results['info'] = info
129
- results['detection_categories'] = detection_category_id_to_name
130
- results['images'] = images
131
-
132
- with open(output_file,'w') as f:
133
- json.dump(results,f,indent=1)
134
-
135
- # ...zamba_results_to_md_results(...)
136
-
137
-
138
- #%% Interactive driver
139
-
140
- if False:
141
-
142
- pass
143
-
144
- #%%
145
-
146
- input_file = r"G:\temp\labels-job-b95a4b76-e332-4e17-ab40-03469392d36a-2023-11-04_16-28-50.060130.csv"
147
- output_file = None
148
- zamba_results_to_md_results(input_file,output_file)
149
-
150
-
151
- #%% Command-line driver
152
-
153
- import sys,argparse
154
-
155
- def main():
156
-
157
- parser = argparse.ArgumentParser(
158
- description='Convert a Zamba-formatted .csv results file to a MD-formatted .json results file')
159
-
160
- parser.add_argument(
161
- 'input_file',
162
- type=str,
163
- help='input .csv file')
164
-
165
- parser.add_argument(
166
- '--output_file',
167
- type=str,
168
- default=None,
169
- help='output .json file (defaults to input file appended with ".json")')
170
-
171
- if len(sys.argv[1:]) == 0:
172
- parser.print_help()
173
- parser.exit()
174
-
175
- args = parser.parse_args()
176
-
177
- zamba_results_to_md_results(args.input_file,args.output_file)
178
-
179
- if __name__ == '__main__':
180
- main()
181
-
1
+ """
2
+
3
+ zamba_results_to_md_results.py
4
+
5
+ Convert a labels.csv file produced by Zamba Cloud to a MD results file suitable
6
+ for import into Timelapse.
7
+
8
+ Columns are expected to be:
9
+
10
+ video_uuid (not used)
11
+ original_filename (assumed to be a relative path name)
12
+ top_k_label,top_k_probability, for k = 1..N
13
+ [category name 1],[category name 2],...
14
+ corrected_label
15
+
16
+ Because the MD results file fundamentally stores detections, what we'll
17
+ actually do is create bogus detections that fill the entire image. Detection
18
+ coordinates are not currently used in Timelapse video anyway.
19
+
20
+ There is no special handling of empty/blank categories; because these results are
21
+ based on a classifier, rather than a detector (where "blank" would be the absence of
22
+ all other categories), "blank" can be queried in Timelapse just like any other class.
23
+
24
+ """
25
+
26
+ #%% Imports and constants
27
+
28
+ import pandas as pd
29
+ import json
30
+
31
+
32
+ #%% Main function
33
+
34
+ def zamba_results_to_md_results(input_file,output_file=None):
35
+ """
36
+ Converts the .csv file [input_file] to the MD-formatted .json file [output_file].
37
+
38
+ If [output_file] is None, '.json' will be appended to the input file.
39
+ """
40
+
41
+ if output_file is None:
42
+ output_file = input_file + '.json'
43
+
44
+ df = pd.read_csv(input_file)
45
+
46
+ expected_columns = ('video_uuid','corrected_label','original_filename')
47
+ for s in expected_columns:
48
+ assert s in df.columns,\
49
+ 'Expected column {} not found, are you sure this is a Zamba results .csv file?'.format(
50
+ s)
51
+
52
+ # How many results are included per file?
53
+ assert 'top_1_probability' in df.columns and 'top_1_label' in df.columns
54
+ top_k = 2
55
+ while(True):
56
+ p_string = 'top_' + str(top_k) + '_probability'
57
+ label_string = 'top_' + str(top_k) + '_label'
58
+
59
+ if p_string in df.columns:
60
+ assert label_string in df.columns,\
61
+ 'Oops, {} is a column but {} is not'.format(
62
+ p_string,label_string)
63
+ top_k += 1
64
+ continue
65
+ else:
66
+ assert label_string not in df.columns,\
67
+ 'Oops, {} is a column but {} is not'.format(
68
+ label_string,p_string)
69
+ top_k -= 1
70
+ break
71
+
72
+ print('Found {} probability column pairs'.format(top_k))
73
+
74
+ # Category names start after the fixed columns and the probability columns
75
+ category_names = []
76
+ column_names = list(df.columns)
77
+ first_category_name_index = 0
78
+ while('top_' in column_names[first_category_name_index] or \
79
+ column_names[first_category_name_index] in expected_columns):
80
+ first_category_name_index += 1
81
+
82
+ i_column = first_category_name_index
83
+ while( (i_column < len(column_names)) and (column_names[i_column] != 'corrected_label') ):
84
+ category_names.append(column_names[i_column])
85
+ i_column += 1
86
+
87
+ print('Found {} categories:\n'.format(len(category_names)))
88
+
89
+ for s in category_names:
90
+ print(s)
91
+
92
+ info = {}
93
+ info['format_version'] = '1.3'
94
+ info['detector'] = 'Zamba Cloud'
95
+ info['classifier'] = 'Zamba Cloud'
96
+
97
+ detection_category_id_to_name = {}
98
+ for category_id,category_name in enumerate(category_names):
99
+ detection_category_id_to_name[str(category_id)] = category_name
100
+ detection_category_name_to_id = {v: k for k, v in detection_category_id_to_name.items()}
101
+
102
+ images = []
103
+
104
+ # i_row = 0; row = df.iloc[i_row]
105
+ for i_row,row in df.iterrows():
106
+
107
+ im = {}
108
+ images.append(im)
109
+ im['file'] = row['original_filename']
110
+
111
+ detections = []
112
+
113
+ # k = 1
114
+ for k in range(1,top_k+1):
115
+ label = row['top_{}_label'.format(k)]
116
+ confidence = row['top_{}_probability'.format(k)]
117
+ det = {}
118
+ det['category'] = detection_category_name_to_id[label]
119
+ det['conf'] = confidence
120
+ det['bbox'] = [0,0,1.0,1.0]
121
+ detections.append(det)
122
+
123
+ im['detections'] = detections
124
+
125
+ # ...for each row
126
+
127
+ results = {}
128
+ results['info'] = info
129
+ results['detection_categories'] = detection_category_id_to_name
130
+ results['images'] = images
131
+
132
+ with open(output_file,'w') as f:
133
+ json.dump(results,f,indent=1)
134
+
135
+ # ...zamba_results_to_md_results(...)
136
+
137
+
138
+ #%% Interactive driver
139
+
140
+ if False:
141
+
142
+ pass
143
+
144
+ #%%
145
+
146
+ input_file = r"G:\temp\labels-job-b95a4b76-e332-4e17-ab40-03469392d36a-2023-11-04_16-28-50.060130.csv"
147
+ output_file = None
148
+ zamba_results_to_md_results(input_file,output_file)
149
+
150
+
151
+ #%% Command-line driver
152
+
153
+ import sys,argparse
154
+
155
+ def main():
156
+
157
+ parser = argparse.ArgumentParser(
158
+ description='Convert a Zamba-formatted .csv results file to a MD-formatted .json results file')
159
+
160
+ parser.add_argument(
161
+ 'input_file',
162
+ type=str,
163
+ help='input .csv file')
164
+
165
+ parser.add_argument(
166
+ '--output_file',
167
+ type=str,
168
+ default=None,
169
+ help='output .json file (defaults to input file appended with ".json")')
170
+
171
+ if len(sys.argv[1:]) == 0:
172
+ parser.print_help()
173
+ parser.exit()
174
+
175
+ args = parser.parse_args()
176
+
177
+ zamba_results_to_md_results(args.input_file,args.output_file)
178
+
179
+ if __name__ == '__main__':
180
+ main()
181
+