megadetector 5.0.8__py3-none-any.whl → 5.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic; see the package registry page for more details.

Files changed (190)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
  171. megadetector-5.0.10.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
@@ -1,15 +1,15 @@
1
- ########
2
- #
3
- # retrieve_sample_image.py
4
- #
5
- # Downloader that retrieves images from Google images, used for verifying taxonomy
6
- # lookups and looking for egregious mismappings (e.g., "snake" being mapped to a fish called
7
- # "snake").
8
- #
9
- # Simple wrapper around simple_image_download, but I've had to swap in and out the underlying
10
- # downloader a few times.
11
- #
12
- ########
1
+ """
2
+
3
+ retrieve_sample_image.py
4
+
5
+ Downloader that retrieves images from Google images, used for verifying taxonomy
6
+ lookups and looking for egregious mismappings (e.g., "snake" being mapped to a fish called
7
+ "snake").
8
+
9
+ Simple wrapper around simple_image_download, but I've had to swap in and out the underlying
10
+ downloader a few times.
11
+
12
+ """
13
13
 
14
14
  #%% Imports and environment
15
15
 
@@ -1,14 +1,14 @@
1
- ########
2
- #
3
- # simple_image_download.py
4
- #
5
- # Web image downloader, used in preview_lila_taxonomy.py
6
- #
7
- # Slightly modified from:
8
- #
9
- # https://github.com/RiddlerQ/simple_image_download
10
- #
11
- ########
1
+ """
2
+
3
+ simple_image_download.py
4
+
5
+ Web image downloader, used in preview_lila_taxonomy.py
6
+
7
+ Slightly modified from:
8
+
9
+ https://github.com/RiddlerQ/simple_image_download
10
+
11
+ """
12
12
 
13
13
  #%% Imports
14
14
 
@@ -1,13 +1,13 @@
1
- ########
2
- #
3
- # species_lookup.py
4
- #
5
- # Look up species names (common or scientific) in the GBIF and iNaturalist
6
- # taxonomies.
7
- #
8
- # Run initialize_taxonomy_lookup() before calling any other function.
9
- #
10
- ########
1
+ """
2
+
3
+ species_lookup.py
4
+
5
+ Look up species names (common or scientific) in the GBIF and iNaturalist
6
+ taxonomies.
7
+
8
+ Run initialize_taxonomy_lookup() before calling any other function.
9
+
10
+ """
11
11
 
12
12
  #%% Constants and imports
13
13
 
@@ -1,21 +1,21 @@
1
- ########
2
- #
3
- # taxonomy_csv_checker.py
4
- #
5
- # Checks the taxonomy CSV file to make sure that for each row:
6
- #
7
- # 1) The 'taxonomy_level' column matches the lowest-level taxon level in the
8
- # 'taxonomy_string' column.
9
- #
10
- # 2) The 'scientific_name' column matches the scientific name from the
11
- # lowest-level taxon level in the 'taxonomy_string' column.
12
- #
13
- # Prints out any mismatches.
14
- #
15
- # Also prints out nodes that have 2 ambiguous parents. See "CASE 2" from the
16
- # module docstring of taxonomy_graph.py.
17
- #
18
- ########
1
+ """
2
+
3
+ taxonomy_csv_checker.py
4
+
5
+ Checks the taxonomy CSV file to make sure that for each row:
6
+
7
+ 1) The 'taxonomy_level' column matches the lowest-level taxon level in the
8
+ 'taxonomy_string' column.
9
+
10
+ 2) The 'scientific_name' column matches the scientific name from the
11
+ lowest-level taxon level in the 'taxonomy_string' column.
12
+
13
+ Prints out any mismatches.
14
+
15
+ Also prints out nodes that have 2 ambiguous parents. See "CASE 2" from the
16
+ module docstring of taxonomy_graph.py.
17
+
18
+ """
19
19
 
20
20
  #%% Imports
21
21
 
@@ -1,50 +1,50 @@
1
- ########
2
- #
3
- # taxonomy_graph.py
4
- #
5
- # Methods for transforming taxonomy CSV into a graph structure backed by
6
- # NetworkX.
7
- #
8
- # We treat each taxon in the taxonomy as a node in a graph, represented by the
9
- # TaxonNode class. We use a NetworkX directed graph (nx.DiGraph) to keep track of
10
- # the edges (parent-child relationships) between the nodes.
11
- #
12
- # In theory, the true biological taxonomy graph should be a tree, where every
13
- # taxon node has exactly 1 parent. However, because we use both GBIF and INAT
14
- # taxonomies, there are 2 situations where a taxon node ends up with two parents.
15
- # Thus, the graph is actually a "directed acyclic graph" (DAG) instead of a tree.
16
- #
17
- # The two situations are explained in detail below. This module includes a
18
- # function dag_to_tree() which converts a DAG to a tree by heuristically removing
19
- # edges from the DAG so that each node only has 1 parent.
20
- #
21
- # CASE 1: INAT and GBIF have different granularity in their taxonomy levels
22
- # ======
23
- # An example is shown below. In dag_to_tree(), the lower parent is kept, while
24
- # the higher-up parent is discarded. In this example, the "sciurini -> sciurus"
25
- # edge would be kept, while "sciuridae -> sciurus" would be removed.
26
- #
27
- # "eastern gray squirrel" (inat) "squirrel" (gbif)
28
- # ------------------------------ -----------------
29
- # family: sciuridae
30
- # / \
31
- # subfamily: sciurinae | # skips subfamily
32
- # | |
33
- # tribe: sciurini | # skips tribe
34
- # \ /
35
- # genus: sciurus
36
- #
37
- #
38
- # CASE 2: INAT and GBIF have different taxonomies
39
- # ======
40
- # An example is shown below. In dag_to_tree(), the resolution to these
41
- # discrepancies are hard-coded.
42
- #
43
- # order: cathartiformes (inat) accipitriformes (gbif)
44
- # \ /
45
- # family: cathartidae
46
- #
47
- ########
1
+ """
2
+
3
+ taxonomy_graph.py
4
+
5
+ Methods for transforming taxonomy CSV into a graph structure backed by
6
+ NetworkX.
7
+
8
+ We treat each taxon in the taxonomy as a node in a graph, represented by the
9
+ TaxonNode class. We use a NetworkX directed graph (nx.DiGraph) to keep track of
10
+ the edges (parent-child relationships) between the nodes.
11
+
12
+ In theory, the true biological taxonomy graph should be a tree, where every
13
+ taxon node has exactly 1 parent. However, because we use both GBIF and INAT
14
+ taxonomies, there are 2 situations where a taxon node ends up with two parents.
15
+ Thus, the graph is actually a "directed acyclic graph" (DAG) instead of a tree.
16
+
17
+ The two situations are explained in detail below. This module includes a
18
+ function dag_to_tree() which converts a DAG to a tree by heuristically removing
19
+ edges from the DAG so that each node only has 1 parent.
20
+
21
+ CASE 1: INAT and GBIF have different granularity in their taxonomy levels
22
+ ======
23
+ An example is shown below. In dag_to_tree(), the lower parent is kept, while
24
+ the higher-up parent is discarded. In this example, the "sciurini -> sciurus"
25
+ edge would be kept, while "sciuridae -> sciurus" would be removed.
26
+
27
+ "eastern gray squirrel" (inat) "squirrel" (gbif)
28
+ ------------------------------ -----------------
29
+ family: sciuridae
30
+ / \
31
+ subfamily: sciurinae | # skips subfamily
32
+ | |
33
+ tribe: sciurini | # skips tribe
34
+ \ /
35
+ genus: sciurus
36
+
37
+
38
+ CASE 2: INAT and GBIF have different taxonomies
39
+ ======
40
+ An example is shown below. In dag_to_tree(), the resolution to these
41
+ discrepancies are hard-coded.
42
+
43
+ order: cathartiformes (inat) accipitriformes (gbif)
44
+ \ /
45
+ family: cathartidae
46
+
47
+ """
48
48
 
49
49
  #%% Imports and constants
50
50
 
@@ -1,76 +1,83 @@
1
- ########
2
- #
3
- # validate_lila_category_mappings.py
4
- #
5
- # Confirm that all category names on LILA have mappings in the taxonomy file.
6
- #
7
- ########
8
-
9
- #%% Constants and imports
10
-
11
- import json
12
- import os
13
-
14
- from data_management.lila.lila_common import read_lila_taxonomy_mapping
15
-
16
- lila_local_base = os.path.expanduser('~/lila')
17
-
18
- metadata_dir = os.path.join(lila_local_base,'metadata')
19
- os.makedirs(metadata_dir,exist_ok=True)
20
-
21
- # Created by get_lila_category_list.py... contains counts for each category
22
- category_list_dir = os.path.join(lila_local_base,'lila_categories_list')
23
- lila_dataset_to_categories_file = os.path.join(category_list_dir,'lila_dataset_to_categories.json')
24
-
25
- assert os.path.isfile(lila_dataset_to_categories_file)
26
-
27
-
28
- #%% Load category and taxonomy files
29
-
30
- with open(lila_dataset_to_categories_file,'r') as f:
31
- lila_dataset_to_categories = json.load(f)
32
-
33
- taxonomy_df = read_lila_taxonomy_mapping(metadata_dir)
34
-
35
-
36
- #%% Map dataset names and category names to scientific names
37
-
38
- ds_query_to_scientific_name = {}
39
-
40
- unmapped_queries = set()
41
-
42
- # i_row = 1; row = taxonomy_df.iloc[i_row]; row
43
- for i_row,row in taxonomy_df.iterrows():
44
-
45
- ds_query = row['dataset_name'] + ':' + row['query']
46
- ds_query = ds_query.lower()
47
-
48
- if not isinstance(row['scientific_name'],str):
49
- unmapped_queries.add(ds_query)
50
- ds_query_to_scientific_name[ds_query] = 'unmapped'
51
- continue
52
-
53
- ds_query_to_scientific_name[ds_query] = row['scientific_name']
54
-
55
-
56
- #%% For each dataset, make sure we can map every category to the taxonomy
57
-
58
- # dataset_name = list(lila_dataset_to_categories.keys())[0]
59
- for _dataset_name in lila_dataset_to_categories.keys():
60
-
61
- if '_bbox' in _dataset_name:
62
- dataset_name = _dataset_name.replace('_bbox','')
63
- else:
64
- dataset_name = _dataset_name
65
-
66
- categories = lila_dataset_to_categories[dataset_name]
67
-
68
- # c = categories[0]
69
- for c in categories:
70
- ds_query = dataset_name + ':' + c['name']
71
- ds_query = ds_query.lower()
72
-
73
- if ds_query not in ds_query_to_scientific_name:
74
- print('Could not find mapping for {}'.format(ds_query))
75
- else:
76
- scientific_name = ds_query_to_scientific_name[ds_query]
1
+ """
2
+
3
+ validate_lila_category_mappings.py
4
+
5
+ Confirm that all category names on LILA have mappings in the taxonomy file.
6
+
7
+ """
8
+
9
+ #%% Constants and imports
10
+
11
+ import json
12
+ import os
13
+
14
+ from data_management.lila.lila_common import read_lila_taxonomy_mapping
15
+
16
+
17
+ #%% Prevent execution during infrastructural imports
18
+
19
+ if False:
20
+
21
+ #%% Constants
22
+
23
+ lila_local_base = os.path.expanduser('~/lila')
24
+
25
+ metadata_dir = os.path.join(lila_local_base,'metadata')
26
+ os.makedirs(metadata_dir,exist_ok=True)
27
+
28
+ # Created by get_lila_category_list.py... contains counts for each category
29
+ category_list_dir = os.path.join(lila_local_base,'lila_categories_list')
30
+ lila_dataset_to_categories_file = os.path.join(category_list_dir,'lila_dataset_to_categories.json')
31
+
32
+ assert os.path.isfile(lila_dataset_to_categories_file)
33
+
34
+
35
+ #%% Load category and taxonomy files
36
+
37
+ with open(lila_dataset_to_categories_file,'r') as f:
38
+ lila_dataset_to_categories = json.load(f)
39
+
40
+ taxonomy_df = read_lila_taxonomy_mapping(metadata_dir)
41
+
42
+
43
+ #%% Map dataset names and category names to scientific names
44
+
45
+ ds_query_to_scientific_name = {}
46
+
47
+ unmapped_queries = set()
48
+
49
+ # i_row = 1; row = taxonomy_df.iloc[i_row]; row
50
+ for i_row,row in taxonomy_df.iterrows():
51
+
52
+ ds_query = row['dataset_name'] + ':' + row['query']
53
+ ds_query = ds_query.lower()
54
+
55
+ if not isinstance(row['scientific_name'],str):
56
+ unmapped_queries.add(ds_query)
57
+ ds_query_to_scientific_name[ds_query] = 'unmapped'
58
+ continue
59
+
60
+ ds_query_to_scientific_name[ds_query] = row['scientific_name']
61
+
62
+
63
+ #%% For each dataset, make sure we can map every category to the taxonomy
64
+
65
+ # dataset_name = list(lila_dataset_to_categories.keys())[0]
66
+ for _dataset_name in lila_dataset_to_categories.keys():
67
+
68
+ if '_bbox' in _dataset_name:
69
+ dataset_name = _dataset_name.replace('_bbox','')
70
+ else:
71
+ dataset_name = _dataset_name
72
+
73
+ categories = lila_dataset_to_categories[dataset_name]
74
+
75
+ # c = categories[0]
76
+ for c in categories:
77
+ ds_query = dataset_name + ':' + c['name']
78
+ ds_query = ds_query.lower()
79
+
80
+ if ds_query not in ds_query_to_scientific_name:
81
+ print('Could not find mapping for {}'.format(ds_query))
82
+ else:
83
+ scientific_name = ds_query_to_scientific_name[ds_query]
@@ -1,89 +0,0 @@
1
- ########
2
- #
3
- # cct_json_to_filename_json.py
4
- #
5
- # Given a .json file in COCO Camera Traps format, outputs a .json-formatted list of
6
- # relative file names present in the CCT file.
7
- #
8
- ########
9
-
10
- #%% Constants and environment
11
-
12
- import json
13
- import sys
14
- import os
15
- from itertools import compress
16
-
17
-
18
- #%% Main function
19
-
20
- def convertJsonToStringList(inputFilename,outputFilename=None,prepend='',bConfirmExists=False,
21
- bForceForwardSlash=True,imageBase=''):
22
-
23
- assert os.path.isfile(inputFilename), '.json file {} does not exist'.format(inputFilename)
24
- if outputFilename is None:
25
- outputFilename = inputFilename + '_images.json'
26
-
27
- with open(inputFilename,'r') as f:
28
- data = json.load(f)
29
-
30
- images = data['images']
31
-
32
- filenames = [im['file_name'] for im in images]
33
-
34
- if bConfirmExists:
35
- bValid = [False] * len(filenames)
36
- for iFile,f in enumerate(filenames):
37
- fullPath = os.path.join(imageBase,f)
38
- if os.path.isfile(fullPath):
39
- bValid[iFile] = True
40
- nFilesTotal = len(filenames)
41
- filenames = list(compress(filenames, bValid))
42
- nFilesValid = len(filenames)
43
- print('Marking {} of {} as valid'.format(nFilesValid,nFilesTotal))
44
-
45
- filenames = [prepend + s for s in filenames]
46
- if bForceForwardSlash:
47
- filenames = [s.replace('\\','/') for s in filenames]
48
-
49
- # json.dump(s,open(outputFilename,'w'))
50
-
51
- s = json.dumps(filenames)
52
- with open(outputFilename, 'w') as f:
53
- f.write(s)
54
-
55
- return s,outputFilename
56
-
57
-
58
- #%% Command-line driver
59
-
60
- import argparse
61
-
62
- def main():
63
-
64
- parser = argparse.ArgumentParser()
65
- parser.add_argument('inputFilename')
66
- parser.add_argument('outputFilename')
67
-
68
- if len(sys.argv[1:]) == 0:
69
- parser.print_help()
70
- parser.exit()
71
-
72
- args = parser.parse_args()
73
- convertJsonToStringList(args.jsonFile,args)
74
-
75
-
76
- if __name__ == '__main__':
77
-
78
- main()
79
-
80
- #%% Interactive driver
81
-
82
- if False:
83
-
84
- #%%
85
- prepend = '20190430cameratraps/'
86
- inputFilename = r"D:\wildlife_data\awc\awc_imageinfo.json"
87
- outputFilename = r"D:\wildlife_data\awc\awc_image_list.json"
88
- convertJsonToStringList(inputFilename,outputFilename,prepend=prepend,bConfirmExists=True,imageBase=r'D:\wildlife_data\awc')
89
- print('Finished converting {} to {}'.format(inputFilename,outputFilename))
@@ -1,140 +0,0 @@
1
- ########
2
- #
3
- # cct_to_csv.py
4
- #
5
- # "Converts" a COCO Camera Traps .json file to .csv, in quotes because
6
- # all kinds of assumptions are made here, and if you have a particular .csv
7
- # format in mind, YMMV. Most notably, does not include any bounding box information
8
- # or any non-standard fields that may be present in the .json file. Does not
9
- # propagate information about sequence-level vs. image-level annotations.
10
- #
11
- # Does not assume access to the images, therefore does not open .jpg files to find
12
- # datetime information if it's not in the metadata, just writes datetime as 'unknown'.
13
- #
14
- ########
15
-
16
- #%% Imports
17
-
18
- import os
19
- import sys
20
- import json
21
-
22
- from tqdm import tqdm
23
- from collections import defaultdict
24
-
25
-
26
- #%% Main function
27
-
28
- def cct_to_csv(input_file,output_file=None):
29
-
30
- if output_file is None:
31
- output_file = input_file + '.csv'
32
-
33
- ##%% Read input
34
-
35
- print('Loading input data')
36
-
37
- with open(input_file,'r') as f:
38
- input_data = json.load(f)
39
-
40
-
41
- ##%% Build internal mappings
42
-
43
- print('Processing input data')
44
-
45
- images = input_data['images']
46
-
47
- category_id_to_name = {cat['id']:cat['name'] for cat in input_data['categories']}
48
-
49
- image_id_to_class_names = defaultdict(set)
50
-
51
- annotations = input_data['annotations']
52
-
53
- # annotation = annotations[0]
54
- for annotation in tqdm(annotations):
55
- image_id = annotation['image_id']
56
- class_name = annotation['category_id']
57
- image_id_to_class_names[image_id].add(
58
- category_id_to_name[class_name])
59
-
60
-
61
- ##%% Write output file
62
-
63
- print('Writing output file')
64
-
65
- with open(output_file,'w') as f:
66
-
67
- f.write('relative_path,datetime,location,sequence_id,class_name\n')
68
-
69
- # im = images[0]
70
- for im in tqdm(images):
71
-
72
- file_name = im['file_name']
73
- class_names_set = image_id_to_class_names[im['id']]
74
- assert len(class_names_set) > 0
75
-
76
- if 'datetime' in im:
77
- datetime = im['datetime']
78
- else:
79
- datetime = 'unknown'
80
-
81
- if 'location' in im:
82
- location = im['location']
83
- else:
84
- location = 'unknown'
85
-
86
- if 'seq_id' in im:
87
- sequence_id = im['seq_id']
88
- else:
89
- sequence_id = 'unknown'
90
-
91
- # Write out one line per class:
92
- for class_name in class_names_set:
93
- f.write('{},{},{},{},{}\n'.format(file_name,
94
- datetime,location,sequence_id,class_name))
95
-
96
- # ...for each class name
97
-
98
- # ...for each image
99
-
100
- # ...with open(output_file)
101
-
102
- # ...def cct_to_csv
103
-
104
-
105
- #%% Interactive driver
106
-
107
- if False:
108
-
109
- #%%
110
-
111
- input_dir = r"G:\temp\cct-to-csv"
112
- files = os.listdir(input_dir)
113
- files = [s for s in files if s.endswith('.json')]
114
- for fn in files:
115
- input_file = os.path.join(input_dir,fn)
116
- assert os.path.isfile(input_file)
117
- cct_to_csv(input_file)
118
-
119
-
120
- #%% Command-line driver
121
-
122
- import argparse
123
-
124
- def main():
125
-
126
- parser = argparse.ArgumentParser(description=(
127
- '"Convert" a COCO Camera Traps .json file to .csv (read code to see why "convert" is in quotes)'))
128
-
129
- parser.add_argument('input_file', type=str)
130
- parser.add_argument('--output_file', type=str, default=None)
131
-
132
- if len(sys.argv[1:]) == 0:
133
- parser.print_help()
134
- parser.exit()
135
-
136
- args = parser.parse_args()
137
- cct_to_csv(args.input_file,args.output_file)
138
-
139
- if __name__ == '__main__':
140
- main()