megadetector 5.0.8__py3-none-any.whl → 5.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +65 -65
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
- api/batch_processing/postprocessing/compare_batch_results.py +113 -43
- api/batch_processing/postprocessing/convert_output_format.py +41 -16
- api/batch_processing/postprocessing/load_api_results.py +16 -17
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +52 -22
- api/batch_processing/postprocessing/merge_detections.py +14 -14
- api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
- api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +102 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -263
- data_management/coco_to_yolo.py +79 -58
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +62 -24
- data_management/databases/subset_json_db.py +24 -15
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -162
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -158
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +7 -7
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +65 -24
- data_management/labelme_to_yolo.py +8 -8
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +13 -13
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +44 -110
- data_management/lila/generate_lila_per_image_labels.py +55 -42
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +96 -33
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +110 -97
- data_management/remap_coco_categories.py +83 -83
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +30 -23
- data_management/wi_download_csv_to_coco.py +246 -239
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +300 -60
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +179 -113
- detection/run_inference_with_yolov5_val.py +108 -48
- detection/run_tiled_inference.py +111 -40
- detection/tf_detector.py +51 -29
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +228 -68
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -871
- md_utils/path_utils.py +460 -134
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +176 -60
- md_utils/write_html_image_list.py +40 -33
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +597 -291
- md_visualization/visualize_db.py +76 -48
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
- megadetector-5.0.10.dist-info/RECORD +224 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- megadetector-5.0.8.dist-info/RECORD +0 -205
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
- {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
labelme_to_coco.py
|
|
4
|
+
|
|
5
|
+
Converts a folder of labelme-formatted .json files to COCO.
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
8
|
|
|
9
9
|
#%% Constants and imports
|
|
10
10
|
|
|
@@ -23,10 +23,10 @@ from tqdm import tqdm
|
|
|
23
23
|
|
|
24
24
|
#%% Support functions
|
|
25
25
|
|
|
26
|
-
def
|
|
26
|
+
def _add_category(category_name,category_name_to_id,candidate_category_id=0):
|
|
27
27
|
"""
|
|
28
|
-
|
|
29
|
-
using the next available integer index.
|
|
28
|
+
Adds the category [category_name] to the dict [category_name_to_id], by default
|
|
29
|
+
using the next available integer index.
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
32
|
if category_name in category_name_to_id:
|
|
@@ -121,7 +121,7 @@ def _process_labelme_file(image_fn_relative,input_folder,use_folders_as_labels,
|
|
|
121
121
|
if len(shapes) == 0:
|
|
122
122
|
|
|
123
123
|
if allow_new_categories:
|
|
124
|
-
category_id =
|
|
124
|
+
category_id = _add_category('empty',category_name_to_id)
|
|
125
125
|
else:
|
|
126
126
|
assert 'empty' in category_name_to_id
|
|
127
127
|
category_id = category_name_to_id['empty']
|
|
@@ -148,7 +148,7 @@ def _process_labelme_file(image_fn_relative,input_folder,use_folders_as_labels,
|
|
|
148
148
|
category_name = shape['label']
|
|
149
149
|
|
|
150
150
|
if allow_new_categories:
|
|
151
|
-
category_id =
|
|
151
|
+
category_id = _add_category(category_name,category_name_to_id)
|
|
152
152
|
else:
|
|
153
153
|
assert category_name in category_name_to_id
|
|
154
154
|
category_id = category_name_to_id[category_name]
|
|
@@ -202,7 +202,7 @@ def labelme_to_coco(input_folder,
|
|
|
202
202
|
max_workers=1,
|
|
203
203
|
use_threads=True):
|
|
204
204
|
"""
|
|
205
|
-
|
|
205
|
+
Finds all images in [input_folder] that have corresponding .json files, and converts
|
|
206
206
|
to a COCO .json file.
|
|
207
207
|
|
|
208
208
|
Currently only supports bounding box annotations and image-level flags (i.e., does not
|
|
@@ -224,11 +224,38 @@ def labelme_to_coco(input_folder,
|
|
|
224
224
|
file. Empty images in the "lion" folder will still be given the label "empty" (or
|
|
225
225
|
[empty_category_name]).
|
|
226
226
|
|
|
227
|
-
|
|
227
|
+
Args:
|
|
228
|
+
input_folder (str): input folder to search for images and Labelme .json files
|
|
229
|
+
output_file (str, optional): output file to which we should write COCO-formatted data; if None
|
|
230
|
+
this function just returns the COCO-formatted dict
|
|
231
|
+
category_id_to_category_name (dict, optional): dict mapping category IDs to category names;
|
|
232
|
+
really used to map Labelme category names to COCO category IDs. IDs will be auto-generated
|
|
233
|
+
if this is None.
|
|
234
|
+
empty_category_id (int, optional): category ID to use for the not-very-COCO-like "empty" category;
|
|
235
|
+
also see the no_json_handling parameter.
|
|
236
|
+
info_struct (dict, optional): dict to stash in the "info" field of the resulting COCO dict
|
|
237
|
+
relative_paths_to_include (list, optional): allowlist of relative paths to include in the COCO
|
|
238
|
+
dict; there's no reason to specify this along with relative_paths_to_exclude.
|
|
239
|
+
relative_paths_to_exclude (list, optional): blocklist of relative paths to exclude from the COCO
|
|
240
|
+
dict; there's no reason to specify this along with relative_paths_to_include.
|
|
241
|
+
use_folders_as_labels (bool, optional): if this is True, class names will be pulled from folder names,
|
|
242
|
+
useful if you have images like a/b/cat/image001.jpg, a/b/dog/image002.jpg, etc.
|
|
243
|
+
recursive (bool, optional): whether to recurse into [input_folder]
|
|
244
|
+
no_json_handling (str, optional): how to deal with image files that have no corresponding .json files,
|
|
245
|
+
can be:
|
|
246
|
+
|
|
247
|
+
- 'skip': ignore image files with no corresponding .json files
|
|
248
|
+
- 'empty': treat image files with no corresponding .json files as empty
|
|
249
|
+
- 'error': throw an error when an image file has no corresponding .json file
|
|
250
|
+
validate_image_sizes (bool, optional): whether to load images to verify that the sizes specified
|
|
251
|
+
in the labelme files are correct
|
|
252
|
+
max_workers (int, optional): number of workers to use for parallelization, set to <=1 to disable
|
|
253
|
+
parallelization
|
|
254
|
+
use_threads (bool, optional): whether to use threads (True) or processes (False) for parallelization,
|
|
255
|
+
not relevant if max_workers <= 1
|
|
228
256
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
* 'error': throw an error when an image file has no corresponding .json file
|
|
257
|
+
Returns:
|
|
258
|
+
dict: a COCO-formatted dictionary, identical to what's written to [output_file] if [output_file] is not None.
|
|
232
259
|
"""
|
|
233
260
|
|
|
234
261
|
if max_workers > 1:
|
|
@@ -288,7 +315,7 @@ def labelme_to_coco(input_folder,
|
|
|
288
315
|
raise ValueError('Category IDs must be ints or string-formatted ints')
|
|
289
316
|
|
|
290
317
|
if empty_category_id is None:
|
|
291
|
-
empty_category_id =
|
|
318
|
+
empty_category_id = _add_category(empty_category_name,category_name_to_id)
|
|
292
319
|
|
|
293
320
|
if max_workers <= 1:
|
|
294
321
|
|
|
@@ -366,12 +393,26 @@ def find_empty_labelme_files(input_folder,recursive=True):
|
|
|
366
393
|
Returns a list of all image files in [input_folder] associated with .json files that have
|
|
367
394
|
no boxes in them. Also returns a list of images with no associated .json files. Specifically,
|
|
368
395
|
returns a dict:
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
396
|
+
|
|
397
|
+
.. code-block:: none
|
|
398
|
+
|
|
399
|
+
{
|
|
400
|
+
'images_with_empty_json_files':[list],
|
|
401
|
+
'images_with_no_json_files':[list],
|
|
402
|
+
'images_with_non_empty_json_files':[list]
|
|
403
|
+
}
|
|
404
|
+
|
|
405
|
+
Args:
|
|
406
|
+
input_folder (str): the folder to search for empty (i.e., box-less) Labelme .json files
|
|
407
|
+
recursive (bool, optional): whether to recurse into [input_folder]
|
|
408
|
+
|
|
409
|
+
Returns:
|
|
410
|
+
dict: a dict with fields:
|
|
411
|
+
- images_with_empty_json_files: a list of all image files in [input_folder] associated with
|
|
412
|
+
.json files that have no boxes in them
|
|
413
|
+
- images_with_no_json_files: a list of images in [input_folder] with no associated .json files
|
|
414
|
+
- images_with_non_empty_json_files: a list of images in [input_folder] associated with .json
|
|
415
|
+
files that have at least one box
|
|
375
416
|
"""
|
|
376
417
|
image_filenames_relative = path_utils.find_images(input_folder,recursive=True,
|
|
377
418
|
return_relative_paths=True)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
labelme_to_yolo.py
|
|
4
|
+
|
|
5
|
+
Create YOLO .txt files in a folder containing labelme .json files.
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
8
|
|
|
9
9
|
#%% Imports
|
|
10
10
|
|
|
@@ -77,7 +77,7 @@ def labelme_file_to_yolo_file(labelme_file,
|
|
|
77
77
|
p0 = shape['points'][0]
|
|
78
78
|
p1 = shape['points'][1]
|
|
79
79
|
|
|
80
|
-
#
|
|
80
|
+
# Labelme: [[x0,y0],[x1,y1]] (arbitrarily sorted) (absolute coordinates)
|
|
81
81
|
#
|
|
82
82
|
# YOLO: [class, x_center, y_center, width, height] (normalized coordinates)
|
|
83
83
|
minx_abs = min(p0[0],p1[0])
|
|
File without changes
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
add_locations_to_island_camera_traps.py
|
|
4
|
+
|
|
5
|
+
The Island Conservation Camera Traps dataset had unique camera identifiers embedded
|
|
6
|
+
in filenames, but not in the proper metadata fields. This script copies that information
|
|
7
|
+
to metadata.
|
|
8
|
+
|
|
9
|
+
"""
|
|
10
10
|
|
|
11
11
|
#%% Imports and constants
|
|
12
12
|
|
|
@@ -1,147 +1,147 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
#%% Imports and constants
|
|
12
|
-
|
|
13
|
-
import os
|
|
14
|
-
import json
|
|
15
|
-
import shutil
|
|
16
|
-
|
|
17
|
-
from tqdm import tqdm
|
|
18
|
-
from collections import defaultdict
|
|
19
|
-
|
|
20
|
-
input_file = r'd:\lila\nacti\nacti_metadata.json.1.13\nacti_metadata.json'
|
|
21
|
-
output_file = r'g:\temp\nacti_metadata.1.14.json'
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
#%% Read metadata
|
|
25
|
-
|
|
26
|
-
with open(input_file,'r') as f:
|
|
27
|
-
d = json.load(f)
|
|
28
|
-
|
|
29
|
-
assert d['info']['version'] == 1.13
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
#%% Map images to locations (according to the metadata)
|
|
33
|
-
|
|
34
|
-
file_name_to_original_location = {}
|
|
35
|
-
|
|
36
|
-
# im = dataset_labels['images'][0]
|
|
37
|
-
for im in tqdm(d['images']):
|
|
38
|
-
file_name_to_original_location[im['file_name']] = im['location']
|
|
39
|
-
|
|
40
|
-
original_locations = set(file_name_to_original_location.values())
|
|
41
|
-
|
|
42
|
-
print('Found {} locations in the original metadata:'.format(len(original_locations)))
|
|
43
|
-
for loc in original_locations:
|
|
44
|
-
print('[{}]'.format(loc))
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
#%% Map images to new locations
|
|
48
|
-
|
|
49
|
-
def path_to_location(relative_path):
|
|
50
|
-
|
|
51
|
-
relative_path = relative_path.replace('\\','/')
|
|
52
|
-
if relative_path in file_name_to_original_location:
|
|
53
|
-
location_name = file_name_to_original_location[relative_path]
|
|
54
|
-
if location_name == 'San Juan Mntns, Colorado':
|
|
55
|
-
# "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
|
|
56
|
-
tokens = relative_path.split('/')[-1].split('_')
|
|
57
|
-
assert tokens[1].startswith('Unit')
|
|
58
|
-
location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
|
|
59
|
-
elif location_name == 'Lebec, California':
|
|
60
|
-
# "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
|
|
61
|
-
tokens = relative_path.split('/')[-1].split('_')
|
|
62
|
-
assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
|
|
63
|
-
location_name = 'lebec_{}'.format(tokens[0])
|
|
64
|
-
elif location_name == 'Archbold, FL':
|
|
65
|
-
# "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
|
|
66
|
-
tokens = relative_path.split('/')[-1].split('_')
|
|
67
|
-
assert tokens[0].startswith('FL-')
|
|
68
|
-
location_name = 'archbold_{}'.format(tokens[0])
|
|
69
|
-
else:
|
|
70
|
-
assert location_name == ''
|
|
71
|
-
tokens = relative_path.split('/')[-1].split('_')
|
|
72
|
-
if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
|
|
73
|
-
location_name = '{}'.format(tokens[0])
|
|
74
|
-
|
|
75
|
-
else:
|
|
76
|
-
|
|
77
|
-
location_name = 'unknown'
|
|
78
|
-
|
|
79
|
-
# print('Returning location {} for file {}'.format(location_name,relative_path))
|
|
80
|
-
|
|
81
|
-
return location_name
|
|
82
|
-
|
|
83
|
-
file_name_to_updated_location = {}
|
|
84
|
-
updated_location_to_count = defaultdict(int)
|
|
85
|
-
for im in tqdm(d['images']):
|
|
86
|
-
|
|
87
|
-
updated_location = path_to_location(im['file_name'])
|
|
88
|
-
file_name_to_updated_location[im['file_name']] = updated_location
|
|
89
|
-
updated_location_to_count[updated_location] += 1
|
|
90
|
-
|
|
91
|
-
updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
|
|
92
|
-
key=lambda item: item[1],
|
|
93
|
-
reverse=True)}
|
|
94
|
-
|
|
95
|
-
updated_locations = set(file_name_to_updated_location.values())
|
|
96
|
-
|
|
97
|
-
print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
|
|
98
|
-
for loc in updated_location_to_count:
|
|
99
|
-
print('{}: {}'.format(loc,updated_location_to_count[loc]))
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
#%% Re-write metadata
|
|
103
|
-
|
|
104
|
-
for im in d['images']:
|
|
105
|
-
im['location'] = file_name_to_updated_location[im['file_name']]
|
|
106
|
-
d['info']['version'] = 1.14
|
|
107
|
-
|
|
108
|
-
with open(output_file,'w') as f:
|
|
109
|
-
json.dump(d,f,indent=1)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
#%% For each location, sample some random images to make sure they look consistent
|
|
113
|
-
|
|
114
|
-
input_base = r'd:\lila\nacti-unzipped'
|
|
115
|
-
assert os.path.isdir(input_base)
|
|
116
|
-
|
|
117
|
-
location_to_images = defaultdict(list)
|
|
118
|
-
|
|
119
|
-
for im in d['images']:
|
|
120
|
-
location_to_images[im['location']].append(im)
|
|
121
|
-
|
|
122
|
-
n_to_sample = 10
|
|
123
|
-
import random
|
|
124
|
-
random.seed(0)
|
|
125
|
-
sampling_folder_base = r'g:\temp\nacti_samples'
|
|
126
|
-
|
|
127
|
-
for location in tqdm(location_to_images):
|
|
128
|
-
|
|
129
|
-
images_this_location = location_to_images[location]
|
|
130
|
-
if len(images_this_location) > n_to_sample:
|
|
131
|
-
images_this_location = random.sample(images_this_location,n_to_sample)
|
|
132
|
-
|
|
133
|
-
for i_image,im in enumerate(images_this_location):
|
|
134
|
-
|
|
135
|
-
fn_relative = im['file_name']
|
|
136
|
-
source_fn_abs = os.path.join(input_base,fn_relative)
|
|
137
|
-
assert os.path.isfile(source_fn_abs)
|
|
138
|
-
ext = os.path.splitext(fn_relative)[1]
|
|
139
|
-
target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
|
|
140
|
-
location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
|
|
141
|
-
os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
|
|
142
|
-
shutil.copyfile(source_fn_abs,target_fn_abs)
|
|
143
|
-
|
|
144
|
-
# ...for each image
|
|
145
|
-
|
|
146
|
-
# ...for each location
|
|
147
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
add_locations_to_nacti.py
|
|
4
|
+
|
|
5
|
+
As of 10.2023, NACTI metadata only has very coarse location information (e.g. "Florida"),
|
|
6
|
+
but camera IDs are embedded in filenames. This script pulls that information from filenames
|
|
7
|
+
and adds it to metadata.
|
|
8
|
+
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
#%% Imports and constants
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
import json
|
|
15
|
+
import shutil
|
|
16
|
+
|
|
17
|
+
from tqdm import tqdm
|
|
18
|
+
from collections import defaultdict
|
|
19
|
+
|
|
20
|
+
input_file = r'd:\lila\nacti\nacti_metadata.json.1.13\nacti_metadata.json'
|
|
21
|
+
output_file = r'g:\temp\nacti_metadata.1.14.json'
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
#%% Read metadata
|
|
25
|
+
|
|
26
|
+
with open(input_file,'r') as f:
|
|
27
|
+
d = json.load(f)
|
|
28
|
+
|
|
29
|
+
assert d['info']['version'] == 1.13
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
#%% Map images to locations (according to the metadata)
|
|
33
|
+
|
|
34
|
+
file_name_to_original_location = {}
|
|
35
|
+
|
|
36
|
+
# im = dataset_labels['images'][0]
|
|
37
|
+
for im in tqdm(d['images']):
|
|
38
|
+
file_name_to_original_location[im['file_name']] = im['location']
|
|
39
|
+
|
|
40
|
+
original_locations = set(file_name_to_original_location.values())
|
|
41
|
+
|
|
42
|
+
print('Found {} locations in the original metadata:'.format(len(original_locations)))
|
|
43
|
+
for loc in original_locations:
|
|
44
|
+
print('[{}]'.format(loc))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
#%% Map images to new locations
|
|
48
|
+
|
|
49
|
+
def path_to_location(relative_path):
|
|
50
|
+
|
|
51
|
+
relative_path = relative_path.replace('\\','/')
|
|
52
|
+
if relative_path in file_name_to_original_location:
|
|
53
|
+
location_name = file_name_to_original_location[relative_path]
|
|
54
|
+
if location_name == 'San Juan Mntns, Colorado':
|
|
55
|
+
# "part0/sub000/2010_Unit150_Ivan097_img0003.jpg"
|
|
56
|
+
tokens = relative_path.split('/')[-1].split('_')
|
|
57
|
+
assert tokens[1].startswith('Unit')
|
|
58
|
+
location_name = 'sanjuan_{}_{}_{}'.format(tokens[0],tokens[1],tokens[2])
|
|
59
|
+
elif location_name == 'Lebec, California':
|
|
60
|
+
# "part0/sub035/CA-03_08_13_2015_CA-03_0009738.jpg"
|
|
61
|
+
tokens = relative_path.split('/')[-1].split('_')
|
|
62
|
+
assert tokens[0].startswith('CA-') or tokens[0].startswith('TAG-')
|
|
63
|
+
location_name = 'lebec_{}'.format(tokens[0])
|
|
64
|
+
elif location_name == 'Archbold, FL':
|
|
65
|
+
# "part1/sub110/FL-01_01_25_2016_FL-01_0040421.jpg"
|
|
66
|
+
tokens = relative_path.split('/')[-1].split('_')
|
|
67
|
+
assert tokens[0].startswith('FL-')
|
|
68
|
+
location_name = 'archbold_{}'.format(tokens[0])
|
|
69
|
+
else:
|
|
70
|
+
assert location_name == ''
|
|
71
|
+
tokens = relative_path.split('/')[-1].split('_')
|
|
72
|
+
if tokens[0].startswith('CA-') or tokens[0].startswith('TAG-') or tokens[0].startswith('FL-'):
|
|
73
|
+
location_name = '{}'.format(tokens[0])
|
|
74
|
+
|
|
75
|
+
else:
|
|
76
|
+
|
|
77
|
+
location_name = 'unknown'
|
|
78
|
+
|
|
79
|
+
# print('Returning location {} for file {}'.format(location_name,relative_path))
|
|
80
|
+
|
|
81
|
+
return location_name
|
|
82
|
+
|
|
83
|
+
file_name_to_updated_location = {}
|
|
84
|
+
updated_location_to_count = defaultdict(int)
|
|
85
|
+
for im in tqdm(d['images']):
|
|
86
|
+
|
|
87
|
+
updated_location = path_to_location(im['file_name'])
|
|
88
|
+
file_name_to_updated_location[im['file_name']] = updated_location
|
|
89
|
+
updated_location_to_count[updated_location] += 1
|
|
90
|
+
|
|
91
|
+
updated_location_to_count = {k: v for k, v in sorted(updated_location_to_count.items(),
|
|
92
|
+
key=lambda item: item[1],
|
|
93
|
+
reverse=True)}
|
|
94
|
+
|
|
95
|
+
updated_locations = set(file_name_to_updated_location.values())
|
|
96
|
+
|
|
97
|
+
print('Found {} updated locations in the original metadata:'.format(len(updated_locations)))
|
|
98
|
+
for loc in updated_location_to_count:
|
|
99
|
+
print('{}: {}'.format(loc,updated_location_to_count[loc]))
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
#%% Re-write metadata
|
|
103
|
+
|
|
104
|
+
for im in d['images']:
|
|
105
|
+
im['location'] = file_name_to_updated_location[im['file_name']]
|
|
106
|
+
d['info']['version'] = 1.14
|
|
107
|
+
|
|
108
|
+
with open(output_file,'w') as f:
|
|
109
|
+
json.dump(d,f,indent=1)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
#%% For each location, sample some random images to make sure they look consistent
|
|
113
|
+
|
|
114
|
+
input_base = r'd:\lila\nacti-unzipped'
|
|
115
|
+
assert os.path.isdir(input_base)
|
|
116
|
+
|
|
117
|
+
location_to_images = defaultdict(list)
|
|
118
|
+
|
|
119
|
+
for im in d['images']:
|
|
120
|
+
location_to_images[im['location']].append(im)
|
|
121
|
+
|
|
122
|
+
n_to_sample = 10
|
|
123
|
+
import random
|
|
124
|
+
random.seed(0)
|
|
125
|
+
sampling_folder_base = r'g:\temp\nacti_samples'
|
|
126
|
+
|
|
127
|
+
for location in tqdm(location_to_images):
|
|
128
|
+
|
|
129
|
+
images_this_location = location_to_images[location]
|
|
130
|
+
if len(images_this_location) > n_to_sample:
|
|
131
|
+
images_this_location = random.sample(images_this_location,n_to_sample)
|
|
132
|
+
|
|
133
|
+
for i_image,im in enumerate(images_this_location):
|
|
134
|
+
|
|
135
|
+
fn_relative = im['file_name']
|
|
136
|
+
source_fn_abs = os.path.join(input_base,fn_relative)
|
|
137
|
+
assert os.path.isfile(source_fn_abs)
|
|
138
|
+
ext = os.path.splitext(fn_relative)[1]
|
|
139
|
+
target_fn_abs = os.path.join(sampling_folder_base,'{}/{}'.format(
|
|
140
|
+
location,'image_{}{}'.format(str(i_image).zfill(2),ext)))
|
|
141
|
+
os.makedirs(os.path.dirname(target_fn_abs),exist_ok=True)
|
|
142
|
+
shutil.copyfile(source_fn_abs,target_fn_abs)
|
|
143
|
+
|
|
144
|
+
# ...for each image
|
|
145
|
+
|
|
146
|
+
# ...for each location
|
|
147
|
+
|
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
create_lila_blank_set.py
|
|
4
|
+
|
|
5
|
+
Create a folder of blank images sampled from LILA. We'll aim for diversity, so less-common
|
|
6
|
+
locations will be oversampled relative to more common locations. We'll also run MegaDetector
|
|
7
|
+
(with manual review) to remove some incorrectly-labeled, not-actually-empty images from our
|
|
8
|
+
blank set.
|
|
9
|
+
|
|
10
|
+
We'll store location information for each image in a .json file, so we can split locations
|
|
11
|
+
into train/val in downstream tasks.
|
|
12
|
+
|
|
13
|
+
"""
|
|
14
14
|
|
|
15
15
|
#%% Constants and imports
|
|
16
16
|
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
create_lila_test_set.py
|
|
4
|
+
|
|
5
|
+
Create a test set of camera trap images, containing N empty and N non-empty
|
|
6
|
+
images from each LILA data set.
|
|
7
|
+
|
|
8
|
+
"""
|
|
9
9
|
|
|
10
10
|
#%% Constants and imports
|
|
11
11
|
|