megadetector 5.0.28__py3-none-any.whl → 10.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megadetector might be problematic. Click here for more details.

Files changed (197)
  1. megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
  2. megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
  3. megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
  4. megadetector/classification/aggregate_classifier_probs.py +3 -3
  5. megadetector/classification/analyze_failed_images.py +5 -5
  6. megadetector/classification/cache_batchapi_outputs.py +5 -5
  7. megadetector/classification/create_classification_dataset.py +11 -12
  8. megadetector/classification/crop_detections.py +10 -10
  9. megadetector/classification/csv_to_json.py +8 -8
  10. megadetector/classification/detect_and_crop.py +13 -15
  11. megadetector/classification/efficientnet/model.py +8 -8
  12. megadetector/classification/efficientnet/utils.py +6 -5
  13. megadetector/classification/evaluate_model.py +7 -7
  14. megadetector/classification/identify_mislabeled_candidates.py +6 -6
  15. megadetector/classification/json_to_azcopy_list.py +1 -1
  16. megadetector/classification/json_validator.py +29 -32
  17. megadetector/classification/map_classification_categories.py +9 -9
  18. megadetector/classification/merge_classification_detection_output.py +12 -9
  19. megadetector/classification/prepare_classification_script.py +19 -19
  20. megadetector/classification/prepare_classification_script_mc.py +26 -26
  21. megadetector/classification/run_classifier.py +4 -4
  22. megadetector/classification/save_mislabeled.py +6 -6
  23. megadetector/classification/train_classifier.py +1 -1
  24. megadetector/classification/train_classifier_tf.py +9 -9
  25. megadetector/classification/train_utils.py +10 -10
  26. megadetector/data_management/annotations/annotation_constants.py +1 -2
  27. megadetector/data_management/camtrap_dp_to_coco.py +79 -46
  28. megadetector/data_management/cct_json_utils.py +103 -103
  29. megadetector/data_management/cct_to_md.py +49 -49
  30. megadetector/data_management/cct_to_wi.py +33 -33
  31. megadetector/data_management/coco_to_labelme.py +75 -75
  32. megadetector/data_management/coco_to_yolo.py +210 -193
  33. megadetector/data_management/databases/add_width_and_height_to_db.py +86 -12
  34. megadetector/data_management/databases/combine_coco_camera_traps_files.py +40 -40
  35. megadetector/data_management/databases/integrity_check_json_db.py +228 -200
  36. megadetector/data_management/databases/subset_json_db.py +33 -33
  37. megadetector/data_management/generate_crops_from_cct.py +88 -39
  38. megadetector/data_management/get_image_sizes.py +54 -49
  39. megadetector/data_management/labelme_to_coco.py +133 -125
  40. megadetector/data_management/labelme_to_yolo.py +159 -73
  41. megadetector/data_management/lila/create_lila_blank_set.py +81 -83
  42. megadetector/data_management/lila/create_lila_test_set.py +32 -31
  43. megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
  44. megadetector/data_management/lila/download_lila_subset.py +21 -24
  45. megadetector/data_management/lila/generate_lila_per_image_labels.py +365 -107
  46. megadetector/data_management/lila/get_lila_annotation_counts.py +35 -33
  47. megadetector/data_management/lila/get_lila_image_counts.py +22 -22
  48. megadetector/data_management/lila/lila_common.py +73 -70
  49. megadetector/data_management/lila/test_lila_metadata_urls.py +28 -19
  50. megadetector/data_management/mewc_to_md.py +344 -340
  51. megadetector/data_management/ocr_tools.py +262 -255
  52. megadetector/data_management/read_exif.py +249 -227
  53. megadetector/data_management/remap_coco_categories.py +90 -28
  54. megadetector/data_management/remove_exif.py +81 -21
  55. megadetector/data_management/rename_images.py +187 -187
  56. megadetector/data_management/resize_coco_dataset.py +588 -120
  57. megadetector/data_management/speciesnet_to_md.py +41 -41
  58. megadetector/data_management/wi_download_csv_to_coco.py +55 -55
  59. megadetector/data_management/yolo_output_to_md_output.py +248 -122
  60. megadetector/data_management/yolo_to_coco.py +333 -191
  61. megadetector/detection/change_detection.py +832 -0
  62. megadetector/detection/process_video.py +340 -337
  63. megadetector/detection/pytorch_detector.py +358 -278
  64. megadetector/detection/run_detector.py +399 -186
  65. megadetector/detection/run_detector_batch.py +404 -377
  66. megadetector/detection/run_inference_with_yolov5_val.py +340 -327
  67. megadetector/detection/run_tiled_inference.py +257 -249
  68. megadetector/detection/tf_detector.py +24 -24
  69. megadetector/detection/video_utils.py +332 -295
  70. megadetector/postprocessing/add_max_conf.py +19 -11
  71. megadetector/postprocessing/categorize_detections_by_size.py +45 -45
  72. megadetector/postprocessing/classification_postprocessing.py +468 -433
  73. megadetector/postprocessing/combine_batch_outputs.py +23 -23
  74. megadetector/postprocessing/compare_batch_results.py +590 -525
  75. megadetector/postprocessing/convert_output_format.py +106 -102
  76. megadetector/postprocessing/create_crop_folder.py +347 -147
  77. megadetector/postprocessing/detector_calibration.py +173 -168
  78. megadetector/postprocessing/generate_csv_report.py +508 -499
  79. megadetector/postprocessing/load_api_results.py +48 -27
  80. megadetector/postprocessing/md_to_coco.py +133 -102
  81. megadetector/postprocessing/md_to_labelme.py +107 -90
  82. megadetector/postprocessing/md_to_wi.py +40 -40
  83. megadetector/postprocessing/merge_detections.py +92 -114
  84. megadetector/postprocessing/postprocess_batch_results.py +319 -301
  85. megadetector/postprocessing/remap_detection_categories.py +91 -38
  86. megadetector/postprocessing/render_detection_confusion_matrix.py +214 -205
  87. megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
  88. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
  89. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +704 -679
  90. megadetector/postprocessing/separate_detections_into_folders.py +226 -211
  91. megadetector/postprocessing/subset_json_detector_output.py +265 -262
  92. megadetector/postprocessing/top_folders_to_bottom.py +45 -45
  93. megadetector/postprocessing/validate_batch_results.py +70 -70
  94. megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
  95. megadetector/taxonomy_mapping/map_new_lila_datasets.py +18 -19
  96. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +54 -33
  97. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +67 -67
  98. megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
  99. megadetector/taxonomy_mapping/simple_image_download.py +8 -8
  100. megadetector/taxonomy_mapping/species_lookup.py +156 -74
  101. megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
  102. megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
  103. megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
  104. megadetector/utils/ct_utils.py +1049 -211
  105. megadetector/utils/directory_listing.py +21 -77
  106. megadetector/utils/gpu_test.py +22 -22
  107. megadetector/utils/md_tests.py +632 -529
  108. megadetector/utils/path_utils.py +1520 -431
  109. megadetector/utils/process_utils.py +41 -41
  110. megadetector/utils/split_locations_into_train_val.py +62 -62
  111. megadetector/utils/string_utils.py +148 -27
  112. megadetector/utils/url_utils.py +489 -176
  113. megadetector/utils/wi_utils.py +2658 -2526
  114. megadetector/utils/write_html_image_list.py +137 -137
  115. megadetector/visualization/plot_utils.py +34 -30
  116. megadetector/visualization/render_images_with_thumbnails.py +39 -74
  117. megadetector/visualization/visualization_utils.py +487 -435
  118. megadetector/visualization/visualize_db.py +232 -198
  119. megadetector/visualization/visualize_detector_output.py +82 -76
  120. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/METADATA +5 -2
  121. megadetector-10.0.0.dist-info/RECORD +139 -0
  122. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/WHEEL +1 -1
  123. megadetector/api/batch_processing/api_core/__init__.py +0 -0
  124. megadetector/api/batch_processing/api_core/batch_service/__init__.py +0 -0
  125. megadetector/api/batch_processing/api_core/batch_service/score.py +0 -439
  126. megadetector/api/batch_processing/api_core/server.py +0 -294
  127. megadetector/api/batch_processing/api_core/server_api_config.py +0 -97
  128. megadetector/api/batch_processing/api_core/server_app_config.py +0 -55
  129. megadetector/api/batch_processing/api_core/server_batch_job_manager.py +0 -220
  130. megadetector/api/batch_processing/api_core/server_job_status_table.py +0 -149
  131. megadetector/api/batch_processing/api_core/server_orchestration.py +0 -360
  132. megadetector/api/batch_processing/api_core/server_utils.py +0 -88
  133. megadetector/api/batch_processing/api_core_support/__init__.py +0 -0
  134. megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +0 -46
  135. megadetector/api/batch_processing/api_support/__init__.py +0 -0
  136. megadetector/api/batch_processing/api_support/summarize_daily_activity.py +0 -152
  137. megadetector/api/batch_processing/data_preparation/__init__.py +0 -0
  138. megadetector/api/synchronous/__init__.py +0 -0
  139. megadetector/api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  140. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +0 -151
  141. megadetector/api/synchronous/api_core/animal_detection_api/api_frontend.py +0 -263
  142. megadetector/api/synchronous/api_core/animal_detection_api/config.py +0 -35
  143. megadetector/api/synchronous/api_core/tests/__init__.py +0 -0
  144. megadetector/api/synchronous/api_core/tests/load_test.py +0 -110
  145. megadetector/data_management/importers/add_nacti_sizes.py +0 -52
  146. megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
  147. megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
  148. megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
  149. megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
  150. megadetector/data_management/importers/awc_to_json.py +0 -191
  151. megadetector/data_management/importers/bellevue_to_json.py +0 -272
  152. megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
  153. megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
  154. megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
  155. megadetector/data_management/importers/cct_field_adjustments.py +0 -58
  156. megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
  157. megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
  158. megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
  159. megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
  160. megadetector/data_management/importers/ena24_to_json.py +0 -276
  161. megadetector/data_management/importers/filenames_to_json.py +0 -386
  162. megadetector/data_management/importers/helena_to_cct.py +0 -283
  163. megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
  164. megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
  165. megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
  166. megadetector/data_management/importers/jb_csv_to_json.py +0 -150
  167. megadetector/data_management/importers/mcgill_to_json.py +0 -250
  168. megadetector/data_management/importers/missouri_to_json.py +0 -490
  169. megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
  170. megadetector/data_management/importers/noaa_seals_2019.py +0 -181
  171. megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
  172. megadetector/data_management/importers/pc_to_json.py +0 -365
  173. megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
  174. megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
  175. megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
  176. megadetector/data_management/importers/rspb_to_json.py +0 -356
  177. megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
  178. megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
  179. megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
  180. megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
  181. megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
  182. megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
  183. megadetector/data_management/importers/sulross_get_exif.py +0 -65
  184. megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
  185. megadetector/data_management/importers/ubc_to_json.py +0 -399
  186. megadetector/data_management/importers/umn_to_json.py +0 -507
  187. megadetector/data_management/importers/wellington_to_json.py +0 -263
  188. megadetector/data_management/importers/wi_to_json.py +0 -442
  189. megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
  190. megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
  191. megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
  192. megadetector/utils/azure_utils.py +0 -178
  193. megadetector/utils/sas_blob_utils.py +0 -509
  194. megadetector-5.0.28.dist-info/RECORD +0 -209
  195. /megadetector/{api/batch_processing/__init__.py → __init__.py} +0 -0
  196. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/licenses/LICENSE +0 -0
  197. {megadetector-5.0.28.dist-info → megadetector-10.0.0.dist-info}/top_level.txt +0 -0
@@ -1,356 +0,0 @@
1
- """
2
-
3
- rspb_to_json.py
4
-
5
- Convert the .csv file provided for the RSPB data set to a
6
- COCO-camera-traps .json file
7
-
8
- """
9
-
10
- #%% Constants and environment
11
-
12
- import pandas as pd
13
- import os
14
- import glob
15
- import json
16
- import re
17
- import uuid
18
- import tqdm
19
- import time
20
- import ntpath
21
- import humanfriendly
22
- import PIL
23
-
24
- from megadetector.data_management.databases import integrity_check_json_db
25
- from megadetector.visualization import visualize_db
26
-
27
- # [location] is an obfuscation
28
- baseDir = r'e:\wildlife_data\rspb_gola_data'
29
- metadataFile = os.path.join(baseDir,'gola_camtrapr_master_renaming_table_2019-01-31.csv')
30
- outputFile = os.path.join(baseDir,'rspb_gola_labeled.json')
31
- imageBaseDir = os.path.join(baseDir,'gola_camtrapr_data')
32
- imageFlatDir = os.path.join(baseDir,'gola_camtrapr_data_flat')
33
- unmatchedImagesFile = os.path.join(baseDir,'unmatchedImages.txt')
34
- assert(os.path.isdir(imageBaseDir))
35
-
36
-
37
- #%% Create info struct
38
-
39
- info = {}
40
- info['year'] = 2019
41
- info['version'] = 1
42
- info['description'] = 'COCO style database for RSPB gola data'
43
- info['secondary_contributor'] = 'Converted to COCO .json by Dan Morris'
44
- info['contributor'] = 'RSPB'
45
-
46
-
47
- #%% Read source data
48
-
49
- metadataTable = pd.read_csv(metadataFile)
50
-
51
- print('Read {} columns and {} rows from metadata file'.format(len(metadataTable.columns),
52
- len(metadataTable)))
53
-
54
- # metadataTable.columns.values
55
- #
56
- # array(['Project', 'inDir', 'FileName', 'Station', 'Camera',
57
- # 'StationCameraFileName', 'DateTimeOriginal', 'DateReadable',
58
- # 'outDir', 'filename_new', 'fileExistsAlready', 'CopyStatus',
59
- # 'Species'], dtype=object)
60
-
61
- metadataTable[['Species']] = metadataTable[['Species']].fillna(value='unlabeled')
62
-
63
- # We'll populate these later
64
- metadataTable['sequenceID'] = ''
65
- metadataTable['frameNumber'] = ''
66
- metadataTable['filePath'] = ''
67
-
68
- failedCopies = metadataTable[~metadataTable.CopyStatus]
69
- print('Removing {} rows that were failed copies'.format(len(failedCopies)))
70
-
71
- metadataTable = metadataTable[metadataTable.CopyStatus]
72
-
73
- species = list(metadataTable.Species)
74
- uniqueSpecies = set(species)
75
-
76
- print('Read {} unique species in {} rows'.format(len(uniqueSpecies),len(metadataTable)))
77
-
78
- speciesMappings = {}
79
-
80
- # keys should be lowercase
81
- speciesMappings['blank'] = 'empty'
82
- speciesMappings[''] = 'unlabeled'
83
-
84
-
85
- #%% Enumerate images, confirm filename uniqueness
86
-
87
- imageFullPaths = glob.glob(os.path.join(imageBaseDir,r'**\*.JPG'),recursive=True)
88
-
89
- print('Counted {} images'.format(len(imageFullPaths)))
90
-
91
- filenamesOnly = set()
92
-
93
- for p in imageFullPaths:
94
-
95
- fn = ntpath.basename(p)
96
- assert fn not in filenamesOnly
97
- filenamesOnly.add(fn)
98
-
99
- print('Finished uniqueness checking')
100
-
101
-
102
- #%% Update metadata filenames to include site and camera folders, check existence
103
- #
104
- # Takes ~1min
105
-
106
- filenamesToRows = {}
107
-
108
- startTime = time.time()
109
-
110
- newRows = []
111
- matchFailures = []
112
-
113
- # iRow = 0; row = metadataTable.iloc[iRow]
114
- for iRow,row in tqdm.tqdm(metadataTable.iterrows(), total=metadataTable.shape[0]):
115
-
116
- baseFn = row['filename_new']
117
- station = row['Station']
118
-
119
- filenamesToRows[baseFn] = iRow
120
-
121
- # There's a bug in the metadata; the 'camera' column isn't correct.
122
- # camera = row['Camera']
123
- # These appear as, e.g., '3.22e12'
124
- # camera = str(int(float(camera)))
125
-
126
- # Let's pull this out of the file name instead
127
- #
128
- # Filenames look like one of the following:
129
- #
130
- # A1__03224850850507__2015-11-28__10-45-04(1).JPG
131
- # Bayama2PH__C05__NA(NA).JPG
132
- pat = '^(?P<station>.+?)__(?P<camera>.+?)__((?P<date>.+?)__)?(?P<time>[^_\()]+?)\((?P<frame>.+?)\)\.JPG'
133
- match = re.match(pat,baseFn)
134
- if match is None:
135
- raise ValueError('Regex failure at row {}: {}'.format(iRow,baseFn))
136
- assert(station == match.group('station'))
137
- camera = match.group('camera')
138
- row['Camera'] = camera
139
-
140
- assert match.group('station') is not None
141
- assert match.group('camera') is not None
142
- assert match.group('frame') is not None
143
-
144
- if match.group('date') is None:
145
- imgDate = ''
146
- else:
147
- imgDate = match.group('date')
148
-
149
- if match.group('time') is None:
150
- imgTime = ''
151
- else:
152
- imgTime = match.group('time')
153
-
154
- frame = -1
155
- try:
156
- frame = int(match.group['frame'])
157
- except:
158
- pass
159
- row['frameNumber'] = frame
160
-
161
- fn = os.path.join(station,camera,baseFn)
162
- fullPath = os.path.join(imageBaseDir,fn)
163
- row['filePath'] = fn
164
- # assert(os.path.isfile(fullPath))
165
- if not os.path.isfile(fullPath):
166
- print('Failed to match image {}'.format(fullPath))
167
- matchFailures.append(fullPath)
168
- continue
169
-
170
- # metadataTable.iloc[iRow] = row
171
- newRows.append(row)
172
-
173
- elapsed = time.time() - startTime
174
-
175
- # Re-assemble into an updated table
176
- metadataTable = pd.DataFrame(newRows)
177
-
178
- print('Finished checking file existence, extracting metadata in {}, couldn''t find {} images'.format(
179
- humanfriendly.format_timespan(elapsed),len(matchFailures)))
180
-
181
-
182
- #%% Check for images that aren't included in the metadata file
183
-
184
- imagesNotInMetadata = []
185
-
186
- # Enumerate all images
187
- for iImage,imagePath in enumerate(imageFullPaths):
188
-
189
- fn = ntpath.basename(imagePath)
190
- if(fn not in filenamesToRows):
191
- imagesNotInMetadata.append(imagePath)
192
-
193
- print('Finished matching {} images, failed to match {}'.format(
194
- len(imageFullPaths),len(imagesNotInMetadata)))
195
-
196
- # Write to a text file
197
- with open(unmatchedImagesFile, 'w') as f:
198
- for fn in imagesNotInMetadata:
199
- f.write('{}\n'.format(fn))
200
-
201
-
202
- #%% Create CCT dictionaries
203
-
204
- # Also gets image sizes, so this takes ~6 minutes
205
- #
206
- # Implicitly checks images for overt corruptness, i.e. by not crashing.
207
-
208
- images = []
209
- annotations = []
210
-
211
- # Map categories to integer IDs (that's what COCO likes)
212
- nextCategoryID = 1
213
- categoriesToCategoryId = {'empty':0}
214
- categoriesToCounts = {'empty':0}
215
-
216
- # For each image
217
- #
218
- # Because in practice images are 1:1 with annotations in this data set,
219
- # this is also a loop over annotations.
220
-
221
- startTime = time.time()
222
-
223
- # iRow = 0; row = metadataTable.iloc[iRow]
224
- for iRow,row in tqdm.tqdm(metadataTable.iterrows(), total=metadataTable.shape[0]):
225
-
226
- im = {}
227
-
228
- # A1__03224850850507__2015-11-28__10-45-04(1).JPG
229
- fn = row['filename_new']
230
- assert '.JPG' in fn
231
- fn = fn.replace('.JPG','')
232
- im['id'] = fn
233
-
234
- # 'A1\\03224850850507\\A1__03224850850507__2015-11-28__10-45-04(1).JPG'
235
- im['file_name'] = row['filePath']
236
-
237
- # Not currently populated
238
- im['seq_id'] = row['sequenceID']
239
-
240
- # Often -1, sometimes a semi-meaningful int
241
- im['frame_num'] = row['frameNumber']
242
-
243
- # A1
244
- im['site']= row['Station']
245
-
246
- # 03224850850507
247
- im['camera'] = row['Camera']
248
-
249
- # In variable form, but sometimes '28/11/2015 10:45'
250
- im['datetime'] = row['DateTimeOriginal']
251
-
252
- images.append(im)
253
-
254
- # Check image height and width
255
- imagePath = os.path.join(imageBaseDir,im['file_name'])
256
- assert(os.path.isfile(imagePath))
257
- pilImage = PIL.Image.open(imagePath)
258
- width, height = pilImage.size
259
- im['width'] = width
260
- im['height'] = height
261
-
262
- category = row['Species'].lower()
263
- if category in speciesMappings:
264
- category = speciesMappings[category]
265
-
266
- # Have we seen this category before?
267
- if category in categoriesToCategoryId:
268
- categoryID = categoriesToCategoryId[category]
269
- categoriesToCounts[category] += 1
270
- else:
271
- categoryID = nextCategoryID
272
- categoriesToCategoryId[category] = categoryID
273
- categoriesToCounts[category] = 0
274
- nextCategoryID += 1
275
-
276
- # Create an annotation
277
- ann = {}
278
-
279
- # The Internet tells me this guarantees uniqueness to a reasonable extent, even
280
- # beyond the sheer improbability of collisions.
281
- ann['id'] = str(uuid.uuid1())
282
- ann['image_id'] = im['id']
283
- ann['category_id'] = categoryID
284
-
285
- annotations.append(ann)
286
-
287
- # ...for each image
288
-
289
- # Convert categories to a CCT-style dictionary
290
-
291
- categories = []
292
-
293
- for category in categoriesToCounts:
294
-
295
- print('Category {}, count {}'.format(category,categoriesToCounts[category]))
296
- categoryID = categoriesToCategoryId[category]
297
- cat = {}
298
- cat['name'] = category
299
- cat['id'] = categoryID
300
- categories.append(cat)
301
-
302
- elapsed = time.time() - startTime
303
-
304
- print('Finished creating CCT dictionaries in {}'.format(
305
- humanfriendly.format_timespan(elapsed)))
306
-
307
-
308
- #%% Write output
309
-
310
- json_data = {}
311
- json_data['images'] = images
312
- json_data['annotations'] = annotations
313
- json_data['categories'] = categories
314
- json_data['info'] = info
315
- json.dump(json_data,open(outputFile,'w'),indent=4)
316
-
317
- print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
318
- len(images),len(annotations),len(categories)))
319
-
320
-
321
- #%% Check database integrity
322
-
323
- options = integrity_check_json_db.IntegrityCheckOptions()
324
- options.baseDir = imageBaseDir
325
- options.bCheckImageSizes = False
326
- options.bFindUnusedImages = False
327
- integrity_check_json_db.integrity_check_json_db(outputFile, options)
328
-
329
-
330
- #%% Preview a few images to make sure labels were passed along sensibly
331
-
332
- db_path = outputFile
333
- output_dir = os.path.join(baseDir,'label_preview')
334
- image_base_dir = imageBaseDir
335
- options = visualize_db.DbVizOptions()
336
- options.num_to_visualize = 100
337
- htmlOutputFile = visualize_db.visualize_db(db_path,output_dir,image_base_dir,options)
338
-
339
-
340
- #%% One-time processing step: copy images to a flat directory for annotation
341
-
342
- if False:
343
-
344
- #%%
345
-
346
- from shutil import copyfile
347
- os.makedirs(imageFlatDir,exist_ok=True)
348
-
349
- for sourcePath in tqdm.tqdm(imageFullPaths):
350
- fn = ntpath.basename(sourcePath)
351
- targetPath = os.path.join(imageFlatDir,fn)
352
- assert not os.path.isfile(targetPath)
353
- copyfile(sourcePath,targetPath)
354
-
355
- print('Copied {} files'.format(len(imageFullPaths)))
356
-
@@ -1,320 +0,0 @@
1
- """
2
-
3
- save_the_elephants_survey_A.py
4
-
5
- Convert the .csv file provided for the Save the Elephants Survey A data set to a
6
- COCO-camera-traps .json file
7
-
8
- """
9
-
10
- #%% Constants and environment
11
-
12
- import pandas as pd
13
- import os
14
- import json
15
- import uuid
16
- import time
17
- import humanfriendly
18
- import numpy as np
19
- from tqdm import tqdm
20
-
21
- from megadetector.utils.path_utils import find_images
22
-
23
- input_base = r'z:/ste_2019_08_drop'
24
- input_metadata_file = os.path.join(input_base,'SURVEY_A.xlsx')
25
-
26
- output_base = r'f:/save_the_elephants/survey_a'
27
- output_json_file = os.path.join(output_base,'ste_survey_a.json')
28
- image_directory = os.path.join(input_base,'SURVEY A with False Triggers')
29
-
30
- os.makedirs(output_base,exist_ok=True)
31
- assert(os.path.isdir(image_directory))
32
- assert(os.path.isfile(input_metadata_file))
33
-
34
- # Handle all unstructured fields in the source data as extra fields in the annotations
35
- mapped_fields = {'No. of Animals in Photo':'num_animals',
36
- 'No. of new indiviauls (first sighting of new individual)':'num_new_individuals',
37
- 'Number Adult Males (first sighting of new individual)':'num_adult_males',
38
- 'Number Adult Females (first sighting of new individual)':'num_adult_females',
39
- 'Number Adult Unknown (first sighting of new individual)':'num_adult_unknown',
40
- 'Number Sub-adult Males (first sighting of new individual)':'num_subadult_males',
41
- 'Number Sub-adult Females (first sighting of new individual)':'num_subadult_females',
42
- 'Number Sub-adult Unknown (first sighting of new individual)':'num_subadult_unknown',
43
- 'Number Juvenile (first sighting of new individual)':'num_juvenile',
44
- 'Number Newborn (first sighting of new individual)':'num_newborn',
45
- 'Activity':'activity',
46
- 'Animal ID':'animal_id',
47
- 'Specific Notes':'notes'}
48
-
49
- # photo_type really should be an image property, but there are a few conflicts
50
- # that forced me to handle it as an annotation property
51
- mapped_fields['Photo Type '] = 'photo_type'
52
-
53
-
54
- #%% Read source data
55
-
56
- input_metadata = pd.read_excel(input_metadata_file, sheet_name='9. CT Image')
57
- input_metadata = input_metadata.iloc[2:]
58
-
59
- print('Read {} columns and {} rows from metadata file'.format(len(input_metadata.columns),
60
- len(input_metadata)))
61
-
62
-
63
- #%% Map filenames to rows, verify image existence
64
-
65
- start_time = time.time()
66
-
67
- # Maps relative paths to row indices in input_metadata
68
- filenames_to_rows = {}
69
- filenames_with_multiple_annotations = []
70
- missing_images = []
71
-
72
- # Build up a map from filenames to a list of rows, checking image existence as we go
73
- for i_row, fn in tqdm(enumerate(input_metadata['Image Name']),total=len(input_metadata)):
74
-
75
- # Ignore directories
76
- if not fn.endswith('.JPG'):
77
- continue
78
-
79
- if fn in filenames_to_rows:
80
- filenames_with_multiple_annotations.append(fn)
81
- filenames_to_rows[fn].append(i_row)
82
- else:
83
- filenames_to_rows[fn] = [i_row]
84
- image_path = os.path.join(image_directory, fn)
85
- if not os.path.isfile(image_path):
86
- missing_images.append(image_path)
87
-
88
- elapsed = time.time() - start_time
89
-
90
- print('Finished verifying image existence for {} files in {}, found {} filenames with multiple labels, {} missing images'.format(
91
- len(filenames_to_rows), humanfriendly.format_timespan(elapsed),
92
- len(filenames_with_multiple_annotations),len(missing_images)))
93
-
94
-
95
- #%% Make sure the multiple-annotation cases make sense
96
-
97
- if False:
98
-
99
- #%%
100
-
101
- fn = filenames_with_multiple_annotations[1000]
102
- rows = filenames_to_rows[fn]
103
- assert(len(rows) > 1)
104
- for i_row in rows:
105
- print(input_metadata.iloc[i_row]['Species'])
106
-
107
-
108
- #%% Check for images that aren't included in the metadata file
109
-
110
- # Enumerate all images
111
- image_full_paths = find_images(image_directory, bRecursive=True)
112
-
113
- unannotated_images = []
114
-
115
- for iImage, image_path in tqdm(enumerate(image_full_paths),total=len(image_full_paths)):
116
- relative_path = os.path.relpath(image_path,image_directory)
117
- if relative_path not in filenames_to_rows:
118
- unannotated_images.append(relative_path)
119
-
120
- print('Finished checking {} images to make sure they\'re in the metadata, found {} unannotated images'.format(
121
- len(image_full_paths),len(unannotated_images)))
122
-
123
-
124
- #%% Create CCT dictionaries
125
-
126
- images = []
127
- annotations = []
128
- categories = []
129
-
130
- image_ids_to_images = {}
131
-
132
- category_name_to_category = {}
133
-
134
- # Force the empty category to be ID 0
135
- empty_category = {}
136
- empty_category['name'] = 'empty'
137
- empty_category['id'] = 0
138
- category_name_to_category['empty'] = empty_category
139
- categories.append(empty_category)
140
- next_category_id = 1
141
-
142
- start_time = time.time()
143
-
144
- # i_image = 0; image_name = list(filenames_to_rows.keys())[i_image]
145
- for image_name in tqdm(list(filenames_to_rows.keys())):
146
-
147
- # Example filename:
148
- #
149
- # 'Site 1_Oloisukut_1\Oloisukut_A11_UP\Service_2\100EK113\EK001382.JPG'
150
- # 'Site 1_Oloisukut_1\Oloisukut_A11_UP\Service_2.1\100EK113\EK001382.JPG'
151
- img_id = image_name.replace('\\','/').replace('/','_').replace(' ','_')
152
-
153
- row_indices = filenames_to_rows[image_name]
154
-
155
- # i_row = row_indices[0]
156
- for i_row in row_indices:
157
-
158
- row = input_metadata.iloc[i_row]
159
- assert(row['Image Name'] == image_name)
160
-
161
- timestamp = row['Date'].strftime("%d/%m/%Y")
162
- station_label = row['Camera Trap Station Label']
163
- photo_type = row['Photo Type ']
164
- if isinstance(photo_type,float):
165
- photo_type = ''
166
- photo_type = photo_type.strip().lower()
167
-
168
- if img_id in image_ids_to_images:
169
-
170
- im = image_ids_to_images[img_id]
171
- assert im['file_name'] == image_name
172
- assert im['station_label'] == station_label
173
-
174
- # There are a small handful of datetime mismatches across annotations
175
- # for the same image
176
- # assert im['datetime'] == timestamp
177
- if im['datetime'] != timestamp:
178
- print('Warning: timestamp conflict for image {}: {},{}'.format(
179
- image_name,im['datetime'],timestamp))
180
-
181
- else:
182
-
183
- im = {}
184
- im['id'] = img_id
185
- im['file_name'] = image_name
186
- im['datetime'] = timestamp
187
- im['station_label'] = station_label
188
- im['photo_type'] = photo_type
189
-
190
- image_ids_to_images[img_id] = im
191
- images.append(im)
192
-
193
- species = row['Species']
194
-
195
- if (isinstance(species,float) or \
196
- (isinstance(species,str) and (len(species) == 0))):
197
- category_name = 'empty'
198
- else:
199
- category_name = species
200
-
201
- # Special cases based on the 'photo type' field
202
- if 'vehicle' in photo_type:
203
- category_name = 'vehicle'
204
- # Various spellings of 'community'
205
- elif 'comm' in photo_type:
206
- category_name = 'human'
207
- elif 'camera' in photo_type or 'researcher' in photo_type:
208
- category_name = 'human'
209
- elif 'livestock' in photo_type:
210
- category_name = 'livestock'
211
- elif 'blank' in photo_type:
212
- category_name = 'empty'
213
- elif 'plant movement' in photo_type:
214
- category_name = 'empty'
215
-
216
- category_name = category_name.strip().lower()
217
-
218
- # Have we seen this category before?
219
- if category_name in category_name_to_category:
220
- category_id = category_name_to_category[category_name]['id']
221
- else:
222
- category_id = next_category_id
223
- category = {}
224
- category['id'] = category_id
225
- category['name'] = category_name
226
- category_name_to_category[category_name] = category
227
- categories.append(category)
228
- next_category_id += 1
229
-
230
- # Create an annotation
231
- ann = {}
232
- ann['id'] = str(uuid.uuid1())
233
- ann['image_id'] = im['id']
234
- ann['category_id'] = category_id
235
-
236
- # fieldname = list(mapped_fields.keys())[0]
237
- for fieldname in mapped_fields:
238
- target_field = mapped_fields[fieldname]
239
- val = row[fieldname]
240
- if isinstance(val,float) and np.isnan(val):
241
- val = ''
242
- else:
243
- val = str(val).strip()
244
- ann[target_field] = val
245
-
246
- annotations.append(ann)
247
-
248
- # ...for each row
249
-
250
- # ...for each image
251
-
252
- print('Finished creating CCT dictionaries in {}'.format(
253
- humanfriendly.format_timespan(elapsed)))
254
-
255
-
256
- #%% Create info struct
257
-
258
- info = {}
259
- info['year'] = 2019
260
- info['version'] = 1
261
- info['description'] = 'Save the Elephants Survey A'
262
- info['contributor'] = 'Save the Elephants'
263
-
264
-
265
- #%% Write output
266
-
267
- json_data = {}
268
- json_data['images'] = images
269
- json_data['annotations'] = annotations
270
- json_data['categories'] = categories
271
- json_data['info'] = info
272
- json.dump(json_data, open(output_json_file, 'w'), indent=2)
273
-
274
- print('Finished writing .json file with {} images, {} annotations, and {} categories'.format(
275
- len(images),len(annotations),len(categories)))
276
-
277
-
278
- #%% Validate output
279
-
280
- from megadetector.data_management.databases import integrity_check_json_db
281
-
282
- options = integrity_check_json_db.IntegrityCheckOptions()
283
- options.baseDir = image_directory
284
- options.bCheckImageSizes = False
285
- options.bCheckImageExistence = False
286
- options.bFindUnusedImages = False
287
-
288
- sortedCategories, data = integrity_check_json_db.integrity_check_json_db(output_json_file,options)
289
-
290
-
291
- #%% Preview labels
292
-
293
- from megadetector.visualization import visualize_db
294
- from megadetector.data_management.databases import integrity_check_json_db
295
-
296
- viz_options = visualize_db.DbVizOptions()
297
- viz_options.num_to_visualize = 1000
298
- viz_options.trim_to_images_with_bboxes = False
299
- viz_options.add_search_links = True
300
- viz_options.sort_by_filename = False
301
- viz_options.parallelize_rendering = True
302
- html_output_file,image_db = visualize_db.visualize_db(db_path=output_json_file,
303
- output_dir=os.path.join(output_base,'preview'),
304
- image_base_dir=image_directory,
305
- options=viz_options)
306
- os.startfile(html_output_file)
307
-
308
-
309
- #%% Scrap
310
-
311
- if False:
312
-
313
- pass
314
-
315
- #%% Find unique photo types
316
-
317
- annotations = image_db['annotations']
318
- photo_types = set()
319
- for ann in tqdm(annotations):
320
- photo_types.add(ann['photo_type'])