megadetector 5.0.8-py3-none-any.whl → 5.0.10-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of megadetector has been flagged as potentially problematic.

Files changed (190)
  1. api/__init__.py +0 -0
  2. api/batch_processing/__init__.py +0 -0
  3. api/batch_processing/api_core/__init__.py +0 -0
  4. api/batch_processing/api_core/batch_service/__init__.py +0 -0
  5. api/batch_processing/api_core/batch_service/score.py +0 -1
  6. api/batch_processing/api_core/server_job_status_table.py +0 -1
  7. api/batch_processing/api_core_support/__init__.py +0 -0
  8. api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
  9. api/batch_processing/api_support/__init__.py +0 -0
  10. api/batch_processing/api_support/summarize_daily_activity.py +0 -1
  11. api/batch_processing/data_preparation/__init__.py +0 -0
  12. api/batch_processing/data_preparation/manage_local_batch.py +65 -65
  13. api/batch_processing/data_preparation/manage_video_batch.py +8 -8
  14. api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
  15. api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
  16. api/batch_processing/postprocessing/__init__.py +0 -0
  17. api/batch_processing/postprocessing/add_max_conf.py +12 -12
  18. api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
  19. api/batch_processing/postprocessing/combine_api_outputs.py +68 -54
  20. api/batch_processing/postprocessing/compare_batch_results.py +113 -43
  21. api/batch_processing/postprocessing/convert_output_format.py +41 -16
  22. api/batch_processing/postprocessing/load_api_results.py +16 -17
  23. api/batch_processing/postprocessing/md_to_coco.py +31 -21
  24. api/batch_processing/postprocessing/md_to_labelme.py +52 -22
  25. api/batch_processing/postprocessing/merge_detections.py +14 -14
  26. api/batch_processing/postprocessing/postprocess_batch_results.py +246 -174
  27. api/batch_processing/postprocessing/remap_detection_categories.py +32 -25
  28. api/batch_processing/postprocessing/render_detection_confusion_matrix.py +60 -27
  29. api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
  30. api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
  31. api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +242 -158
  32. api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
  33. api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
  34. api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
  35. api/synchronous/__init__.py +0 -0
  36. api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
  37. api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
  38. api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
  39. api/synchronous/api_core/animal_detection_api/config.py +35 -35
  40. api/synchronous/api_core/tests/__init__.py +0 -0
  41. api/synchronous/api_core/tests/load_test.py +109 -109
  42. classification/__init__.py +0 -0
  43. classification/aggregate_classifier_probs.py +21 -24
  44. classification/analyze_failed_images.py +11 -13
  45. classification/cache_batchapi_outputs.py +51 -51
  46. classification/create_classification_dataset.py +69 -68
  47. classification/crop_detections.py +54 -53
  48. classification/csv_to_json.py +97 -100
  49. classification/detect_and_crop.py +105 -105
  50. classification/evaluate_model.py +43 -42
  51. classification/identify_mislabeled_candidates.py +47 -46
  52. classification/json_to_azcopy_list.py +10 -10
  53. classification/json_validator.py +72 -71
  54. classification/map_classification_categories.py +44 -43
  55. classification/merge_classification_detection_output.py +68 -68
  56. classification/prepare_classification_script.py +157 -154
  57. classification/prepare_classification_script_mc.py +228 -228
  58. classification/run_classifier.py +27 -26
  59. classification/save_mislabeled.py +30 -30
  60. classification/train_classifier.py +20 -20
  61. classification/train_classifier_tf.py +21 -22
  62. classification/train_utils.py +10 -10
  63. data_management/__init__.py +0 -0
  64. data_management/annotations/__init__.py +0 -0
  65. data_management/annotations/annotation_constants.py +18 -31
  66. data_management/camtrap_dp_to_coco.py +238 -0
  67. data_management/cct_json_utils.py +102 -59
  68. data_management/cct_to_md.py +176 -158
  69. data_management/cct_to_wi.py +247 -219
  70. data_management/coco_to_labelme.py +272 -263
  71. data_management/coco_to_yolo.py +79 -58
  72. data_management/databases/__init__.py +0 -0
  73. data_management/databases/add_width_and_height_to_db.py +20 -16
  74. data_management/databases/combine_coco_camera_traps_files.py +35 -31
  75. data_management/databases/integrity_check_json_db.py +62 -24
  76. data_management/databases/subset_json_db.py +24 -15
  77. data_management/generate_crops_from_cct.py +27 -45
  78. data_management/get_image_sizes.py +188 -162
  79. data_management/importers/add_nacti_sizes.py +8 -8
  80. data_management/importers/add_timestamps_to_icct.py +78 -78
  81. data_management/importers/animl_results_to_md_results.py +158 -158
  82. data_management/importers/auckland_doc_test_to_json.py +9 -9
  83. data_management/importers/auckland_doc_to_json.py +8 -8
  84. data_management/importers/awc_to_json.py +7 -7
  85. data_management/importers/bellevue_to_json.py +15 -15
  86. data_management/importers/cacophony-thermal-importer.py +13 -13
  87. data_management/importers/carrizo_shrubfree_2018.py +8 -8
  88. data_management/importers/carrizo_trail_cam_2017.py +8 -8
  89. data_management/importers/cct_field_adjustments.py +9 -9
  90. data_management/importers/channel_islands_to_cct.py +10 -10
  91. data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
  92. data_management/importers/ena24_to_json.py +7 -7
  93. data_management/importers/filenames_to_json.py +8 -8
  94. data_management/importers/helena_to_cct.py +7 -7
  95. data_management/importers/idaho-camera-traps.py +7 -7
  96. data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
  97. data_management/importers/jb_csv_to_json.py +9 -9
  98. data_management/importers/mcgill_to_json.py +8 -8
  99. data_management/importers/missouri_to_json.py +18 -18
  100. data_management/importers/nacti_fieldname_adjustments.py +10 -10
  101. data_management/importers/noaa_seals_2019.py +7 -7
  102. data_management/importers/pc_to_json.py +7 -7
  103. data_management/importers/plot_wni_giraffes.py +7 -7
  104. data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
  105. data_management/importers/prepare_zsl_imerit.py +7 -7
  106. data_management/importers/rspb_to_json.py +8 -8
  107. data_management/importers/save_the_elephants_survey_A.py +8 -8
  108. data_management/importers/save_the_elephants_survey_B.py +9 -9
  109. data_management/importers/snapshot_safari_importer.py +26 -26
  110. data_management/importers/snapshot_safari_importer_reprise.py +665 -665
  111. data_management/importers/snapshot_serengeti_lila.py +14 -14
  112. data_management/importers/sulross_get_exif.py +8 -9
  113. data_management/importers/timelapse_csv_set_to_json.py +11 -11
  114. data_management/importers/ubc_to_json.py +13 -13
  115. data_management/importers/umn_to_json.py +7 -7
  116. data_management/importers/wellington_to_json.py +8 -8
  117. data_management/importers/wi_to_json.py +9 -9
  118. data_management/importers/zamba_results_to_md_results.py +181 -181
  119. data_management/labelme_to_coco.py +65 -24
  120. data_management/labelme_to_yolo.py +8 -8
  121. data_management/lila/__init__.py +0 -0
  122. data_management/lila/add_locations_to_island_camera_traps.py +9 -9
  123. data_management/lila/add_locations_to_nacti.py +147 -147
  124. data_management/lila/create_lila_blank_set.py +13 -13
  125. data_management/lila/create_lila_test_set.py +8 -8
  126. data_management/lila/create_links_to_md_results_files.py +106 -106
  127. data_management/lila/download_lila_subset.py +44 -110
  128. data_management/lila/generate_lila_per_image_labels.py +55 -42
  129. data_management/lila/get_lila_annotation_counts.py +18 -15
  130. data_management/lila/get_lila_image_counts.py +11 -11
  131. data_management/lila/lila_common.py +96 -33
  132. data_management/lila/test_lila_metadata_urls.py +132 -116
  133. data_management/ocr_tools.py +173 -128
  134. data_management/read_exif.py +110 -97
  135. data_management/remap_coco_categories.py +83 -83
  136. data_management/remove_exif.py +58 -62
  137. data_management/resize_coco_dataset.py +30 -23
  138. data_management/wi_download_csv_to_coco.py +246 -239
  139. data_management/yolo_output_to_md_output.py +86 -73
  140. data_management/yolo_to_coco.py +300 -60
  141. detection/__init__.py +0 -0
  142. detection/detector_training/__init__.py +0 -0
  143. detection/process_video.py +85 -33
  144. detection/pytorch_detector.py +43 -25
  145. detection/run_detector.py +157 -72
  146. detection/run_detector_batch.py +179 -113
  147. detection/run_inference_with_yolov5_val.py +108 -48
  148. detection/run_tiled_inference.py +111 -40
  149. detection/tf_detector.py +51 -29
  150. detection/video_utils.py +606 -521
  151. docs/source/conf.py +43 -0
  152. md_utils/__init__.py +0 -0
  153. md_utils/azure_utils.py +9 -9
  154. md_utils/ct_utils.py +228 -68
  155. md_utils/directory_listing.py +59 -64
  156. md_utils/md_tests.py +968 -871
  157. md_utils/path_utils.py +460 -134
  158. md_utils/process_utils.py +157 -133
  159. md_utils/sas_blob_utils.py +20 -20
  160. md_utils/split_locations_into_train_val.py +45 -32
  161. md_utils/string_utils.py +33 -10
  162. md_utils/url_utils.py +176 -60
  163. md_utils/write_html_image_list.py +40 -33
  164. md_visualization/__init__.py +0 -0
  165. md_visualization/plot_utils.py +102 -109
  166. md_visualization/render_images_with_thumbnails.py +34 -34
  167. md_visualization/visualization_utils.py +597 -291
  168. md_visualization/visualize_db.py +76 -48
  169. md_visualization/visualize_detector_output.py +61 -42
  170. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/METADATA +13 -7
  171. megadetector-5.0.10.dist-info/RECORD +224 -0
  172. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/top_level.txt +1 -0
  173. taxonomy_mapping/__init__.py +0 -0
  174. taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
  175. taxonomy_mapping/map_new_lila_datasets.py +154 -154
  176. taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
  177. taxonomy_mapping/preview_lila_taxonomy.py +591 -591
  178. taxonomy_mapping/retrieve_sample_image.py +12 -12
  179. taxonomy_mapping/simple_image_download.py +11 -11
  180. taxonomy_mapping/species_lookup.py +10 -10
  181. taxonomy_mapping/taxonomy_csv_checker.py +18 -18
  182. taxonomy_mapping/taxonomy_graph.py +47 -47
  183. taxonomy_mapping/validate_lila_category_mappings.py +83 -76
  184. data_management/cct_json_to_filename_json.py +0 -89
  185. data_management/cct_to_csv.py +0 -140
  186. data_management/databases/remove_corrupted_images_from_db.py +0 -191
  187. detection/detector_training/copy_checkpoints.py +0 -43
  188. megadetector-5.0.8.dist-info/RECORD +0 -205
  189. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/LICENSE +0 -0
  190. {megadetector-5.0.8.dist-info → megadetector-5.0.10.dist-info}/WHEEL +0 -0
@@ -1,359 +1,359 @@
- ########
- #
- # Prepare a LILA-ready .json file for the NOAA Puget Sound Nearshore Fish dataset.
- #
- ########
-
- #%% Constants and imports
-
- import os
- import json
- import uuid
- import pandas as pd
-
- from md_utils.path_utils import open_file
-
- base_folder = r'G:\temp\noaa'
- output_json_fn = os.path.join(base_folder,'noaa_estuary_fish.json')
- edited_image_folders = ['edited_clip_2017','edited_clip_2018']
- jpeg_image_folder = 'JPEGImages'
- metadata_file = 'MasterDataForMicrosoft.xlsx'
-
-
- #%% Enumerate files
-
- edited_image_files = []
-
- # edited_image_folder = edited_image_folders[0]
- for edited_image_folder in edited_image_folders:
-     folder_path = os.path.join(base_folder,edited_image_folder)
-     image_files = os.listdir(folder_path)
-     assert all([fn.endswith('.jpg') for fn in image_files])
-     edited_image_files.extend([os.path.join(folder_path,fn) for fn in image_files])
-
- jpeg_image_folder_files = os.listdir(os.path.join(base_folder,jpeg_image_folder))
- assert all([fn.endswith('.jpg') for fn in jpeg_image_folder_files])
-
- relative_edited_image_files_set = set()
-
- # fn = edited_image_files[0]
- for fn in edited_image_files:
-     bn = os.path.basename(fn)
-     assert bn not in relative_edited_image_files_set
-     relative_edited_image_files_set.add(bn)
-
- jpeg_image_folder_files_set = set(jpeg_image_folder_files)
-
- assert len(jpeg_image_folder_files_set) == len(relative_edited_image_files_set)
-
- assert jpeg_image_folder_files_set == relative_edited_image_files_set
-
-
- #%% Read metadata and capture location information
-
- df = pd.read_excel(os.path.join(base_folder,metadata_file))
-
- print('Read {} rows from metadata file'.format(len(df)))
-
- id_string_to_site = {}
-
- # i_row = 0; row = df.iloc[i_row]
- for i_row,row in df.iterrows():
-
-     assert row['sd'].lower().startswith('sd')
-     assert isinstance(row['id'],int) and row['id'] > 0 and row['id'] < 10000
-     date_string = row['date']
-     date_tokens = date_string.split('_')
-
-     # Sometimes '2017' was just '17' in the date column
-     if len(date_tokens[2]) != 4:
-         assert len(date_tokens[2]) == 2
-         date_tokens[2] = '20' + date_tokens[2]
-         date_string = '_'.join(date_tokens)
-     else:
-         assert date_tokens[2].startswith('201')
-
-     id_string = row['sd'].upper() + '_' + str(row['id']) + '_' + date_string
-     id_string_to_site[id_string] = row['site']
-
- print('Found {} unique locations'.format(len(pd.unique(df['site']))))
-
-
- #%% Read the .json files and build output dictionaries
-
- json_files = [fn for fn in os.listdir(base_folder) if (fn.endswith('.json') and (fn != os.path.basename(output_json_fn)))]
- json_files = [os.path.join(base_folder,fn) for fn in json_files]
-
- fn_to_image = {}
- annotations = []
-
- CATEGORY_ID_EMPTY = 0
- CATEGORY_ID_FISH = 1
-
- categories = [{'id':CATEGORY_ID_EMPTY,'name':'empty'},{'id':CATEGORY_ID_FISH,'name':'animal'}]
-
- empty_images = set()
- non_empty_images = set()
-
- n_matched_locations = 0
- images_with_unmatched_locations = []
-
- import random
- random.seed(1)
-
- site_to_location_id = {}
-
- # json_fn = json_files[0]
- for json_fn in json_files:
-
-     # if 'partial' in json_fn:
-     #     continue
-
-     with open(json_fn,'r') as f:
-
-         lines = f.readlines()
-
-         # line = lines[0]
-         for line in lines:
-
-             d = json.loads(line)
-             image_fn = d['image']
-
-             # if image_fn == 'SD1_238_6_26_17_16_76.73.jpg':
-             #     asdfad
-
-             # SD29_079_5_14_2018_17_52.85.jpg
-
-             tokens = image_fn.split('_')
-             assert len(tokens) == 7
-             assert tokens[0].startswith('SD')
-
-             # Re-write two-digit years as four-digit years
-             if len(tokens[4]) != 4:
-                 assert len(tokens[4]) == 2
-                 tokens[4] = '20' + tokens[4]
-             else:
-                 assert tokens[4].startswith('201')
-
-             # Sometimes the year was written with two digits instead of 4
-             # assert len(tokens[4]) == 4 and tokens[4].startswith('20')
-
-             while tokens[1].startswith('0'):
-                 tokens[1] = tokens[1][1:]
-             assert not tokens[1].startswith('0')
-             assert len(tokens[1]) > 0
-
-             id_string = '_'.join(tokens[0:5])
-
-             location_id = 'unknown'
-
-             if id_string in id_string_to_site:
-
-                 site_id = id_string_to_site[id_string]
-
-                 # Have we seen this location already?
-                 if site_id in site_to_location_id:
-                     location_id = site_to_location_id[site_id]
-                 else:
-                     location_id = 'loc_' + str(uuid.uuid1())
-                     site_to_location_id[site_id] = location_id
-                     print('Adding new location ID {} for site {}'.format(
-                         location_id,site_id))
-                 n_matched_locations += 1
-
-             else:
-                 raise ValueError('Could not match location ID')
-                 images_with_unmatched_locations.append(image_fn)
-
-             assert image_fn in jpeg_image_folder_files_set
-             assert d['type'] == 'image/jpg'
-             input_ann = d['annotations']
-             assert len(input_ann) == 1 and len(input_ann.keys()) == 1 and 'object' in input_ann
-             input_ann = input_ann['object']
-             assert input_ann['metainfo']['image']['height'] == 1080
-             assert input_ann['metainfo']['image']['width'] == 1920
-
-             im = {}
-
-             img_h = input_ann['metainfo']['image']['height']
-             img_w = input_ann['metainfo']['image']['width']
-
-             im['width'] = img_w
-             im['height'] = img_h
-             im['file_name'] = image_fn
-
-             if image_fn in fn_to_image:
-                 assert fn_to_image[image_fn]['file_name'] == image_fn
-                 assert fn_to_image[image_fn]['width'] == img_w
-                 assert fn_to_image[image_fn]['height'] == img_h
-                 im = fn_to_image[image_fn]
-             else:
-                 fn_to_image[image_fn] = im
-                 im['location'] = location_id
-                 im['id'] = image_fn # str(uuid.uuid1())
-
-             # Not a typo, it's actually "formateddata"
-             formatted_data = input_ann['formateddata']
-             if len(formatted_data) == 0:
-
-                 # An image shouldn't be annotated as both empty and non-empty
-                 assert image_fn not in non_empty_images
-                 empty_images.add(image_fn)
-                 ann = {}
-                 ann['id'] = str(uuid.uuid1())
-                 ann['image_id'] = im['id']
-                 ann['category_id'] = CATEGORY_ID_EMPTY
-                 ann['sequence_level_annotation'] = False
-                 annotations.append(ann)
-
-             else:
-
-                 # An image shouldn't be annotated as both empty and non-empty
-                 assert image_fn not in empty_images
-                 non_empty_images.add(image_fn)
-
-                 n_boxes = len(formatted_data)
-
-                 # box = formatteddata[0]
-                 for box in formatted_data:
-
-                     attributes = box['attribute']
-                     assert len(attributes) == 2 and 'occluded' in attributes and 'truncated' in attributes
-                     coordinates = box['coordinates']
-                     assert box['object_type'] == 'bbox'
-                     assert box['class']['type'] == 'Fish'
-                     assert len(coordinates) == 4
-                     for coord in coordinates:
-                         assert len(coord) == 2 and 'x' in coord and 'y' in coord
-                     assert coordinates[0]['y'] == coordinates[1]['y']
-                     assert coordinates[2]['y'] == coordinates[3]['y']
-                     assert coordinates[0]['x'] == coordinates[3]['x']
-                     assert coordinates[1]['x'] == coordinates[2]['x']
-
-                     assert coordinates[0]['x'] < coordinates[1]['x']
-                     assert coordinates[0]['y'] < coordinates[3]['y']
-
-                     if False:
-                         x = coordinates[0]['x'] / img_w
-                         y = coordinates[0]['y'] / img_h
-                         box_w = (coordinates[1]['x'] - coordinates[0]['x']) / img_w
-                         box_h = (coordinates[3]['y'] - coordinates[0]['y']) / img_h
-                     else:
-                         x = coordinates[0]['x']
-                         y = coordinates[0]['y']
-                         box_w = (coordinates[1]['x'] - coordinates[0]['x'])
-                         box_h = (coordinates[3]['y'] - coordinates[0]['y'])
-
-                     bbox = [x,y,box_w,box_h]
-
-                     ann = {}
-                     ann['id'] = str(uuid.uuid1())
-                     ann['image_id'] = im['id']
-                     ann['category_id'] = CATEGORY_ID_FISH
-                     ann['sequence_level_annotation'] = False
-                     ann['bbox'] = bbox
-
-                     annotations.append(ann)
-
-                     # open_file(os.path.join(base_folder,jpeg_image_folder,image_fn))
-
-                 # ...for each box
-
-             # ...if there are boxes on this image
-
-         # ...for each line
-
-     # ...with open()
-
- # ...for each json file
-
- print('Found annotations for {} images (of {})'.format(len(fn_to_image),
-     len(jpeg_image_folder_files_set)))
-
-
- print('Matched locations for {} images (failed to match {})'.format(
-     n_matched_locations,len(images_with_unmatched_locations)))
-
- images = list(fn_to_image.values())
-
-
- #%% Prepare the output .json
-
- info = {}
- info['version'] = '2022.07.31.00'
- info['description'] = 'NOAA Estuary Fish 2022'
- info['year'] = 2022
- info['contributor'] = 'NOAA Fisheries'
-
- d = {}
- d['info'] = info
- d['annotations'] = annotations
- d['images'] = images
- d['categories'] = categories
-
- with open(output_json_fn,'w') as f:
-     json.dump(d,f,indent=1)
-
-
- #%% Check DB integrity
-
- from data_management.databases import integrity_check_json_db
-
- options = integrity_check_json_db.IntegrityCheckOptions()
- options.baseDir = os.path.join(base_folder,jpeg_image_folder)
- options.bCheckImageSizes = False
- options.bCheckImageExistence = True
- options.bFindUnusedImages = True
-
- _, _, _ = integrity_check_json_db.integrity_check_json_db(output_json_fn, options)
-
-
- #%% Print unique locations
-
- from collections import defaultdict
- location_to_count = defaultdict(int)
- for im in d['images']:
-     location_to_count[im['location']] += 1
- for loc in location_to_count.keys():
-     print(loc + ': ' + str(location_to_count[loc]))
-
- print('{} unique locations'.format(len(location_to_count)))
- assert 'unknown' not in location_to_count.keys()
-
- # SD12_202_6_23_2017_1_31.85.jpg
-
-
- #%% Preview some images
-
- from md_visualization import visualize_db
-
- viz_options = visualize_db.DbVizOptions()
- viz_options.num_to_visualize = 10000
- viz_options.trim_to_images_with_bboxes = False
- viz_options.add_search_links = False
- viz_options.sort_by_filename = False
- viz_options.parallelize_rendering = True
- viz_options.include_filename_links = True
-
- html_output_file, _ = visualize_db.visualize_db(db_path=output_json_fn,
-     output_dir=os.path.join(base_folder,'preview'),
-     image_base_dir=os.path.join(base_folder,jpeg_image_folder),
-     options=viz_options)
- open_file(html_output_file)
-
-
- #%% Statistics
-
- print('Empty: {}'.format(len(empty_images)))
- print('Non-empty: {}'.format(len(non_empty_images)))
-
- images_with_no_boxes = 0
- n_boxes = 0
- for ann in annotations:
-     if 'bbox' not in ann:
-         images_with_no_boxes += 1
-     else:
-         assert len(bbox) == 4
-         n_boxes += 1
-
- print('N boxes: {}'.format(n_boxes))
+ """
+
+ Prepare a LILA-ready .json file for the NOAA Puget Sound Nearshore Fish dataset.
+
+ """
+
+ #%% Constants and imports
+
+ import os
+ import json
+ import uuid
+ import pandas as pd
+
+ from md_utils.path_utils import open_file
+
+ base_folder = r'G:\temp\noaa'
+ output_json_fn = os.path.join(base_folder,'noaa_estuary_fish.json')
+ edited_image_folders = ['edited_clip_2017','edited_clip_2018']
+ jpeg_image_folder = 'JPEGImages'
+ metadata_file = 'MasterDataForMicrosoft.xlsx'
+
+
+ #%% Enumerate files
+
+ edited_image_files = []
+
+ # edited_image_folder = edited_image_folders[0]
+ for edited_image_folder in edited_image_folders:
+     folder_path = os.path.join(base_folder,edited_image_folder)
+     image_files = os.listdir(folder_path)
+     assert all([fn.endswith('.jpg') for fn in image_files])
+     edited_image_files.extend([os.path.join(folder_path,fn) for fn in image_files])
+
+ jpeg_image_folder_files = os.listdir(os.path.join(base_folder,jpeg_image_folder))
+ assert all([fn.endswith('.jpg') for fn in jpeg_image_folder_files])
+
+ relative_edited_image_files_set = set()
+
+ # fn = edited_image_files[0]
+ for fn in edited_image_files:
+     bn = os.path.basename(fn)
+     assert bn not in relative_edited_image_files_set
+     relative_edited_image_files_set.add(bn)
+
+ jpeg_image_folder_files_set = set(jpeg_image_folder_files)
+
+ assert len(jpeg_image_folder_files_set) == len(relative_edited_image_files_set)
+
+ assert jpeg_image_folder_files_set == relative_edited_image_files_set
+
+
+ #%% Read metadata and capture location information
+
+ df = pd.read_excel(os.path.join(base_folder,metadata_file))
+
+ print('Read {} rows from metadata file'.format(len(df)))
+
+ id_string_to_site = {}
+
+ # i_row = 0; row = df.iloc[i_row]
+ for i_row,row in df.iterrows():
+
+     assert row['sd'].lower().startswith('sd')
+     assert isinstance(row['id'],int) and row['id'] > 0 and row['id'] < 10000
+     date_string = row['date']
+     date_tokens = date_string.split('_')
+
+     # Sometimes '2017' was just '17' in the date column
+     if len(date_tokens[2]) != 4:
+         assert len(date_tokens[2]) == 2
+         date_tokens[2] = '20' + date_tokens[2]
+         date_string = '_'.join(date_tokens)
+     else:
+         assert date_tokens[2].startswith('201')
+
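+     # e.g. 'SD29_79_5_14_2018'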
+     id_string = row['sd'].upper() + '_' + str(row['id']) + '_' + date_string
+     id_string_to_site[id_string] = row['site']
+
+ print('Found {} unique locations'.format(len(pd.unique(df['site']))))
+
+
+ #%% Read the .json files and build output dictionaries
+
+ json_files = [fn for fn in os.listdir(base_folder) if (fn.endswith('.json') and (fn != os.path.basename(output_json_fn)))]
+ json_files = [os.path.join(base_folder,fn) for fn in json_files]
+
+ fn_to_image = {}
+ annotations = []
+
+ CATEGORY_ID_EMPTY = 0
+ CATEGORY_ID_FISH = 1
+
+ categories = [{'id':CATEGORY_ID_EMPTY,'name':'empty'},{'id':CATEGORY_ID_FISH,'name':'animal'}]
+
+ empty_images = set()
+ non_empty_images = set()
+
+ n_matched_locations = 0
+ images_with_unmatched_locations = []
+
+ import random
+ random.seed(1)
+
+ site_to_location_id = {}
+
+ # json_fn = json_files[0]
+ for json_fn in json_files:
+
+     # if 'partial' in json_fn:
+     #     continue
+
+     with open(json_fn,'r') as f:
+
+         lines = f.readlines()
+
+         # line = lines[0]
+         for line in lines:
+
+             d = json.loads(line)
+             image_fn = d['image']
+
+             # if image_fn == 'SD1_238_6_26_17_16_76.73.jpg':
+             #     asdfad
+
+             # SD29_079_5_14_2018_17_52.85.jpg
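+             # The first five tokens (card, id, month, day, year) form the key used
+             # to look up this image's site below; the trailing tokens are not used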
+
+             tokens = image_fn.split('_')
+             assert len(tokens) == 7
+             assert tokens[0].startswith('SD')
+
+             # Re-write two-digit years as four-digit years
+             if len(tokens[4]) != 4:
+                 assert len(tokens[4]) == 2
+                 tokens[4] = '20' + tokens[4]
+             else:
+                 assert tokens[4].startswith('201')
+
+             # Sometimes the year was written with two digits instead of 4
+             # assert len(tokens[4]) == 4 and tokens[4].startswith('20')
+
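+             # Strip leading zeros (e.g. '079' -> '79') so this token matches the
+             # integer 'id' column from the metadata table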
+             while tokens[1].startswith('0'):
+                 tokens[1] = tokens[1][1:]
+             assert not tokens[1].startswith('0')
+             assert len(tokens[1]) > 0
+
+             id_string = '_'.join(tokens[0:5])
+
+             location_id = 'unknown'
+
+             if id_string in id_string_to_site:
+
+                 site_id = id_string_to_site[id_string]
+
+                 # Have we seen this location already?
+                 if site_id in site_to_location_id:
+                     location_id = site_to_location_id[site_id]
+                 else:
+                     location_id = 'loc_' + str(uuid.uuid1())
+                     site_to_location_id[site_id] = location_id
+                     print('Adding new location ID {} for site {}'.format(
+                         location_id,site_id))
+                 n_matched_locations += 1
+
+             else:
+                 # Record the unmatched file before raising, so it appears in the
+                 # failure list
+                 images_with_unmatched_locations.append(image_fn)
+                 raise ValueError('Could not match location ID')
+
+             assert image_fn in jpeg_image_folder_files_set
+             assert d['type'] == 'image/jpg'
+             input_ann = d['annotations']
+             assert len(input_ann) == 1 and len(input_ann.keys()) == 1 and 'object' in input_ann
+             input_ann = input_ann['object']
+             assert input_ann['metainfo']['image']['height'] == 1080
+             assert input_ann['metainfo']['image']['width'] == 1920
+
+             im = {}
+
+             img_h = input_ann['metainfo']['image']['height']
+             img_w = input_ann['metainfo']['image']['width']
+
+             im['width'] = img_w
+             im['height'] = img_h
+             im['file_name'] = image_fn
+
+             if image_fn in fn_to_image:
+                 assert fn_to_image[image_fn]['file_name'] == image_fn
+                 assert fn_to_image[image_fn]['width'] == img_w
+                 assert fn_to_image[image_fn]['height'] == img_h
+                 im = fn_to_image[image_fn]
+             else:
+                 fn_to_image[image_fn] = im
+                 im['location'] = location_id
+                 im['id'] = image_fn # str(uuid.uuid1())
+
+             # Not a typo, it's actually "formateddata"
+             formatted_data = input_ann['formateddata']
+             if len(formatted_data) == 0:
+
+                 # An image shouldn't be annotated as both empty and non-empty
+                 assert image_fn not in non_empty_images
+                 empty_images.add(image_fn)
+                 ann = {}
+                 ann['id'] = str(uuid.uuid1())
+                 ann['image_id'] = im['id']
+                 ann['category_id'] = CATEGORY_ID_EMPTY
+                 ann['sequence_level_annotation'] = False
+                 annotations.append(ann)
+
+             else:
+
+                 # An image shouldn't be annotated as both empty and non-empty
+                 assert image_fn not in empty_images
+                 non_empty_images.add(image_fn)
+
+                 n_boxes = len(formatted_data)
+
+                 # box = formatted_data[0]
+                 for box in formatted_data:
+
+                     attributes = box['attribute']
+                     assert len(attributes) == 2 and 'occluded' in attributes and 'truncated' in attributes
+                     coordinates = box['coordinates']
+                     assert box['object_type'] == 'bbox'
+                     assert box['class']['type'] == 'Fish'
+                     assert len(coordinates) == 4
+                     for coord in coordinates:
+                         assert len(coord) == 2 and 'x' in coord and 'y' in coord
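+                     # Corners are expected in [top-left, top-right, bottom-right,
+                     # bottom-left] order; these asserts verify an axis-aligned box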
+                     assert coordinates[0]['y'] == coordinates[1]['y']
+                     assert coordinates[2]['y'] == coordinates[3]['y']
+                     assert coordinates[0]['x'] == coordinates[3]['x']
+                     assert coordinates[1]['x'] == coordinates[2]['x']
+
+                     assert coordinates[0]['x'] < coordinates[1]['x']
+                     assert coordinates[0]['y'] < coordinates[3]['y']
+
+                     if False:
+                         x = coordinates[0]['x'] / img_w
+                         y = coordinates[0]['y'] / img_h
+                         box_w = (coordinates[1]['x'] - coordinates[0]['x']) / img_w
+                         box_h = (coordinates[3]['y'] - coordinates[0]['y']) / img_h
+                     else:
+                         x = coordinates[0]['x']
+                         y = coordinates[0]['y']
+                         box_w = (coordinates[1]['x'] - coordinates[0]['x'])
+                         box_h = (coordinates[3]['y'] - coordinates[0]['y'])
+
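+                     # COCO-style [x,y,width,height], in absolute pixel coordinates
+                     # (the disabled branch above would normalize by image size)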
+                     bbox = [x,y,box_w,box_h]
+
+                     ann = {}
+                     ann['id'] = str(uuid.uuid1())
+                     ann['image_id'] = im['id']
+                     ann['category_id'] = CATEGORY_ID_FISH
+                     ann['sequence_level_annotation'] = False
+                     ann['bbox'] = bbox
+
+                     annotations.append(ann)
+
+                     # open_file(os.path.join(base_folder,jpeg_image_folder,image_fn))
+
+                 # ...for each box
+
+             # ...if there are boxes on this image
+
+         # ...for each line
+
+     # ...with open()
+
+ # ...for each json file
+
+ print('Found annotations for {} images (of {})'.format(len(fn_to_image),
+     len(jpeg_image_folder_files_set)))
+
+
+ print('Matched locations for {} images (failed to match {})'.format(
+     n_matched_locations,len(images_with_unmatched_locations)))
+
+ images = list(fn_to_image.values())
+
+
+ #%% Prepare the output .json
+
+ info = {}
+ info['version'] = '2022.07.31.00'
+ info['description'] = 'NOAA Estuary Fish 2022'
+ info['year'] = 2022
+ info['contributor'] = 'NOAA Fisheries'
+
+ d = {}
+ d['info'] = info
+ d['annotations'] = annotations
+ d['images'] = images
+ d['categories'] = categories
+
+ with open(output_json_fn,'w') as f:
+     json.dump(d,f,indent=1)
+
+
+ #%% Check DB integrity
+
+ from data_management.databases import integrity_check_json_db
+
+ options = integrity_check_json_db.IntegrityCheckOptions()
+ options.baseDir = os.path.join(base_folder,jpeg_image_folder)
+ options.bCheckImageSizes = False
+ options.bCheckImageExistence = True
+ options.bFindUnusedImages = True
+
+ _, _, _ = integrity_check_json_db.integrity_check_json_db(output_json_fn, options)
+
+
+ #%% Print unique locations
+
+ from collections import defaultdict
+ location_to_count = defaultdict(int)
+ for im in d['images']:
+     location_to_count[im['location']] += 1
+ for loc in location_to_count.keys():
+     print(loc + ': ' + str(location_to_count[loc]))
+
+ print('{} unique locations'.format(len(location_to_count)))
+ assert 'unknown' not in location_to_count.keys()
+
+ # SD12_202_6_23_2017_1_31.85.jpg
+
+
+ #%% Preview some images
+
+ from md_visualization import visualize_db
+
+ viz_options = visualize_db.DbVizOptions()
+ viz_options.num_to_visualize = 10000
+ viz_options.trim_to_images_with_bboxes = False
+ viz_options.add_search_links = False
+ viz_options.sort_by_filename = False
+ viz_options.parallelize_rendering = True
+ viz_options.include_filename_links = True
+
+ html_output_file, _ = visualize_db.visualize_db(db_path=output_json_fn,
+     output_dir=os.path.join(base_folder,'preview'),
+     image_base_dir=os.path.join(base_folder,jpeg_image_folder),
+     options=viz_options)
+ open_file(html_output_file)
+
+
+ #%% Statistics
+
+ print('Empty: {}'.format(len(empty_images)))
+ print('Non-empty: {}'.format(len(non_empty_images)))
+
+ images_with_no_boxes = 0
+ n_boxes = 0
+ for ann in annotations:
+     if 'bbox' not in ann:
+         images_with_no_boxes += 1
+     else:
+         # This annotation's box should be [x,y,w,h]
+         assert len(ann['bbox']) == 4
+         n_boxes += 1
+
+ print('N boxes: {}'.format(n_boxes))
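
For illustration only, a minimal sketch of how the .json written by this script could be sanity-checked after a run; it assumes only the output path defined above and the fields the script writes, and is not part of the package:

#%% Sanity-check the output file

import json

with open(r'G:\temp\noaa\noaa_estuary_fish.json','r') as f:
    db = json.load(f)

image_ids = set(im['id'] for im in db['images'])
category_ids = set(c['id'] for c in db['categories'])

for ann in db['annotations']:
    # Every annotation must reference a valid image and category
    assert ann['image_id'] in image_ids
    assert ann['category_id'] in category_ids
    # Box annotations are [x,y,width,height]; empty annotations carry no box
    if 'bbox' in ann:
        assert len(ann['bbox']) == 4

print('Validated {} annotations on {} images'.format(
    len(db['annotations']),len(db['images'])))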