megadetector 5.0.24-py3-none-any.whl → 5.0.26-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- megadetector/data_management/cct_json_utils.py +15 -2
- megadetector/data_management/coco_to_yolo.py +53 -31
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +7 -3
- megadetector/data_management/databases/integrity_check_json_db.py +2 -2
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +73 -69
- megadetector/data_management/lila/add_locations_to_nacti.py +114 -110
- megadetector/data_management/lila/generate_lila_per_image_labels.py +2 -2
- megadetector/data_management/lila/test_lila_metadata_urls.py +21 -10
- megadetector/data_management/remap_coco_categories.py +60 -11
- megadetector/data_management/{wi_to_md.py → speciesnet_to_md.py} +2 -2
- megadetector/data_management/yolo_to_coco.py +45 -15
- megadetector/detection/run_detector.py +1 -0
- megadetector/detection/run_detector_batch.py +5 -4
- megadetector/postprocessing/classification_postprocessing.py +788 -524
- megadetector/postprocessing/compare_batch_results.py +176 -9
- megadetector/postprocessing/create_crop_folder.py +420 -0
- megadetector/postprocessing/load_api_results.py +4 -1
- megadetector/postprocessing/md_to_coco.py +1 -1
- megadetector/postprocessing/postprocess_batch_results.py +158 -44
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +3 -8
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +2 -2
- megadetector/postprocessing/separate_detections_into_folders.py +20 -4
- megadetector/postprocessing/subset_json_detector_output.py +180 -15
- megadetector/postprocessing/validate_batch_results.py +13 -5
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +6 -6
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -58
- megadetector/taxonomy_mapping/species_lookup.py +45 -2
- megadetector/utils/ct_utils.py +76 -3
- megadetector/utils/directory_listing.py +4 -4
- megadetector/utils/gpu_test.py +21 -3
- megadetector/utils/md_tests.py +142 -49
- megadetector/utils/path_utils.py +342 -19
- megadetector/utils/wi_utils.py +1286 -212
- megadetector/visualization/visualization_utils.py +16 -4
- megadetector/visualization/visualize_db.py +1 -1
- megadetector/visualization/visualize_detector_output.py +1 -4
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/METADATA +6 -3
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/RECORD +41 -40
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/WHEEL +1 -1
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info/licenses}/LICENSE +0 -0
- {megadetector-5.0.24.dist-info → megadetector-5.0.26.dist-info}/top_level.txt +0 -0

--- a/megadetector/postprocessing/compare_batch_results.py
+++ b/megadetector/postprocessing/compare_batch_results.py
@@ -138,6 +138,9 @@ class BatchComparisonOptions:
         #: List of filenames to include in the comparison, or None to use all files
         self.filenames_to_include = None
 
+        #: List of category names to include in the comparison, or None to use all categories
+        self.category_names_to_include = None
+
         #: Compare only detections/non-detections, ignore categories (still renders categories)
         self.class_agnostic_comparison = False
 
@@ -986,7 +989,32 @@ def _pairwise_compare_batch_results(options,output_index,pairwise_options):
         if invalid_category_error:
 
             continue
-
+
+        # Should we be restricting the comparison to only certain categories?
+        if options.category_names_to_include is not None:
+
+            # Just in case the user provided a single category instead of a list
+            if isinstance(options.category_names_to_include,str):
+                options.category_names_to_include = [options.category_names_to_include]
+
+            category_name_to_id_a = invert_dictionary(detection_categories_a)
+            category_name_to_id_b = invert_dictionary(detection_categories_b)
+            category_ids_to_include_a = []
+            category_ids_to_include_b = []
+
+            for category_name in options.category_names_to_include:
+                if category_name in category_name_to_id_a:
+                    category_ids_to_include_a.append(category_name_to_id_a[category_name])
+                if category_name in category_name_to_id_b:
+                    category_ids_to_include_b.append(category_name_to_id_b[category_name])
+
+            # Restrict the categories we treat as above-threshold to the set we're supposed
+            # to be using
+            categories_above_threshold_a = [category_id for category_id in categories_above_threshold_a if \
+                category_id in category_ids_to_include_a]
+            categories_above_threshold_b = [category_id for category_id in categories_above_threshold_b if \
+                category_id in category_ids_to_include_b]
+
         detection_a = (len(categories_above_threshold_a) > 0)
         detection_b = (len(categories_above_threshold_b) > 0)
 
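
The two hunks above add per-category filtering to batch comparisons: category_names_to_include is a new field on BatchComparisonOptions, and _pairwise_compare_batch_results now ignores above-threshold detections outside that set. A minimal sketch of configuring the new option (only the class and fields shown in this diff are assumed; the options object is passed to the comparison entry points the same way as in earlier releases):

    from megadetector.postprocessing.compare_batch_results import BatchComparisonOptions

    options = BatchComparisonOptions()

    # New in 5.0.26: only 'animal' detections count when deciding whether an image
    # contains an above-threshold detection; a single string is also accepted
    options.category_names_to_include = ['animal']

    # Existing behavior, unchanged: optionally ignore category labels entirely
    options.class_agnostic_comparison = False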
@@ -1609,7 +1637,72 @@ def n_way_comparison(filenames,
 # ...def n_way_comparison(...)
 
 
-def find_equivalent_threshold(results_a,results_b,threshold_a=0.2):
+def find_image_level_detections_above_threshold(results,threshold=0.2,category_names=None):
+    """
+    Returns images in the set of MD results [results] with detections above
+    a threshold confidence level, optionally only counting certain categories.
+
+    Args:
+        results (str or dict): the set of results, either a .json filename or a results
+            dict
+        threshold (float, optional): the threshold used to determine the target number of
+            detections in [results]
+        category_names (list or str, optional): the list of category names to consider (defaults
+            to using all categories), or the name of a single category.
+
+    Returns:
+        list: the images with above-threshold detections
+    """
+    if isinstance(results,str):
+        with open(results,'r') as f:
+            results = json.load(f)
+
+    category_ids_to_consider = None
+
+    if category_names is not None:
+
+        if isinstance(category_names,str):
+            category_names = [category_names]
+
+        category_id_to_name = results['detection_categories']
+        category_name_to_id = invert_dictionary(category_id_to_name)
+
+        category_ids_to_consider = []
+
+        # category_name = category_names[0]
+        for category_name in category_names:
+            category_id = category_name_to_id[category_name]
+            category_ids_to_consider.append(category_id)
+
+        assert len(category_ids_to_consider) > 0, \
+            'Category name list did not map to any category IDs'
+
+    images_above_threshold = []
+
+    for im in results['images']:
+
+        if ('detections' in im) and (im['detections'] is not None) and (len(im['detections']) > 0):
+            confidence_values_this_image = [0]
+            for det in im['detections']:
+                if category_ids_to_consider is not None:
+                    if det['category'] not in category_ids_to_consider:
+                        continue
+                confidence_values_this_image.append(det['conf'])
+            if max(confidence_values_this_image) >= threshold:
+                images_above_threshold.append(im)
+
+    # ...for each image
+
+    return images_above_threshold
+
+# ...def find_image_level_detections_above_threshold(...)
+
+
+def find_equivalent_threshold(results_a,
+                              results_b,
+                              threshold_a=0.2,
+                              category_names=None,
+                              verbose=False):
     """
     Given two sets of detector results, finds the confidence threshold for results_b
     that produces the same fraction of *images* with detections as threshold_a does for
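
The hunk above introduces find_image_level_detections_above_threshold. A short usage sketch follows; the results filename and the 'animal' category are placeholders, not part of the diff:

    from megadetector.postprocessing.compare_batch_results import \
        find_image_level_detections_above_threshold

    # 'md_results.json' stands in for any MD-formatted results file
    images = find_image_level_detections_above_threshold('md_results.json',
                                                         threshold=0.2,
                                                         category_names='animal')
    print('{} images contain above-threshold animal detections'.format(len(images)))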
@@ -1622,6 +1715,9 @@ def find_equivalent_threshold(results_a,results_b,threshold_a=0.2):
             dict
         threshold_a (float, optional): the threshold used to determine the target number of
             detections in results_a
+        category_names (list or str, optional): the list of category names to consider (defaults
+            to using all categories), or the name of a single category.
+        verbose (bool, optional): enable additional debug output
 
     Returns:
         float: the threshold that - when applied to results_b - produces the same number
@@ -1629,35 +1725,106 @@ def find_equivalent_threshold(results_a,results_b,threshold_a=0.2):
     """
 
     if isinstance(results_a,str):
+        if verbose:
+            print('Loading results from {}'.format(results_a))
         with open(results_a,'r') as f:
             results_a = json.load(f)
 
     if isinstance(results_b,str):
+        if verbose:
+            print('Loading results from {}'.format(results_b))
         with open(results_b,'r') as f:
             results_b = json.load(f)
+
+    category_ids_to_consider_a = None
+    category_ids_to_consider_b = None
+
+    if category_names is not None:
+
+        if isinstance(category_names,str):
+            category_names = [category_names]
+
+        categories_a = results_a['detection_categories']
+        categories_b = results_b['detection_categories']
+        category_name_to_id_a = invert_dictionary(categories_a)
+        category_name_to_id_b = invert_dictionary(categories_b)
+
+        category_ids_to_consider_a = []
+        category_ids_to_consider_b = []
+
+        # category_name = category_names[0]
+        for category_name in category_names:
+            category_id_a = category_name_to_id_a[category_name]
+            category_id_b = category_name_to_id_b[category_name]
+            category_ids_to_consider_a.append(category_id_a)
+            category_ids_to_consider_b.append(category_id_b)
 
-
+        assert len(category_ids_to_consider_a) > 0 and len(category_ids_to_consider_b) > 0, \
+            'Category name list did not map to any category IDs in one or both detection sets'
+
+    def _get_confidence_values_for_results(images,category_ids_to_consider,threshold):
+        """
+        Return a list of the maximum confidence value for each image in [images].
+        Returns zero confidence for images with no detections (or no detections
+        in the specified categories). Does not return anything for invalid images.
+        """
+
         confidence_values = []
+        images_above_threshold = []
+
         for im in images:
             if 'detections' in im and im['detections'] is not None:
                 if len(im['detections']) == 0:
                     confidence_values.append(0)
                 else:
-                    confidence_values_this_image = [
-
-
+                    confidence_values_this_image = []
+                    for det in im['detections']:
+                        if category_ids_to_consider is not None:
+                            if det['category'] not in category_ids_to_consider:
+                                continue
+                        confidence_values_this_image.append(det['conf'])
+                    if len(confidence_values_this_image) == 0:
+                        confidence_values.append(0)
+                    else:
+                        max_conf_value = max(confidence_values_this_image)
+
+                        if threshold is not None and max_conf_value >= threshold:
+                            images_above_threshold.append(im)
+                        confidence_values.append(max_conf_value)
+        # ...for each image
+
+        return confidence_values, images_above_threshold
 
-    confidence_values_a =
+    confidence_values_a,images_above_threshold_a = \
+        _get_confidence_values_for_results(results_a['images'],
+                                           category_ids_to_consider_a,
+                                           threshold_a)
+
+    # ...def _get_confidence_values_for_results(...)
+
+    if verbose:
+        print('For result set A, considering {} of {} images'.format(
+            len(confidence_values_a),len(results_a['images'])))
     confidence_values_a_above_threshold = [c for c in confidence_values_a if c >= threshold_a]
 
-    confidence_values_b =
-
+    confidence_values_b,_ = _get_confidence_values_for_results(results_b['images'],
+                                                               category_ids_to_consider_b,
+                                                               threshold=None)
+    if verbose:
+        print('For result set B, considering {} of {} images'.format(
+            len(confidence_values_b),len(results_b['images'])))
+    confidence_values_b = sorted(confidence_values_b)
 
     target_detection_fraction = len(confidence_values_a_above_threshold) / len(confidence_values_a)
 
     detection_cutoff_index = round((1.0-target_detection_fraction) * len(confidence_values_b))
     threshold_b = confidence_values_b[detection_cutoff_index]
 
+    if verbose:
+        print('{} confidence values above threshold (A)'.format(len(confidence_values_a_above_threshold)))
+        confidence_values_b_above_threshold = [c for c in confidence_values_b if c >= threshold_b]
+        print('{} confidence values above threshold (B)'.format(len(confidence_values_b_above_threshold)))
+
     return threshold_b
 
 # ...def find_equivalent_threshold(...)
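
With the changes above, find_equivalent_threshold can calibrate thresholds while only considering selected categories, and can print diagnostics. A hedged sketch of comparing two result sets (filenames are placeholders; the signature is taken from the diff):

    from megadetector.postprocessing.compare_batch_results import find_equivalent_threshold

    # Both files are MD-formatted results over the same image set (placeholder names)
    threshold_b = find_equivalent_threshold('results_model_a.json',
                                            'results_model_b.json',
                                            threshold_a=0.2,
                                            category_names=['animal'],
                                            verbose=True)
    print('Threshold {:.3f} on model B flags the same fraction of images'.format(threshold_b))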

--- /dev/null
+++ b/megadetector/postprocessing/create_crop_folder.py
@@ -0,0 +1,420 @@
+"""
+
+create_crop_folder.py
+
+Given a MegaDetector .json file and a folder of images, creates a new folder
+of images representing all above-threshold crops from the original folder.
+
+"""
+
+#%% Constants and imports
+
+import os
+import json
+from tqdm import tqdm
+
+from multiprocessing.pool import Pool, ThreadPool
+from collections import defaultdict
+from functools import partial
+
+from megadetector.utils.path_utils import insert_before_extension
+from megadetector.utils.ct_utils import invert_dictionary
+from megadetector.visualization.visualization_utils import crop_image
+from megadetector.visualization.visualization_utils import exif_preserving_save
+
+
+#%% Support classes
+
+class CreateCropFolderOptions:
+    """
+    Options used to parameterize create_crop_folder().
+    """
+
+    def __init__(self):
+
+        #: Confidence threshold determining which detections get written
+        self.confidence_threshold = 0.1
+
+        #: Number of pixels to expand each crop
+        self.expansion = 0
+
+        #: JPEG quality to use for saving crops (None for default)
+        self.quality = 95
+
+        #: Whether to overwrite existing images
+        self.overwrite = True
+
+        #: Number of concurrent workers
+        self.n_workers = 8
+
+        #: Whether to use processes ('process') or threads ('thread') for parallelization
+        self.pool_type = 'thread'
+
+        #: Include only these categories, or None to include all
+        #:
+        #: options.category_names_to_include = ['animal']
+        self.category_names_to_include = None
+
+
+#%% Support functions
+
+def _get_crop_filename(image_fn,crop_id):
+    """
+    Generate crop filenames in a consistent way.
+    """
+    if isinstance(crop_id,int):
+        crop_id = str(crop_id).zfill(3)
+    assert isinstance(crop_id,str)
+    return insert_before_extension(image_fn,'crop_' + crop_id)
+
+
+def _generate_crops_for_single_image(crops_this_image,
+                                     input_folder,
+                                     output_folder,
+                                     options):
+    """
+    Generate all the crops required for a single image.
+    """
+    if len(crops_this_image) == 0:
+        return
+
+    image_fn_relative = crops_this_image[0]['image_fn_relative']
+    input_fn_abs = os.path.join(input_folder,image_fn_relative)
+    assert os.path.isfile(input_fn_abs)
+
+    detections_to_crop = [c['detection'] for c in crops_this_image]
+
+    cropped_images = crop_image(detections_to_crop,
+                                input_fn_abs,
+                                confidence_threshold=0,
+                                expansion=options.expansion)
+
+    assert len(cropped_images) == len(crops_this_image)
+
+    # i_crop = 0; crop_info = crops_this_image[0]
+    for i_crop,crop_info in enumerate(crops_this_image):
+
+        assert crop_info['image_fn_relative'] == image_fn_relative
+        crop_filename_relative = _get_crop_filename(image_fn_relative, crop_info['crop_id'])
+        crop_filename_abs = os.path.join(output_folder,crop_filename_relative).replace('\\','/')
+
+        if os.path.isfile(crop_filename_abs) and not options.overwrite:
+            continue
+
+        cropped_image = cropped_images[i_crop]
+        os.makedirs(os.path.dirname(crop_filename_abs),exist_ok=True)
+        exif_preserving_save(cropped_image,crop_filename_abs,quality=options.quality)
+
+    # ...for each crop
+
+
+#%% Main function
+
+def crop_results_to_image_results(image_results_file_with_crop_ids,
+                                  crop_results_file,
+                                  output_file,
+                                  delete_crop_information=True):
+    """
+    This function is intended to be run after you have:
+
+    1. Run MegaDetector on a folder
+    2. Generated a crop folder using create_crop_folder
+    3. Run a species classifier on those crops
+
+    This function will take the crop-level results and transform them back
+    to the original images. Classification categories, if available, are taken
+    from [crop_results_file].
+
+    Args:
+        image_results_file_with_crop_ids (str): results file for the original images,
+            containing crop IDs, likely generated via create_crop_folder. All
+            non-standard fields in this file will be passed along to [output_file].
+        crop_results_file (str): results file for the crop folder
+        output_file (str): ouptut .json file, containing crop-level classifications
+            mapped back to the image level.
+        delete_crop_information (bool, optional): whether to delete the "crop_id" and
+            "crop_filename_relative" fields from each detection, if present.
+    """
+
+    ##%% Validate inputs
+
+    assert os.path.isfile(image_results_file_with_crop_ids), \
+        'Could not find image-level input file {}'.format(image_results_file_with_crop_ids)
+    assert os.path.isfile(crop_results_file), \
+        'Could not find crop results file {}'.format(crop_results_file)
+    os.makedirs(os.path.dirname(output_file),exist_ok=True)
+
+
+    ##%% Read input files
+
+    print('Reading input...')
+
+    with open(image_results_file_with_crop_ids,'r') as f:
+        image_results_with_crop_ids = json.load(f)
+    with open(crop_results_file,'r') as f:
+        crop_results = json.load(f)
+
+    # Find all the detection categories that need to be consistent
+    used_category_ids = set()
+    for im in tqdm(image_results_with_crop_ids['images']):
+        if 'detections' not in im or im['detections'] is None:
+            continue
+        for det in im['detections']:
+            if 'crop_id' in det:
+                used_category_ids.add(det['category'])
+
+    # Make sure the categories that matter are consistent across the two files
+    for category_id in used_category_ids:
+        category_name = image_results_with_crop_ids['detection_categories'][category_id]
+        assert category_id in crop_results['detection_categories'] and \
+            category_name == crop_results['detection_categories'][category_id], \
+            'Crop results and detection results use incompatible categories'
+
+    crop_filename_to_results = {}
+
+    # im = crop_results['images'][0]
+    for im in crop_results['images']:
+        crop_filename_to_results[im['file']] = im
+
+    if 'classification_categories' in crop_results:
+        image_results_with_crop_ids['classification_categories'] = \
+            crop_results['classification_categories']
+
+    if 'classification_category_descriptions' in crop_results:
+        image_results_with_crop_ids['classification_category_descriptions'] = \
+            crop_results['classification_category_descriptions']
+
+
+    ##%% Read classifications from crop results, merge into image-level results
+
+    # im = image_results_with_crop_ids['images'][0]
+    for im in tqdm(image_results_with_crop_ids['images']):
+
+        if 'detections' not in im or im['detections'] is None:
+            continue
+
+        for det in im['detections']:
+
+            if 'classifications' in det:
+                del det['classifications']
+
+            if 'crop_id' in det:
+                crop_filename_relative = det['crop_filename_relative']
+                assert crop_filename_relative in crop_filename_to_results, \
+                    'Crop lookup error'
+                crop_results_this_detection = crop_filename_to_results[crop_filename_relative]
+                assert crop_results_this_detection['file'] == crop_filename_relative
+                assert len(crop_results_this_detection['detections']) == 1
+                # Allow a slight confidence difference for the case where output precision was truncated
+                assert abs(crop_results_this_detection['detections'][0]['conf'] - det['conf']) < 0.01
+                assert crop_results_this_detection['detections'][0]['category'] == det['category']
+                assert crop_results_this_detection['detections'][0]['bbox'] == [0,0,1,1]
+                det['classifications'] = crop_results_this_detection['detections'][0]['classifications']
+
+            if delete_crop_information:
+                if 'crop_id' in det:
+                    del det['crop_id']
+                if 'crop_filename_relative' in det:
+                    del det['crop_filename_relative']
+
+        # ...for each detection
+
+    # ...for each image
+
+
+    ##%% Write output file
+
+    print('Writing output file...')
+
+    with open(output_file,'w') as f:
+        json.dump(image_results_with_crop_ids,f,indent=1)
+
+# ...def crop_results_to_image_results(...)
+
+
+def create_crop_folder(input_file,
+                       input_folder,
+                       output_folder,
+                       output_file=None,
+                       crops_output_file=None,
+                       options=None):
+    """
+    Given a MegaDetector .json file and a folder of images, creates a new folder
+    of images representing all above-threshold crops from the original folder.
+
+    Optionally writes a new .json file that attaches unique IDs to each detection.
+
+    Args:
+        input_file (str): MD-formatted .json file to process
+        input_folder (str): Input image folder
+        output_folder (str): Output (cropped) image folder
+        output_file (str, optional): new .json file that attaches unique IDs to each detection.
+        crops_output_file (str, optional): new .json file that includes whole-image detections
+            for each of the crops, using confidence values from the original results
+        options (CreateCropFolderOptions, optional): crop parameters
+    """
+
+    ## Validate options, prepare output folders
+
+    if options is None:
+        options = CreateCropFolderOptions()
+
+    assert os.path.isfile(input_file), 'Input file {} not found'.format(input_file)
+    assert os.path.isdir(input_folder), 'Input folder {} not found'.format(input_folder)
+    os.makedirs(output_folder,exist_ok=True)
+    if output_file is not None:
+        os.makedirs(os.path.dirname(output_file),exist_ok=True)
+
+
+    ##%% Read input
+
+    print('Reading MD results file...')
+    with open(input_file,'r') as f:
+        detection_results = json.load(f)
+
+    category_ids_to_include = None
+
+    if options.category_names_to_include is not None:
+        category_id_to_name = detection_results['detection_categories']
+        category_name_to_id = invert_dictionary(category_id_to_name)
+        category_ids_to_include = set()
+        for category_name in options.category_names_to_include:
+            assert category_name in category_name_to_id, \
+                'Unrecognized category name {}'.format(category_name)
+            category_ids_to_include.add(category_name_to_id[category_name])
+
+    ##%% Make a list of crops that we need to create
+
+    # Maps input images to list of dicts, with keys 'crop_id','detection'
+    image_fn_relative_to_crops = defaultdict(list)
+    n_crops = 0
+
+    n_detections_excluded_by_category = 0
+
+    # im = detection_results['images'][0]
+    for i_image,im in enumerate(detection_results['images']):
+
+        if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
+            continue
+
+        detections_this_image = im['detections']
+
+        image_fn_relative = im['file']
+
+        for i_detection,det in enumerate(detections_this_image):
+
+            if det['conf'] < options.confidence_threshold:
+                continue
+
+            if (category_ids_to_include is not None) and \
+               (det['category'] not in category_ids_to_include):
+                n_detections_excluded_by_category += 1
+                continue
+
+            det['crop_id'] = i_detection
+
+            crop_info = {'image_fn_relative':image_fn_relative,
+                         'crop_id':i_detection,
+                         'detection':det}
+
+            crop_filename_relative = _get_crop_filename(image_fn_relative,
+                                                        crop_info['crop_id'])
+            det['crop_filename_relative'] = crop_filename_relative
+
+            image_fn_relative_to_crops[image_fn_relative].append(crop_info)
+            n_crops += 1
+
+    # ...for each input image
+
+    print('Prepared a list of {} crops from {} of {} input images'.format(
+        n_crops,len(image_fn_relative_to_crops),len(detection_results['images'])))
+
+    if n_detections_excluded_by_category > 0:
+        print('Excluded {} detections by category'.format(n_detections_excluded_by_category))
+
+    ##%% Generate crops
+
+    if options.n_workers <= 1:
+
+        # image_fn_relative = next(iter(image_fn_relative_to_crops))
+        for image_fn_relative in tqdm(image_fn_relative_to_crops.keys()):
+            crops_this_image = image_fn_relative_to_crops[image_fn_relative]
+            _generate_crops_for_single_image(crops_this_image=crops_this_image,
+                                             input_folder=input_folder,
+                                             output_folder=output_folder,
+                                             options=options)
+
+    else:
+
+        print('Creating a {} pool with {} workers'.format(options.pool_type,options.n_workers))
+
+        if options.pool_type == 'thread':
+            pool = ThreadPool(options.n_workers)
+        else:
+            assert options.pool_type == 'process'
+            pool = Pool(options.n_workers)
+
+        # Each element in this list is the list of crops for a single image
+        crop_lists = list(image_fn_relative_to_crops.values())
+
+        with tqdm(total=len(image_fn_relative_to_crops)) as pbar:
+            for i,_ in enumerate(pool.imap_unordered(partial(
+                    _generate_crops_for_single_image,
+                    input_folder=input_folder,
+                    output_folder=output_folder,
+                    options=options),
+                    crop_lists)):
+                pbar.update()
+
+    # ...if we're using parallel processing
+
+
+    ##%% Write output file
+
+    if output_file is not None:
+        with open(output_file,'w') as f:
+            json.dump(detection_results,f,indent=1)
+
+    if crops_output_file is not None:
+
+        original_images = detection_results['images']
+
+        detection_results_cropped = detection_results
+        detection_results_cropped['images'] = []
+
+        # im = original_images[0]
+        for im in original_images:
+
+            if 'detections' not in im or im['detections'] is None or len(im['detections']) == 0:
+                continue
+
+            detections_this_image = im['detections']
+            image_fn_relative = im['file']
+
+            for i_detection,det in enumerate(detections_this_image):
+
+                if 'crop_id' in det:
+                    im_out = {}
+                    im_out['file'] = det['crop_filename_relative']
+                    det_out = {}
+                    det_out['category'] = det['category']
+                    det_out['conf'] = det['conf']
+                    det_out['bbox'] = [0, 0, 1, 1]
+                    im_out['detections'] = [det_out]
+                    detection_results_cropped['images'].append(im_out)
+
+                # ...if we need to include this crop in the new .json file
+
+            # ...for each crop
+
+        # ...for each original image
+
+        with open(crops_output_file,'w') as f:
+            json.dump(detection_results_cropped,f,indent=1)
+
+# ...def create_crop_folder()
+
+
+#%% Command-line driver
+
+# TODO
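
The docstrings in this new module describe the intended workflow: run MegaDetector, generate a crop folder, run a species classifier on the crops, then fold the crop-level classifications back onto the original images. A sketch of that workflow under stated assumptions (all paths are placeholders, and the classifier in step 2 is external to this module):

    from megadetector.postprocessing.create_crop_folder import \
        CreateCropFolderOptions, create_crop_folder, crop_results_to_image_results

    options = CreateCropFolderOptions()
    options.confidence_threshold = 0.1
    options.category_names_to_include = ['animal']

    # Step 1: write one crop per above-threshold detection, plus bookkeeping .json files
    create_crop_folder(input_file='md_results.json',
                       input_folder='/data/images',
                       output_folder='/data/crops',
                       output_file='md_results_with_crop_ids.json',
                       crops_output_file='crop_results.json',
                       options=options)

    # Step 2 (outside this module): run a species classifier on /data/crops,
    # producing, e.g., 'crop_results_with_classifications.json'

    # Step 3: map the crop-level classifications back onto the original images
    crop_results_to_image_results(
        image_results_file_with_crop_ids='md_results_with_crop_ids.json',
        crop_results_file='crop_results_with_classifications.json',
        output_file='md_results_with_classifications.json')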

--- a/megadetector/postprocessing/load_api_results.py
+++ b/megadetector/postprocessing/load_api_results.py
@@ -107,6 +107,9 @@ def write_api_results(detection_results_table, other_fields, out_path):
     images = detection_results_table.to_json(orient='records',
                                              double_precision=3)
     images = json.loads(images)
+    for im in images:
+        if 'failure' in im and im['failure'] is None:
+            del im['failure']
     fields['images'] = images
 
     # Convert the 'version' field back to a string as per format convention
@@ -129,7 +132,7 @@ def write_api_results(detection_results_table, other_fields, out_path):
     except Exception:
         print('Warning: error removing max_detection_conf from output')
         pass
-
+
     with open(out_path, 'w') as f:
         json.dump(fields, f, indent=1)
 