megadetector 5.0.7__py3-none-any.whl → 5.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megadetector might be problematic. Click here for more details.
- api/__init__.py +0 -0
- api/batch_processing/__init__.py +0 -0
- api/batch_processing/api_core/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/__init__.py +0 -0
- api/batch_processing/api_core/batch_service/score.py +0 -1
- api/batch_processing/api_core/server_job_status_table.py +0 -1
- api/batch_processing/api_core_support/__init__.py +0 -0
- api/batch_processing/api_core_support/aggregate_results_manually.py +0 -1
- api/batch_processing/api_support/__init__.py +0 -0
- api/batch_processing/api_support/summarize_daily_activity.py +0 -1
- api/batch_processing/data_preparation/__init__.py +0 -0
- api/batch_processing/data_preparation/manage_local_batch.py +93 -79
- api/batch_processing/data_preparation/manage_video_batch.py +8 -8
- api/batch_processing/integration/digiKam/xmp_integration.py +0 -1
- api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +0 -1
- api/batch_processing/postprocessing/__init__.py +0 -0
- api/batch_processing/postprocessing/add_max_conf.py +12 -12
- api/batch_processing/postprocessing/categorize_detections_by_size.py +32 -14
- api/batch_processing/postprocessing/combine_api_outputs.py +69 -55
- api/batch_processing/postprocessing/compare_batch_results.py +114 -44
- api/batch_processing/postprocessing/convert_output_format.py +62 -19
- api/batch_processing/postprocessing/load_api_results.py +17 -20
- api/batch_processing/postprocessing/md_to_coco.py +31 -21
- api/batch_processing/postprocessing/md_to_labelme.py +165 -68
- api/batch_processing/postprocessing/merge_detections.py +40 -15
- api/batch_processing/postprocessing/postprocess_batch_results.py +270 -186
- api/batch_processing/postprocessing/remap_detection_categories.py +170 -0
- api/batch_processing/postprocessing/render_detection_confusion_matrix.py +75 -39
- api/batch_processing/postprocessing/repeat_detection_elimination/find_repeat_detections.py +53 -44
- api/batch_processing/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +25 -14
- api/batch_processing/postprocessing/repeat_detection_elimination/repeat_detections_core.py +244 -160
- api/batch_processing/postprocessing/separate_detections_into_folders.py +159 -114
- api/batch_processing/postprocessing/subset_json_detector_output.py +146 -169
- api/batch_processing/postprocessing/top_folders_to_bottom.py +77 -43
- api/synchronous/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/__init__.py +0 -0
- api/synchronous/api_core/animal_detection_api/api_backend.py +0 -2
- api/synchronous/api_core/animal_detection_api/api_frontend.py +266 -268
- api/synchronous/api_core/animal_detection_api/config.py +35 -35
- api/synchronous/api_core/tests/__init__.py +0 -0
- api/synchronous/api_core/tests/load_test.py +109 -109
- classification/__init__.py +0 -0
- classification/aggregate_classifier_probs.py +21 -24
- classification/analyze_failed_images.py +11 -13
- classification/cache_batchapi_outputs.py +51 -51
- classification/create_classification_dataset.py +69 -68
- classification/crop_detections.py +54 -53
- classification/csv_to_json.py +97 -100
- classification/detect_and_crop.py +105 -105
- classification/evaluate_model.py +43 -42
- classification/identify_mislabeled_candidates.py +47 -46
- classification/json_to_azcopy_list.py +10 -10
- classification/json_validator.py +72 -71
- classification/map_classification_categories.py +44 -43
- classification/merge_classification_detection_output.py +68 -68
- classification/prepare_classification_script.py +157 -154
- classification/prepare_classification_script_mc.py +228 -228
- classification/run_classifier.py +27 -26
- classification/save_mislabeled.py +30 -30
- classification/train_classifier.py +20 -20
- classification/train_classifier_tf.py +21 -22
- classification/train_utils.py +10 -10
- data_management/__init__.py +0 -0
- data_management/annotations/__init__.py +0 -0
- data_management/annotations/annotation_constants.py +18 -31
- data_management/camtrap_dp_to_coco.py +238 -0
- data_management/cct_json_utils.py +107 -59
- data_management/cct_to_md.py +176 -158
- data_management/cct_to_wi.py +247 -219
- data_management/coco_to_labelme.py +272 -0
- data_management/coco_to_yolo.py +86 -62
- data_management/databases/__init__.py +0 -0
- data_management/databases/add_width_and_height_to_db.py +20 -16
- data_management/databases/combine_coco_camera_traps_files.py +35 -31
- data_management/databases/integrity_check_json_db.py +130 -83
- data_management/databases/subset_json_db.py +25 -16
- data_management/generate_crops_from_cct.py +27 -45
- data_management/get_image_sizes.py +188 -144
- data_management/importers/add_nacti_sizes.py +8 -8
- data_management/importers/add_timestamps_to_icct.py +78 -78
- data_management/importers/animl_results_to_md_results.py +158 -160
- data_management/importers/auckland_doc_test_to_json.py +9 -9
- data_management/importers/auckland_doc_to_json.py +8 -8
- data_management/importers/awc_to_json.py +7 -7
- data_management/importers/bellevue_to_json.py +15 -15
- data_management/importers/cacophony-thermal-importer.py +13 -13
- data_management/importers/carrizo_shrubfree_2018.py +8 -8
- data_management/importers/carrizo_trail_cam_2017.py +8 -8
- data_management/importers/cct_field_adjustments.py +9 -9
- data_management/importers/channel_islands_to_cct.py +10 -10
- data_management/importers/eMammal/copy_and_unzip_emammal.py +1 -0
- data_management/importers/ena24_to_json.py +7 -7
- data_management/importers/filenames_to_json.py +8 -8
- data_management/importers/helena_to_cct.py +7 -7
- data_management/importers/idaho-camera-traps.py +7 -7
- data_management/importers/idfg_iwildcam_lila_prep.py +10 -10
- data_management/importers/jb_csv_to_json.py +9 -9
- data_management/importers/mcgill_to_json.py +8 -8
- data_management/importers/missouri_to_json.py +18 -18
- data_management/importers/nacti_fieldname_adjustments.py +10 -10
- data_management/importers/noaa_seals_2019.py +8 -8
- data_management/importers/pc_to_json.py +7 -7
- data_management/importers/plot_wni_giraffes.py +7 -7
- data_management/importers/prepare-noaa-fish-data-for-lila.py +359 -359
- data_management/importers/prepare_zsl_imerit.py +7 -7
- data_management/importers/rspb_to_json.py +8 -8
- data_management/importers/save_the_elephants_survey_A.py +8 -8
- data_management/importers/save_the_elephants_survey_B.py +9 -9
- data_management/importers/snapshot_safari_importer.py +26 -26
- data_management/importers/snapshot_safari_importer_reprise.py +665 -665
- data_management/importers/snapshot_serengeti_lila.py +14 -14
- data_management/importers/sulross_get_exif.py +8 -9
- data_management/importers/timelapse_csv_set_to_json.py +11 -11
- data_management/importers/ubc_to_json.py +13 -13
- data_management/importers/umn_to_json.py +7 -7
- data_management/importers/wellington_to_json.py +8 -8
- data_management/importers/wi_to_json.py +9 -9
- data_management/importers/zamba_results_to_md_results.py +181 -181
- data_management/labelme_to_coco.py +309 -159
- data_management/labelme_to_yolo.py +103 -60
- data_management/lila/__init__.py +0 -0
- data_management/lila/add_locations_to_island_camera_traps.py +9 -9
- data_management/lila/add_locations_to_nacti.py +147 -147
- data_management/lila/create_lila_blank_set.py +114 -31
- data_management/lila/create_lila_test_set.py +8 -8
- data_management/lila/create_links_to_md_results_files.py +106 -106
- data_management/lila/download_lila_subset.py +92 -90
- data_management/lila/generate_lila_per_image_labels.py +56 -43
- data_management/lila/get_lila_annotation_counts.py +18 -15
- data_management/lila/get_lila_image_counts.py +11 -11
- data_management/lila/lila_common.py +103 -70
- data_management/lila/test_lila_metadata_urls.py +132 -116
- data_management/ocr_tools.py +173 -128
- data_management/read_exif.py +161 -99
- data_management/remap_coco_categories.py +84 -0
- data_management/remove_exif.py +58 -62
- data_management/resize_coco_dataset.py +32 -44
- data_management/wi_download_csv_to_coco.py +246 -0
- data_management/yolo_output_to_md_output.py +86 -73
- data_management/yolo_to_coco.py +535 -95
- detection/__init__.py +0 -0
- detection/detector_training/__init__.py +0 -0
- detection/process_video.py +85 -33
- detection/pytorch_detector.py +43 -25
- detection/run_detector.py +157 -72
- detection/run_detector_batch.py +189 -114
- detection/run_inference_with_yolov5_val.py +118 -51
- detection/run_tiled_inference.py +113 -42
- detection/tf_detector.py +51 -28
- detection/video_utils.py +606 -521
- docs/source/conf.py +43 -0
- md_utils/__init__.py +0 -0
- md_utils/azure_utils.py +9 -9
- md_utils/ct_utils.py +249 -70
- md_utils/directory_listing.py +59 -64
- md_utils/md_tests.py +968 -862
- md_utils/path_utils.py +655 -155
- md_utils/process_utils.py +157 -133
- md_utils/sas_blob_utils.py +20 -20
- md_utils/split_locations_into_train_val.py +45 -32
- md_utils/string_utils.py +33 -10
- md_utils/url_utils.py +208 -27
- md_utils/write_html_image_list.py +51 -35
- md_visualization/__init__.py +0 -0
- md_visualization/plot_utils.py +102 -109
- md_visualization/render_images_with_thumbnails.py +34 -34
- md_visualization/visualization_utils.py +908 -311
- md_visualization/visualize_db.py +109 -58
- md_visualization/visualize_detector_output.py +61 -42
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/METADATA +21 -17
- megadetector-5.0.9.dist-info/RECORD +224 -0
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/WHEEL +1 -1
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/top_level.txt +1 -0
- taxonomy_mapping/__init__.py +0 -0
- taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +342 -335
- taxonomy_mapping/map_new_lila_datasets.py +154 -154
- taxonomy_mapping/prepare_lila_taxonomy_release.py +142 -134
- taxonomy_mapping/preview_lila_taxonomy.py +591 -591
- taxonomy_mapping/retrieve_sample_image.py +12 -12
- taxonomy_mapping/simple_image_download.py +11 -11
- taxonomy_mapping/species_lookup.py +10 -10
- taxonomy_mapping/taxonomy_csv_checker.py +18 -18
- taxonomy_mapping/taxonomy_graph.py +47 -47
- taxonomy_mapping/validate_lila_category_mappings.py +83 -76
- data_management/cct_json_to_filename_json.py +0 -89
- data_management/cct_to_csv.py +0 -140
- data_management/databases/remove_corrupted_images_from_db.py +0 -191
- detection/detector_training/copy_checkpoints.py +0 -43
- md_visualization/visualize_megadb.py +0 -183
- megadetector-5.0.7.dist-info/RECORD +0 -202
- {megadetector-5.0.7.dist-info → megadetector-5.0.9.dist-info}/LICENSE +0 -0
md_utils/process_utils.py
CHANGED
|
@@ -1,133 +1,157 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
#%% Constants, imports, and environment
|
|
14
|
-
|
|
15
|
-
import os
|
|
16
|
-
import subprocess
|
|
17
|
-
|
|
18
|
-
os.environ["PYTHONUNBUFFERED"] = "1"
|
|
19
|
-
|
|
20
|
-
def execute(cmd,encoding=None,errors=None,env=None,verbose=False):
|
|
21
|
-
"""
|
|
22
|
-
Run [cmd] (a single string) in a shell, yielding each line of output to the caller.
|
|
23
|
-
|
|
24
|
-
The "encoding", "errors", and "env" parameters are passed directly to subprocess.Popen().
|
|
25
|
-
|
|
26
|
-
"verbose" only impacts output about process management, it is not related to printing
|
|
27
|
-
output from the child process.
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
process_utils.py
|
|
4
|
+
|
|
5
|
+
Run something at the command line and capture the output, based on:
|
|
6
|
+
|
|
7
|
+
https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
|
|
8
|
+
|
|
9
|
+
Includes handy example code for doing this on multiple processes/threads.
|
|
10
|
+
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
#%% Constants, imports, and environment
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import subprocess
|
|
17
|
+
|
|
18
|
+
os.environ["PYTHONUNBUFFERED"] = "1"
|
|
19
|
+
|
|
20
|
+
def execute(cmd,encoding=None,errors=None,env=None,verbose=False):
|
|
21
|
+
"""
|
|
22
|
+
Run [cmd] (a single string) in a shell, yielding each line of output to the caller.
|
|
23
|
+
|
|
24
|
+
The "encoding", "errors", and "env" parameters are passed directly to subprocess.Popen().
|
|
25
|
+
|
|
26
|
+
"verbose" only impacts output about process management, it is not related to printing
|
|
27
|
+
output from the child process.
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
cmd (str): command to run
|
|
31
|
+
encoding (str, optional): stdout encoding, see Popen() documentation
|
|
32
|
+
errors (str, optional): error handling, see Popen() documentation
|
|
33
|
+
env (dict, optional): environment variables, see Popen() documentation
|
|
34
|
+
verbose (bool, optional): enable additional debug console output
|
|
35
|
+
|
|
36
|
+
Returns:
|
|
37
|
+
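int: the command's return code, always zero (a CalledProcessError is raised otherwise); because this function is a generator, this value is only available to callers via StopIteration.value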
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
if verbose:
|
|
41
|
+
if encoding is not None:
|
|
42
|
+
print('Launching child process with non-default encoding {}'.format(encoding))
|
|
43
|
+
if errors is not None:
|
|
44
|
+
print('Launching child process with non-default text error handling {}'.format(errors))
|
|
45
|
+
if env is not None:
|
|
46
|
+
print('Launching child process with non-default environment {}'.format(str(env)))
|
|
47
|
+
|
|
48
|
+
# https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
|
|
49
|
+
popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
|
|
50
|
+
shell=True, universal_newlines=True, encoding=encoding,
|
|
51
|
+
errors=errors, env=env)
|
|
52
|
+
for stdout_line in iter(popen.stdout.readline, ""):
|
|
53
|
+
yield stdout_line
|
|
54
|
+
popen.stdout.close()
|
|
55
|
+
return_code = popen.wait()
|
|
56
|
+
if return_code:
|
|
57
|
+
raise subprocess.CalledProcessError(return_code, cmd)
|
|
58
|
+
|
|
59
|
+
return return_code
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def execute_and_print(cmd,print_output=True,encoding=None,errors=None,env=None,verbose=False):
|
|
63
|
+
"""
|
|
64
|
+
Run [cmd] (a single string) in a shell, capturing and printing output. Returns
|
|
65
|
+
a dictionary with fields "status" and "output".
|
|
66
|
+
|
|
67
|
+
The "encoding", "errors", and "env" parameters are passed directly to subprocess.Popen().
|
|
68
|
+
|
|
69
|
+
"verbose" only impacts output about process management, it is not related to printing
|
|
70
|
+
output from the child process.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
cmd (str): command to run
|
|
74
|
+
print_output (bool, optional): whether to print output from [cmd]
|
|
75
|
+
encoding (str, optional): stdout encoding, see Popen() documentation
|
|
76
|
+
errors (str, optional): error handling, see Popen() documentation
|
|
77
|
+
env (dict, optional): environment variables, see Popen() documentation
|
|
78
|
+
verbose (bool, optional): enable additional debug console output
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
dict: a dictionary with fields "status" (the process return code) and "output"
|
|
82
|
+
(a list of the lines the child process wrote to stdout, with stderr merged in)
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
to_return = {'status':'unknown','output':''}
|
|
86
|
+
output = []
|
|
87
|
+
try:
|
|
88
|
+
for s in execute(cmd,encoding=encoding,errors=errors,env=env,verbose=verbose):
|
|
89
|
+
output.append(s)
|
|
90
|
+
if print_output:
|
|
91
|
+
print(s,end='',flush=True)
|
|
92
|
+
to_return['status'] = 0
|
|
93
|
+
except subprocess.CalledProcessError as cpe:
|
|
94
|
+
print('execute_and_print caught error: {} ({})'.format(cpe.output,str(cpe)))
|
|
95
|
+
to_return['status'] = cpe.returncode
|
|
96
|
+
to_return['output'] = output
|
|
97
|
+
|
|
98
|
+
return to_return
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
#%% Single-threaded test driver for execute_and_print
|
|
102
|
+
|
|
103
|
+
if False:
|
|
104
|
+
|
|
105
|
+
pass
|
|
106
|
+
|
|
107
|
+
#%%
|
|
108
|
+
|
|
109
|
+
if os.name == 'nt':
|
|
110
|
+
execute_and_print('echo hello && ping -n 5 127.0.0.1 && echo goodbye')
|
|
111
|
+
else:
|
|
112
|
+
execute_and_print('echo hello && sleep 1 && echo goodbye')
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
#%% Parallel test driver for execute_and_print
|
|
116
|
+
|
|
117
|
+
if False:
|
|
118
|
+
|
|
119
|
+
pass
|
|
120
|
+
|
|
121
|
+
#%%
|
|
122
|
+
|
|
123
|
+
from functools import partial
|
|
124
|
+
from multiprocessing.pool import ThreadPool as ThreadPool
|
|
125
|
+
from multiprocessing.pool import Pool as Pool
|
|
126
|
+
|
|
127
|
+
n_workers = 10
|
|
128
|
+
|
|
129
|
+
# Should we use threads (vs. processes) for parallelization?
|
|
130
|
+
use_threads = True
|
|
131
|
+
|
|
132
|
+
test_data = ['a','b','c','d']
|
|
133
|
+
|
|
134
|
+
def process_sample(s):
|
|
135
|
+
return execute_and_print('echo ' + s,True)
|
|
136
|
+
|
|
137
|
+
if n_workers == 1:
|
|
138
|
+
|
|
139
|
+
results = []
|
|
140
|
+
for i_sample,sample in enumerate(test_data):
|
|
141
|
+
results.append(process_sample(sample))
|
|
142
|
+
|
|
143
|
+
else:
|
|
144
|
+
|
|
145
|
+
n_threads = min(n_workers,len(test_data))
|
|
146
|
+
|
|
147
|
+
if use_threads:
|
|
148
|
+
print('Starting parallel thread pool with {} workers'.format(n_threads))
|
|
149
|
+
pool = ThreadPool(n_threads)
|
|
150
|
+
else:
|
|
151
|
+
print('Starting parallel process pool with {} workers'.format(n_threads))
|
|
152
|
+
pool = Pool(n_threads)
|
|
153
|
+
|
|
154
|
+
results = list(pool.map(partial(process_sample),test_data))
|
|
155
|
+
|
|
156
|
+
for r in results:
|
|
157
|
+
print(r)
|
md_utils/sas_blob_utils.py
CHANGED
|
@@ -1,23 +1,23 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
sas_blob_utils.py
|
|
4
|
+
|
|
5
|
+
This module contains helper functions for dealing with Shared Access Signatures
|
|
6
|
+
(SAS) tokens for Azure Blob Storage.
|
|
7
|
+
|
|
8
|
+
The default Azure Storage SAS URI format is:
|
|
9
|
+
|
|
10
|
+
https://<account>.blob.core.windows.net/<container>/<blob>?<sas_token>
|
|
11
|
+
|
|
12
|
+
This module assumes azure-storage-blob version 12.5.
|
|
13
|
+
|
|
14
|
+
Documentation for Azure Blob Storage:
|
|
15
|
+
docs.microsoft.com/en-us/azure/developer/python/sdk/storage/storage-blob-readme
|
|
16
|
+
|
|
17
|
+
Documentation for SAS:
|
|
18
|
+
docs.microsoft.com/en-us/azure/storage/common/storage-sas-overview
|
|
19
|
+
|
|
20
|
+
"""
|
|
21
21
|
|
|
22
22
|
#%% Imports
|
|
23
23
|
|
|
md_utils/split_locations_into_train_val.py
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
split_locations_into_train_val.py
|
|
4
|
+
|
|
5
|
+
Splits a list of location IDs into training and validation, targeting a specific
|
|
6
|
+
train/val split for each category, but allowing some categories to be tighter or looser
|
|
7
|
+
than others. Does nothing particularly clever, just randomly splits locations into
|
|
8
|
+
train/val lots of times using the target val fraction, and picks the one that meets the
|
|
9
|
+
specified constraints and minimizes weighted error, where "error" is defined as the
|
|
10
|
+
sum of each class's absolute divergence from the target val fraction.
|
|
11
|
+
|
|
12
|
+
"""
|
|
13
13
|
|
|
14
14
|
#%% Imports/constants
|
|
15
15
|
|
|
@@ -30,31 +30,44 @@ def split_locations_into_train_val(location_to_category_counts,
|
|
|
30
30
|
category_to_error_weight=None,
|
|
31
31
|
default_max_allowable_error=0.1):
|
|
32
32
|
"""
|
|
33
|
-
|
|
33
|
+
Splits a list of location IDs into training and validation, targeting a specific
|
|
34
34
|
train/val split for each category, but allowing some categories to be tighter or looser
|
|
35
35
|
than others. Does nothing particularly clever, just randomly splits locations into
|
|
36
36
|
train/val lots of times using the target val fraction, and picks the one that meets the
|
|
37
37
|
specified constraints and minimizes weighted error, where "error" is defined as the
|
|
38
38
|
sum of each class's absolute divergence from the target val fraction.
|
|
39
39
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
40
|
+
Args:
|
|
41
|
+
location_to_category_counts (dict): a dict mapping location IDs to dicts,
|
|
42
|
+
with each dict mapping a category name to a count. Any categories not present
|
|
43
|
+
in a particular dict are assumed to have a count of zero for that location.
|
|
44
|
+
|
|
45
|
+
For example:
|
|
46
|
+
|
|
47
|
+
.. code-block:: none
|
|
48
|
+
|
|
49
|
+
{'location-000': {'bear':4,'wolf':10},
|
|
50
|
+
'location-001': {'bear':12,'elk':20}}
|
|
51
|
+
|
|
52
|
+
n_random_seeds (int, optional): number of random seeds to try, always starting from zero
|
|
53
|
+
target_val_fraction (float, optional): fraction of images containing each species we'd
|
|
54
|
+
like to put in the val split
|
|
55
|
+
category_to_max_allowable_error (dict, optional): a dict mapping category names
|
|
56
|
+
to maximum allowable errors. These are hard constraints (i.e., we will error
|
|
57
|
+
if we can't meet them). Does not need to include all categories; categories not
|
|
58
|
+
included will be assigned a maximum error according to [default_max_allowable_error].
|
|
59
|
+
If this is None, no hard constraints are applied.
|
|
60
|
+
category_to_error_weight (dict, optional): a dict mapping category names to
|
|
61
|
+
error weights. You can specify a subset of categories; categories not included here
|
|
62
|
+
have a weight of 1.0. If None, all categories have the same weight.
|
|
63
|
+
default_max_allowable_error (float, optional): the maximum allowable error for categories not
|
|
64
|
+
present in [category_to_max_allowable_error]. Set to None (or >= 1.0) to disable hard
|
|
65
|
+
constraints for categories not present in [category_to_max_allowable_error]
|
|
66
|
+
|
|
67
|
+
Returns:
|
|
68
|
+
tuple: A two-element tuple:
|
|
69
|
+
- list of location IDs in the val split
|
|
70
|
+
- a dict mapping category names to the fraction of images in the val split
|
|
58
71
|
"""
|
|
59
72
|
|
|
60
73
|
location_ids = list(location_to_category_counts.keys())
|
|
@@ -84,7 +97,7 @@ def split_locations_into_train_val(location_to_category_counts,
|
|
|
84
97
|
# random_seed = 0
|
|
85
98
|
def compute_seed_errors(random_seed):
|
|
86
99
|
"""
|
|
87
|
-
|
|
100
|
+
Computes the per-category error for a specific random seed.
|
|
88
101
|
|
|
89
102
|
returns weighted_average_error,category_to_val_fraction
|
|
90
103
|
"""
|
md_utils/string_utils.py
CHANGED
|
@@ -1,16 +1,27 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
"""
|
|
2
|
+
|
|
3
|
+
string_utils.py
|
|
4
|
+
|
|
5
|
+
Miscellaneous string utilities.
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
#%% Imports
|
|
8
10
|
|
|
9
11
|
import re
|
|
10
12
|
|
|
13
|
+
|
|
14
|
+
#%% Functions
|
|
15
|
+
|
|
11
16
|
def is_float(s):
|
|
12
17
|
"""
|
|
13
|
-
Checks whether a string
|
|
18
|
+
Checks whether [s] is an object (typically a string) that can be cast to a float
|
|
19
|
+
|
|
20
|
+
Args:
|
|
21
|
+
s (object): object to evaluate
|
|
22
|
+
|
|
23
|
+
Returns:
|
|
24
|
+
bool: True if s successfully casts to a float, otherwise False
|
|
14
25
|
"""
|
|
15
26
|
|
|
16
27
|
try:
|
|
@@ -23,10 +34,16 @@ def is_float(s):
|
|
|
23
34
|
def human_readable_to_bytes(size):
|
|
24
35
|
"""
|
|
25
36
|
Given a human-readable byte string (e.g. 2G, 10GB, 30MB, 20KB),
|
|
26
|
-
|
|
37
|
+
returns the number of bytes. Will return 0 if the argument has
|
|
27
38
|
unexpected form.
|
|
28
39
|
|
|
29
40
|
https://gist.github.com/beugley/ccd69945346759eb6142272a6d69b4e0
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
size (str): string representing a size
|
|
44
|
+
|
|
45
|
+
Returns:
|
|
46
|
+
int: the corresponding size in bytes
|
|
30
47
|
"""
|
|
31
48
|
|
|
32
49
|
size = re.sub(r'\s+', '', size)
|
|
@@ -61,9 +78,15 @@ def human_readable_to_bytes(size):
|
|
|
61
78
|
|
|
62
79
|
def remove_ansi_codes(s):
|
|
63
80
|
"""
|
|
64
|
-
|
|
81
|
+
Removes ANSI escape codes from a string.
|
|
65
82
|
|
|
66
83
|
https://stackoverflow.com/questions/14693701/how-can-i-remove-the-ansi-escape-sequences-from-a-string-in-python#14693789
|
|
84
|
+
|
|
85
|
+
Args:
|
|
86
|
+
s (str): the string to de-ANSI-i-fy
|
|
87
|
+
|
|
88
|
+
Returns:
|
|
89
|
+
str: A copy of [s] without ANSI codes
|
|
67
90
|
"""
|
|
68
91
|
ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
|
|
69
92
|
return ansi_escape.sub('', s)
|