megadetector-5.0.28-py3-none-any.whl → megadetector-5.0.29-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megadetector has been flagged as potentially problematic; see the registry page for details.
- megadetector/api/batch_processing/api_core/batch_service/score.py +4 -5
- megadetector/api/batch_processing/api_core_support/aggregate_results_manually.py +1 -1
- megadetector/api/batch_processing/api_support/summarize_daily_activity.py +1 -1
- megadetector/api/batch_processing/integration/digiKam/xmp_integration.py +2 -2
- megadetector/api/batch_processing/integration/eMammal/test_scripts/push_annotations_to_emammal.py +1 -1
- megadetector/api/batch_processing/integration/eMammal/test_scripts/select_images_for_testing.py +1 -1
- megadetector/api/synchronous/api_core/tests/load_test.py +2 -3
- megadetector/classification/aggregate_classifier_probs.py +3 -3
- megadetector/classification/analyze_failed_images.py +5 -5
- megadetector/classification/cache_batchapi_outputs.py +5 -5
- megadetector/classification/create_classification_dataset.py +11 -12
- megadetector/classification/crop_detections.py +10 -10
- megadetector/classification/csv_to_json.py +8 -8
- megadetector/classification/detect_and_crop.py +13 -15
- megadetector/classification/evaluate_model.py +7 -7
- megadetector/classification/identify_mislabeled_candidates.py +6 -6
- megadetector/classification/json_to_azcopy_list.py +1 -1
- megadetector/classification/json_validator.py +29 -32
- megadetector/classification/map_classification_categories.py +9 -9
- megadetector/classification/merge_classification_detection_output.py +12 -9
- megadetector/classification/prepare_classification_script.py +19 -19
- megadetector/classification/prepare_classification_script_mc.py +23 -23
- megadetector/classification/run_classifier.py +4 -4
- megadetector/classification/save_mislabeled.py +6 -6
- megadetector/classification/train_classifier.py +1 -1
- megadetector/classification/train_classifier_tf.py +9 -9
- megadetector/classification/train_utils.py +10 -10
- megadetector/data_management/annotations/annotation_constants.py +1 -1
- megadetector/data_management/camtrap_dp_to_coco.py +45 -45
- megadetector/data_management/cct_json_utils.py +101 -101
- megadetector/data_management/cct_to_md.py +49 -49
- megadetector/data_management/cct_to_wi.py +33 -33
- megadetector/data_management/coco_to_labelme.py +75 -75
- megadetector/data_management/coco_to_yolo.py +189 -189
- megadetector/data_management/databases/add_width_and_height_to_db.py +3 -2
- megadetector/data_management/databases/combine_coco_camera_traps_files.py +38 -38
- megadetector/data_management/databases/integrity_check_json_db.py +202 -188
- megadetector/data_management/databases/subset_json_db.py +33 -33
- megadetector/data_management/generate_crops_from_cct.py +38 -38
- megadetector/data_management/get_image_sizes.py +54 -49
- megadetector/data_management/labelme_to_coco.py +130 -124
- megadetector/data_management/labelme_to_yolo.py +78 -72
- megadetector/data_management/lila/create_lila_blank_set.py +81 -83
- megadetector/data_management/lila/create_lila_test_set.py +32 -31
- megadetector/data_management/lila/create_links_to_md_results_files.py +18 -18
- megadetector/data_management/lila/download_lila_subset.py +21 -24
- megadetector/data_management/lila/generate_lila_per_image_labels.py +91 -91
- megadetector/data_management/lila/get_lila_annotation_counts.py +30 -30
- megadetector/data_management/lila/get_lila_image_counts.py +22 -22
- megadetector/data_management/lila/lila_common.py +70 -70
- megadetector/data_management/lila/test_lila_metadata_urls.py +13 -14
- megadetector/data_management/mewc_to_md.py +339 -340
- megadetector/data_management/ocr_tools.py +258 -252
- megadetector/data_management/read_exif.py +231 -224
- megadetector/data_management/remap_coco_categories.py +26 -26
- megadetector/data_management/remove_exif.py +31 -20
- megadetector/data_management/rename_images.py +187 -187
- megadetector/data_management/resize_coco_dataset.py +41 -41
- megadetector/data_management/speciesnet_to_md.py +41 -41
- megadetector/data_management/wi_download_csv_to_coco.py +55 -55
- megadetector/data_management/yolo_output_to_md_output.py +117 -120
- megadetector/data_management/yolo_to_coco.py +195 -188
- megadetector/detection/change_detection.py +831 -0
- megadetector/detection/process_video.py +340 -337
- megadetector/detection/pytorch_detector.py +304 -262
- megadetector/detection/run_detector.py +177 -164
- megadetector/detection/run_detector_batch.py +364 -363
- megadetector/detection/run_inference_with_yolov5_val.py +328 -325
- megadetector/detection/run_tiled_inference.py +256 -249
- megadetector/detection/tf_detector.py +24 -24
- megadetector/detection/video_utils.py +290 -282
- megadetector/postprocessing/add_max_conf.py +15 -11
- megadetector/postprocessing/categorize_detections_by_size.py +44 -44
- megadetector/postprocessing/classification_postprocessing.py +415 -415
- megadetector/postprocessing/combine_batch_outputs.py +20 -21
- megadetector/postprocessing/compare_batch_results.py +528 -517
- megadetector/postprocessing/convert_output_format.py +97 -97
- megadetector/postprocessing/create_crop_folder.py +219 -146
- megadetector/postprocessing/detector_calibration.py +173 -168
- megadetector/postprocessing/generate_csv_report.py +508 -499
- megadetector/postprocessing/load_api_results.py +23 -20
- megadetector/postprocessing/md_to_coco.py +129 -98
- megadetector/postprocessing/md_to_labelme.py +89 -83
- megadetector/postprocessing/md_to_wi.py +40 -40
- megadetector/postprocessing/merge_detections.py +87 -114
- megadetector/postprocessing/postprocess_batch_results.py +313 -298
- megadetector/postprocessing/remap_detection_categories.py +36 -36
- megadetector/postprocessing/render_detection_confusion_matrix.py +205 -199
- megadetector/postprocessing/repeat_detection_elimination/find_repeat_detections.py +57 -57
- megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +27 -28
- megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +702 -677
- megadetector/postprocessing/separate_detections_into_folders.py +226 -211
- megadetector/postprocessing/subset_json_detector_output.py +265 -262
- megadetector/postprocessing/top_folders_to_bottom.py +45 -45
- megadetector/postprocessing/validate_batch_results.py +70 -70
- megadetector/taxonomy_mapping/map_lila_taxonomy_to_wi_taxonomy.py +52 -52
- megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -15
- megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +14 -14
- megadetector/taxonomy_mapping/preview_lila_taxonomy.py +66 -66
- megadetector/taxonomy_mapping/retrieve_sample_image.py +16 -16
- megadetector/taxonomy_mapping/simple_image_download.py +8 -8
- megadetector/taxonomy_mapping/species_lookup.py +33 -33
- megadetector/taxonomy_mapping/taxonomy_csv_checker.py +14 -14
- megadetector/taxonomy_mapping/taxonomy_graph.py +10 -10
- megadetector/taxonomy_mapping/validate_lila_category_mappings.py +13 -13
- megadetector/utils/azure_utils.py +22 -22
- megadetector/utils/ct_utils.py +1018 -200
- megadetector/utils/directory_listing.py +21 -77
- megadetector/utils/gpu_test.py +22 -22
- megadetector/utils/md_tests.py +541 -518
- megadetector/utils/path_utils.py +1457 -398
- megadetector/utils/process_utils.py +41 -41
- megadetector/utils/sas_blob_utils.py +53 -49
- megadetector/utils/split_locations_into_train_val.py +61 -61
- megadetector/utils/string_utils.py +147 -26
- megadetector/utils/url_utils.py +463 -173
- megadetector/utils/wi_utils.py +2629 -2526
- megadetector/utils/write_html_image_list.py +137 -137
- megadetector/visualization/plot_utils.py +21 -21
- megadetector/visualization/render_images_with_thumbnails.py +37 -73
- megadetector/visualization/visualization_utils.py +401 -397
- megadetector/visualization/visualize_db.py +197 -190
- megadetector/visualization/visualize_detector_output.py +79 -73
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/METADATA +135 -132
- megadetector-5.0.29.dist-info/RECORD +163 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/WHEEL +1 -1
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/licenses/LICENSE +0 -0
- {megadetector-5.0.28.dist-info → megadetector-5.0.29.dist-info}/top_level.txt +0 -0
- megadetector/data_management/importers/add_nacti_sizes.py +0 -52
- megadetector/data_management/importers/add_timestamps_to_icct.py +0 -79
- megadetector/data_management/importers/animl_results_to_md_results.py +0 -158
- megadetector/data_management/importers/auckland_doc_test_to_json.py +0 -373
- megadetector/data_management/importers/auckland_doc_to_json.py +0 -201
- megadetector/data_management/importers/awc_to_json.py +0 -191
- megadetector/data_management/importers/bellevue_to_json.py +0 -272
- megadetector/data_management/importers/cacophony-thermal-importer.py +0 -793
- megadetector/data_management/importers/carrizo_shrubfree_2018.py +0 -269
- megadetector/data_management/importers/carrizo_trail_cam_2017.py +0 -289
- megadetector/data_management/importers/cct_field_adjustments.py +0 -58
- megadetector/data_management/importers/channel_islands_to_cct.py +0 -913
- megadetector/data_management/importers/eMammal/copy_and_unzip_emammal.py +0 -180
- megadetector/data_management/importers/eMammal/eMammal_helpers.py +0 -249
- megadetector/data_management/importers/eMammal/make_eMammal_json.py +0 -223
- megadetector/data_management/importers/ena24_to_json.py +0 -276
- megadetector/data_management/importers/filenames_to_json.py +0 -386
- megadetector/data_management/importers/helena_to_cct.py +0 -283
- megadetector/data_management/importers/idaho-camera-traps.py +0 -1407
- megadetector/data_management/importers/idfg_iwildcam_lila_prep.py +0 -294
- megadetector/data_management/importers/import_desert_lion_conservation_camera_traps.py +0 -387
- megadetector/data_management/importers/jb_csv_to_json.py +0 -150
- megadetector/data_management/importers/mcgill_to_json.py +0 -250
- megadetector/data_management/importers/missouri_to_json.py +0 -490
- megadetector/data_management/importers/nacti_fieldname_adjustments.py +0 -79
- megadetector/data_management/importers/noaa_seals_2019.py +0 -181
- megadetector/data_management/importers/osu-small-animals-to-json.py +0 -364
- megadetector/data_management/importers/pc_to_json.py +0 -365
- megadetector/data_management/importers/plot_wni_giraffes.py +0 -123
- megadetector/data_management/importers/prepare_zsl_imerit.py +0 -131
- megadetector/data_management/importers/raic_csv_to_md_results.py +0 -416
- megadetector/data_management/importers/rspb_to_json.py +0 -356
- megadetector/data_management/importers/save_the_elephants_survey_A.py +0 -320
- megadetector/data_management/importers/save_the_elephants_survey_B.py +0 -329
- megadetector/data_management/importers/snapshot_safari_importer.py +0 -758
- megadetector/data_management/importers/snapshot_serengeti_lila.py +0 -1067
- megadetector/data_management/importers/snapshotserengeti/make_full_SS_json.py +0 -150
- megadetector/data_management/importers/snapshotserengeti/make_per_season_SS_json.py +0 -153
- megadetector/data_management/importers/sulross_get_exif.py +0 -65
- megadetector/data_management/importers/timelapse_csv_set_to_json.py +0 -490
- megadetector/data_management/importers/ubc_to_json.py +0 -399
- megadetector/data_management/importers/umn_to_json.py +0 -507
- megadetector/data_management/importers/wellington_to_json.py +0 -263
- megadetector/data_management/importers/wi_to_json.py +0 -442
- megadetector/data_management/importers/zamba_results_to_md_results.py +0 -180
- megadetector/data_management/lila/add_locations_to_island_camera_traps.py +0 -101
- megadetector/data_management/lila/add_locations_to_nacti.py +0 -151
- megadetector-5.0.28.dist-info/RECORD +0 -209
Most paired -/+ lines in the hunks below look identical because they are whitespace-only changes (trailing whitespace removed); the visible text is unchanged.

megadetector/taxonomy_mapping/taxonomy_csv_checker.py

@@ -36,7 +36,7 @@ def check_taxonomy_csv(csv_path: str) -> None:
     """
     See module docstring.
     """
-
+
     taxonomy_df = pd.read_csv(csv_path)

     graph = nx.DiGraph()

@@ -46,12 +46,12 @@ def check_taxonomy_csv(csv_path: str) -> None:
     num_scientific_name_errors = 0

     for i_row, row in taxonomy_df.iterrows():
-
+
         ds = row['dataset_name']
         ds_label = row['query']
         scientific_name = row['scientific_name']
         level = row['taxonomy_level']
-
+
         # This used to represent the source of the mapping: iNat, gbif, or manual. We've
         # stopped tracking this, so this is now vestigial.
         id_source = 0 # row['source']

@@ -95,8 +95,8 @@ def check_taxonomy_csv(csv_path: str) -> None:
                 num_scientific_name_errors += 1

         taxon_child = node
-
-    # ...for each row in the taxonomy file
+
+    # ...for each row in the taxonomy file

     assert nx.is_directed_acyclic_graph(graph)

@@ -124,36 +124,36 @@ def check_taxonomy_csv(csv_path: str) -> None:
         print(f'At least one node has unresolved ambiguous parents: {e}')

     print('Processed {} rows from {}'.format(len(taxonomy_df),csv_path))
-
+
     print('num taxon level errors:', num_taxon_level_errors)
     print('num scientific name errors:', num_scientific_name_errors)


 #%% Command-line driver
-
+
 if __name__ == '__main__':
-
+
     parser = argparse.ArgumentParser()
     parser.add_argument(
         'taxonomy_csv_path',
         help='path to taxonomy CSV file')
-
+
     if len(sys.argv[1:]) == 0:
         parser.print_help()
         parser.exit()
-
+
     args = parser.parse_args()

     check_taxonomy_csv(args.taxonomy_csv_path)


 #%% Interactive driver
-
+
 if False:
-
+
     #%%
-
+
     import os
     csv_path = os.path.expanduser('~/lila/lila-taxonomy-mapping_release.csv')
     check_taxonomy_csv(csv_path)
-
+
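As the hunks above show, taxonomy_csv_checker.py loads the taxonomy CSV into a DataFrame, builds a directed graph of taxon relationships, and asserts that the graph is acyclic. A minimal, self-contained sketch of that core check; the parent/child columns here are illustrative stand-ins for the edge information the real script derives from each row:

import networkx as nx
import pandas as pd

# Toy taxonomy table; the real CSV has columns like dataset_name, query,
# scientific_name, and taxonomy_level (visible in the hunks above).
df = pd.DataFrame({
    'child':  ['puma concolor', 'puma', 'felidae'],
    'parent': ['puma', 'felidae', 'carnivora'],
})

graph = nx.DiGraph()
for _, row in df.iterrows():
    graph.add_edge(row['parent'], row['child'])

# A valid taxonomy must contain no cycles; this is the same assertion
# the checker makes after processing every row.
assert nx.is_directed_acyclic_graph(graph)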
megadetector/taxonomy_mapping/taxonomy_graph.py

@@ -1,4 +1,4 @@
-"""
+r"""

 taxonomy_graph.py

@@ -69,7 +69,7 @@ class TaxonNode:
     By default, we support multiple parents for each TaxonNode. See discussion
     in module docstring above.
     """
-
+
     # class variables
     single_parent_only: ClassVar[bool] = False

@@ -82,7 +82,7 @@ class TaxonNode:

     def __init__(self, level: str, name: str,
                  graph: Optional[nx.DiGraph] = None):
-
+
         self.level = level
         self.name = name
         self.graph = graph

@@ -131,7 +131,7 @@ class TaxonNode:
         Args:
             parent: TaxonNode, must be higher in the taxonomical hierarchy
         """
-
+
         assert self.graph is not None
         parents = self.parents
         if TaxonNode.single_parent_only and len(parents) > 0:

@@ -150,7 +150,7 @@ class TaxonNode:
         Args:
             child: TaxonNode, must be lower in the taxonomical hierarchy
         """
-
+
         assert self.graph is not None
         self.graph.add_edge(self, child)

@@ -160,7 +160,7 @@ class TaxonNode:
             ds: str, name of dataset
             ds_label: str, name of label used by that dataset
         """
-
+
         self.dataset_labels.add((ds, ds_label))

     def get_dataset_labels(self,

@@ -176,7 +176,7 @@ class TaxonNode:

         Returns: set of (ds, ds_label) tuples
         """
-
+
         result = self.dataset_labels
         if include_datasets is not None:
             result = set(tup for tup in result if tup[0] in include_datasets)

@@ -199,7 +199,7 @@ class TaxonNode:

         Returns: TaxonNode, the LCA if it exists, or None if no LCA exists
         """
-
+
         paths = []
         for node in nodes:
             # get path to root
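The @@ -199,7 hunk above is from TaxonNode's lowest-common-ancestor helper, which collects each node's path to the root and then compares the paths. A hedged sketch of that strategy on a plain nx.DiGraph with parent-to-child edges; this is not the actual TaxonNode implementation, and it assumes a tree (a single parent per node):

import networkx as nx

def lca(graph: nx.DiGraph, nodes):
    # For each node, build its chain of ancestors up to the root,
    # ordered root-first.
    paths = []
    for node in nodes:
        path = [node]
        while True:
            parents = list(graph.predecessors(path[-1]))
            if not parents:
                break
            path.append(parents[0])
        paths.append(list(reversed(path)))

    # Walk the root-first paths in lockstep; the last position where all
    # paths agree is the lowest common ancestor.
    result = None
    for candidates in zip(*paths):
        if all(c == candidates[0] for c in candidates):
            result = candidates[0]
        else:
            break
    return result  # None if the nodes share no common ancestor

g = nx.DiGraph()
g.add_edges_from([('carnivora', 'felidae'), ('felidae', 'puma'),
                  ('felidae', 'lynx'), ('puma', 'puma concolor')])
assert lca(g, ['puma concolor', 'lynx']) == 'felidae'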
@@ -242,7 +242,7 @@ def build_taxonomy_graph(taxonomy_df: pd.DataFrame
         TaxonNode node in the tree that contains the label,
         keys are all lowercase
     """
-
+
     graph = nx.DiGraph()
     taxon_to_node = {} # maps (taxon_level, taxon_name) to a TaxonNode
     label_to_node = {} # maps (dataset_name, dataset_label) to a TaxonNode

@@ -308,7 +308,7 @@ def dag_to_tree(graph: nx.DiGraph,

     Returns: nx.DiGraph, a tree-structured graph
     """
-
+
     tree = nx.DiGraph()
     for node in graph.nodes:
         tree.add_node(node)
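The one-line change at the top of taxonomy_graph.py (""" to r""") makes the module docstring a raw string. That matters when a docstring contains backslashes, such as example Windows paths or regex patterns: since Python 3.12, unrecognized escape sequences in ordinary string literals emit a SyntaxWarning at compile time, and the r prefix avoids it. A minimal illustration (the module name and text are hypothetical):

# Without the r prefix, sequences like \d and \data below would be
# invalid escape sequences, and Python 3.12+ emits a SyntaxWarning.
r"""
demo_module.py

Handles filenames matching RCNX\d{4}\.JPG, e.g. C:\data\RCNX0001.JPG.
"""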
megadetector/taxonomy_mapping/validate_lila_category_mappings.py

@@ -17,9 +17,9 @@ from megadetector.data_management.lila.lila_common import read_lila_taxonomy_map
 #%% Prevent execution during infrastructural imports

 if False:
-
+
     #%% Constants
-
+
     lila_local_base = os.path.expanduser('~/lila')

     metadata_dir = os.path.join(lila_local_base,'metadata')

@@ -30,7 +30,7 @@ if False:
     lila_dataset_to_categories_file = os.path.join(category_list_dir,'lila_dataset_to_categories.json')

     assert os.path.isfile(lila_dataset_to_categories_file)
-
+

     #%% Load category and taxonomy files

@@ -48,36 +48,36 @@ if False:

     # i_row = 1; row = taxonomy_df.iloc[i_row]; row
     for i_row,row in taxonomy_df.iterrows():
-
+
         ds_query = row['dataset_name'] + ':' + row['query']
         ds_query = ds_query.lower()
-
+
         if not isinstance(row['scientific_name'],str):
             unmapped_queries.add(ds_query)
             ds_query_to_scientific_name[ds_query] = 'unmapped'
             continue
-
+
         ds_query_to_scientific_name[ds_query] = row['scientific_name']
-
-
+
+
     #%% For each dataset, make sure we can map every category to the taxonomy

     # dataset_name = list(lila_dataset_to_categories.keys())[0]
     for _dataset_name in lila_dataset_to_categories.keys():
-
+
         if '_bbox' in _dataset_name:
             dataset_name = _dataset_name.replace('_bbox','')
         else:
             dataset_name = _dataset_name
-
+
         categories = lila_dataset_to_categories[dataset_name]
-
+
         # c = categories[0]
         for c in categories:
             ds_query = dataset_name + ':' + c['name']
             ds_query = ds_query.lower()
-
+
             if ds_query not in ds_query_to_scientific_name:
-                print('Could not find mapping for {}'.format(ds_query))
+                print('Could not find mapping for {}'.format(ds_query))
             else:
                 scientific_name = ds_query_to_scientific_name[ds_query]
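The script above normalizes every (dataset, label) pair to a lowercase 'dataset:label' key and checks that each dataset category resolves to a scientific name. A condensed, self-contained sketch of that lookup; the dictionaries are toy stand-ins for the real taxonomy CSV and lila_dataset_to_categories.json:

# Every dataset category should resolve to a scientific name via a
# lowercase 'dataset:label' key. Toy data; the real script loads these
# from the LILA metadata files.
ds_query_to_scientific_name = {
    'caltech camera traps:mountain_lion': 'puma concolor',
    'caltech camera traps:empty': 'unmapped',
}

lila_dataset_to_categories = {
    'Caltech Camera Traps_bbox': [{'name': 'mountain_lion'}, {'name': 'deer'}],
}

for _dataset_name, categories in lila_dataset_to_categories.items():
    # '_bbox' variants share the parent dataset's category mapping
    dataset_name = _dataset_name.replace('_bbox', '')
    for c in categories:
        ds_query = (dataset_name + ':' + c['name']).lower()
        if ds_query not in ds_query_to_scientific_name:
            print('Could not find mapping for {}'.format(ds_query))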
megadetector/utils/azure_utils.py

@@ -12,8 +12,8 @@ Requires azure-storage-blob>=12.4.0

 import json

-from typing import Any, Iterable, List, Optional, Tuple, Union
-from azure.storage.blob import BlobPrefix, ContainerClient
+from typing import Any, Iterable, Optional, Union
+from azure.storage.blob import BlobPrefix, ContainerClient # type: ignore

 from megadetector.utils import path_utils
 from megadetector.utils import sas_blob_utils

@@ -26,20 +26,20 @@ def walk_container(container_client: ContainerClient,
                    prefix: str = '',
                    store_folders: bool = True,
                    store_blobs: bool = True,
-                   debug_max_items: int = -1) -> Tuple[List[str], List[str]]:
+                   debug_max_items: int = -1) -> tuple[list[str], list[str]]:
     """
     Recursively walk folders a Azure Blob Storage container.

     Based on:
     https://github.com/Azure/azure-sdk-for-python/blob/master/sdk/storage/azure-storage-blob/samples/blob_samples_walk_blob_hierarchy.py
     """
-
+
     depth = 1

     def walk_blob_hierarchy(prefix: str,
-                            folders: Optional[List[str]] = None,
-                            blobs: Optional[List[str]] = None
-                            ) -> Tuple[List[str], List[str]]:
+                            folders: Optional[list[str]] = None,
+                            blobs: Optional[list[str]] = None
+                            ) -> tuple[list[str], list[str]]:
         if folders is None:
             folders = []
         if blobs is None:
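Most of the azure_utils.py changes replace typing.List/typing.Tuple annotations with the builtin generics (list[str], tuple[...]) that PEP 585 made subscriptable in Python 3.9+. Separately, walk_container follows the hierarchy-walking pattern from the Azure sample linked in its docstring; a minimal sketch of that underlying SDK pattern (the container URL and SAS token are placeholders):

from azure.storage.blob import BlobPrefix, ContainerClient

container_client = ContainerClient.from_container_url(
    'https://myaccount.blob.core.windows.net/mycontainer?<sas-token>')

def walk(prefix: str = '', depth: int = 0) -> None:
    # With a delimiter, enumeration yields BlobPrefix items for virtual
    # folders and BlobProperties items for actual blobs.
    for item in container_client.walk_blobs(name_starts_with=prefix,
                                            delimiter='/'):
        if isinstance(item, BlobPrefix):
            print('  ' * depth + 'folder: ' + item.name)
            walk(item.name, depth + 1)
        else:
            print('  ' * depth + 'blob: ' + item.name)

walk()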
@@ -76,11 +76,11 @@ def walk_container(container_client: ContainerClient,
     return folders, blobs


-def list_top_level_blob_folders(container_client: ContainerClient) -> List[str]:
+def list_top_level_blob_folders(container_client: ContainerClient) -> list[str]:
     """
     List all top-level folders in a container.
     """
-
+
     top_level_folders, _ = walk_container(
         container_client, max_depth=1, store_blobs=False)
     return top_level_folders

@@ -88,13 +88,13 @@ def list_top_level_blob_folders(container_client: ContainerClient) -> List[str]:

 def concatenate_json_lists(input_files: Iterable[str],
                            output_file: Optional[str] = None
-                           ) -> List[Any]:
+                           ) -> list[Any]:
     """
     Given a list of JSON files that contain lists (typically string
     filenames), concatenates the lists into a single list and optionally
     writes out this list to a new output JSON file.
     """
-
+
     output_list = []
     for fn in input_files:
         with open(fn, 'r') as f:

@@ -116,12 +116,12 @@ def upload_file_to_blob(account_name: str,
     Uploads a local file to Azure Blob Storage and returns the uploaded
     blob URI with SAS token.
     """
-
+
     container_uri = sas_blob_utils.build_azure_storage_uri(
         account=account_name, container=container_name, sas_token=sas_token)
     with open(local_path, 'rb') as data:
         return sas_blob_utils.upload_blob(
-            container_uri=container_uri, blob_name=blob_name, data=data,
+            container_uri=container_uri, blob_name=blob_name, data=data,
             overwrite=overwrite)


@@ -131,11 +131,11 @@ def enumerate_blobs_to_file(
         container_name: str,
         sas_token: Optional[str] = None,
         blob_prefix: Optional[str] = None,
-        blob_suffix: Optional[Union[str, Tuple[str]]] = None,
+        blob_suffix: Optional[Union[str, tuple[str]]] = None,
         rsearch: Optional[str] = None,
         limit: Optional[int] = None,
         verbose: Optional[bool] = True
-        ) -> List[str]:
+        ) -> list[str]:
     """
     Enumerates blobs in a container, and writes the blob names to an output
     file.

@@ -143,7 +143,7 @@ def enumerate_blobs_to_file(
     Args:
         output_file: str, path to save list of files in container
             If ends in '.json', writes a JSON string. Otherwise, writes a
-            newline-delimited list. Can be None, in which case this is just a
+            newline-delimited list. Can be None, in which case this is just a
             convenient wrapper for blob enumeration.
         account_name: str, Azure Storage account name
         container_name: str, Azure Blob Storage container name

@@ -155,24 +155,24 @@ def enumerate_blobs_to_file(
             be lowercased first before comparing with the suffix(es).
         rsearch: optional str, returned results will only contain blob names
             that match this regex. Can also be a list of regexes, in which case
-            blobs matching *any* of the regex's will be returned.
+            blobs matching *any* of the regex's will be returned.
         limit: int, maximum # of blob names to list
             if None, then returns all blob names

     Returns: list of str, sorted blob names, of length limit or shorter.
     """
-
+
     if sas_token is not None and len(sas_token) > 9 and sas_token[0] == '?':
         sas_token = sas_token[1:]
-
+
     container_uri = sas_blob_utils.build_azure_storage_uri(
         account=account_name, container=container_name, sas_token=sas_token)
-
+
     matched_blobs = sas_blob_utils.list_blobs_in_container(
         container_uri=container_uri, blob_prefix=blob_prefix,
         blob_suffix=blob_suffix, rsearch=rsearch, limit=limit, verbose=verbose)
-
+
     if output_file is not None:
         path_utils.write_list_to_file(output_file, matched_blobs)
-
+
     return matched_blobs