megadetector 5.0.23__py3-none-any.whl → 5.0.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (38)
  1. megadetector/api/synchronous/api_core/animal_detection_api/api_backend.py +2 -3
  2. megadetector/classification/merge_classification_detection_output.py +2 -2
  3. megadetector/data_management/coco_to_labelme.py +2 -1
  4. megadetector/data_management/databases/integrity_check_json_db.py +15 -14
  5. megadetector/data_management/databases/subset_json_db.py +49 -21
  6. megadetector/data_management/mewc_to_md.py +340 -0
  7. megadetector/data_management/wi_to_md.py +41 -0
  8. megadetector/data_management/yolo_output_to_md_output.py +15 -8
  9. megadetector/detection/process_video.py +24 -7
  10. megadetector/detection/pytorch_detector.py +841 -160
  11. megadetector/detection/run_detector.py +340 -146
  12. megadetector/detection/run_detector_batch.py +304 -68
  13. megadetector/detection/run_inference_with_yolov5_val.py +61 -4
  14. megadetector/detection/tf_detector.py +6 -1
  15. megadetector/postprocessing/{combine_api_outputs.py → combine_batch_outputs.py} +10 -13
  16. megadetector/postprocessing/compare_batch_results.py +68 -6
  17. megadetector/postprocessing/md_to_labelme.py +7 -7
  18. megadetector/postprocessing/md_to_wi.py +40 -0
  19. megadetector/postprocessing/merge_detections.py +1 -1
  20. megadetector/postprocessing/postprocess_batch_results.py +10 -3
  21. megadetector/postprocessing/separate_detections_into_folders.py +32 -4
  22. megadetector/postprocessing/validate_batch_results.py +9 -4
  23. megadetector/utils/ct_utils.py +165 -45
  24. megadetector/utils/gpu_test.py +107 -0
  25. megadetector/utils/md_tests.py +355 -108
  26. megadetector/utils/path_utils.py +9 -2
  27. megadetector/utils/wi_utils.py +1794 -0
  28. megadetector/visualization/visualization_utils.py +82 -16
  29. megadetector/visualization/visualize_db.py +25 -7
  30. megadetector/visualization/visualize_detector_output.py +60 -13
  31. {megadetector-5.0.23.dist-info → megadetector-5.0.24.dist-info}/METADATA +10 -24
  32. {megadetector-5.0.23.dist-info → megadetector-5.0.24.dist-info}/RECORD +35 -33
  33. megadetector/detection/detector_training/__init__.py +0 -0
  34. megadetector/detection/detector_training/model_main_tf2.py +0 -114
  35. megadetector/utils/torch_test.py +0 -32
  36. {megadetector-5.0.23.dist-info → megadetector-5.0.24.dist-info}/LICENSE +0 -0
  37. {megadetector-5.0.23.dist-info → megadetector-5.0.24.dist-info}/WHEEL +0 -0
  38. {megadetector-5.0.23.dist-info → megadetector-5.0.24.dist-info}/top_level.txt +0 -0
megadetector/utils/wi_utils.py (new file)
@@ -0,0 +1,1794 @@
+ """
+
+ wi_utils.py
+
+ Functions related to working with the WI insights platform, specifically for:
+
+ * Retrieving images based on .csv downloads
+ * Pushing results to the ProcessCVResponse() API (requires an API key)
+ * Working with WI taxonomy records and geofencing data
+
+ """
+
+ #%% Imports and constants
+
+ import os
+ import requests
+ import json
+
+ import numpy as np
+ import pandas as pd
+
+ from copy import deepcopy
+ from collections import defaultdict
+ from multiprocessing.pool import Pool, ThreadPool
+ from functools import partial
+ from tqdm import tqdm
+
+ from megadetector.utils.path_utils import insert_before_extension
+ from megadetector.utils.ct_utils import split_list_into_n_chunks
+ from megadetector.utils.ct_utils import invert_dictionary
+ from megadetector.utils.ct_utils import sort_list_of_dicts_by_key
+ from megadetector.utils.path_utils import find_images
+ from megadetector.postprocessing.validate_batch_results import \
+     validate_batch_results, ValidateBatchResultsOptions
+
+ md_category_id_to_name = {'1':'animal','2':'person','3':'vehicle'}
+ md_category_name_to_id = invert_dictionary(md_category_id_to_name)
+
+ # Only used when pushing results directly to the platform via the API; any detections we want
+ # to show in the UI should have at least this confidence value.
+ min_md_output_confidence = 0.25
+
+ # Fields expected to be present in a valid WI result
+ wi_result_fields = ['wi_taxon_id','class','order','family','genus','species','common_name']
+
+
+ #%% Miscellaneous WI support functions
+
+ def is_valid_prediction_string(s):
+     """
+     Determine whether [s] is a valid WI prediction string. Prediction strings look like:
+
+     '90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent'
+
+     Args:
+         s (str): the string to be tested for validity
+
+     Returns:
+         bool: True if this looks more or less like a WI prediction string
+     """
+
+     return isinstance(s,str) and (len(s.split(';')) == 7) and (s == s.lower())
+
+
+ def wi_result_to_prediction_string(r):
+     """
+     Convert the dict [r] - typically loaded from a row in a downloaded .csv file - to
+     a valid prediction string, e.g.:
+
+     1f689929-883d-4dae-958c-3d57ab5b6c16;;;;;;animal
+     90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent
+
+     Args:
+         r (dict): dict containing WI prediction information, with at least the fields
+             specified in wi_result_fields.
+
+     Returns:
+         str: the result in [r], as a semicolon-delimited prediction string
+     """
+
+     values = []
+     for field in wi_result_fields:
+         if isinstance(r[field],str):
+             values.append(r[field].lower())
+         else:
+             assert isinstance(r[field],float) and np.isnan(r[field])
+             values.append('')
+     s = ';'.join(values)
+     assert is_valid_prediction_string(s)
+     return s
+
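A minimal sketch of how these two helpers fit together, assuming a row dict with the fields in wi_result_fields (the column values here are illustrative):

import numpy as np

# Hypothetical row, as it might come from an images_*.csv file
r = {'wi_taxon_id':'90d950db-2106-4bd9-a4c1-777604c3eada',
     'class':'Mammalia','order':'Rodentia',
     'family':np.nan,'genus':np.nan,'species':np.nan,
     'common_name':'Rodent'}

s = wi_result_to_prediction_string(r)
# '90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent'
assert is_valid_prediction_string(s)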
+
+ def compare_values(v0,v1):
+     """
+     Utility function for comparing two values when we want to return True if both
+     values are NaN.
+
+     Args:
+         v0 (object): the first value to compare
+         v1 (object): the second value to compare
+
+     Returns:
+         bool: True if v0 == v1, or if both v0 and v1 are NaN
+     """
+
+     if isinstance(v0,float) and isinstance(v1,float) and np.isnan(v0) and np.isnan(v1):
+         return True
+     return v0 == v1
+
+
+ def record_is_unidentified(record):
+     """
+     A record is considered "unidentified" if its "identified_by" field is either NaN
+     or "Computer vision".
+
+     Args:
+         record (dict): dict representing a WI result loaded from a .csv file, with at least the
+             field "identified_by"
+
+     Returns:
+         bool: True if the "identified_by" field is either NaN or a string indicating that this
+             record has not yet been human-reviewed.
+     """
+
+     identified_by = record['identified_by']
+     assert isinstance(identified_by,float) or isinstance(identified_by,str)
+     if isinstance(identified_by,float):
+         assert np.isnan(identified_by)
+         return True
+     else:
+         return identified_by == 'Computer vision'
+
+
+ def record_lists_are_identical(records_0,records_1,verbose=False):
+     """
+     Takes two lists of records in the form returned by read_images_from_download_bundle and
+     determines whether they are the same.
+
+     Args:
+         records_0 (list of dict): the first list of records to compare
+         records_1 (list of dict): the second list of records to compare
+         verbose (bool, optional): enable additional debug output
+
+     Returns:
+         bool: True if the two lists are identical
+     """
+
+     if len(records_0) != len(records_1):
+         return False
+
+     # i_record = 0; record_0 = records_0[i_record]
+     for i_record,record_0 in enumerate(records_0):
+         record_1 = records_1[i_record]
+         assert set(record_0.keys()) == set(record_1.keys())
+         for k in record_0.keys():
+             if not compare_values(record_0[k],record_1[k]):
+                 if verbose:
+                     print('Image ID: {} ({})\nRecord 0/{}: {}\nRecord 1/{}: {}'.format(
+                         record_0['image_id'],record_1['image_id'],
+                         k,record_0[k],k,record_1[k]))
+                 return False
+
+     return True
+
+
+ #%% Functions for managing WI downloads
+
+ def read_sequences_from_download_bundle(download_folder):
+     """
+     Reads sequences.csv from [download_folder], returning a list of dicts. This is a
+     thin wrapper around pd.read_csv; it's just here for future-proofing.
+
+     Args:
+         download_folder (str): a folder containing exactly one file called sequences.csv, typically
+             representing a Wildlife Insights download bundle.
+
+     Returns:
+         list of dict: a direct conversion of the .csv file to a list of dicts
+     """
+
+     print('Reading sequences from {}'.format(download_folder))
+
+     sequence_list_files = os.listdir(download_folder)
+     sequence_list_files = \
+         [fn for fn in sequence_list_files if fn == 'sequences.csv']
+     assert len(sequence_list_files) == 1, \
+         'Could not find sequences.csv in {}'.format(download_folder)
+
+     sequence_list_file = os.path.join(download_folder,sequence_list_files[0])
+
+     df = pd.read_csv(sequence_list_file)
+     sequence_records = df.to_dict('records')
+     return sequence_records
+
+
+ def read_images_from_download_bundle(download_folder):
+     """
+     Reads all images_*.csv files from [download_folder], returning a dict mapping image IDs
+     to a list of dicts that describe each image. It's a list of dicts rather than a single dict
+     because images may appear more than once.
+
+     Args:
+         download_folder (str): a folder containing one or more images_*.csv files, typically
+             representing a Wildlife Insights download bundle.
+
+     Returns:
+         dict: Maps image GUIDs to dicts with at least the following fields:
+             * project_id (int)
+             * deployment_id (str)
+             * image_id (str, should match the key)
+             * filename (str, the filename without path at the time of upload)
+             * location (str, starting with gs://)
+
+             May also contain classification fields: wi_taxon_id (str), species, etc.
+     """
+
+     print('Reading images from {}'.format(download_folder))
+
+     ##%% Find lists of images
+
+     image_list_files = os.listdir(download_folder)
+     image_list_files = \
+         [fn for fn in image_list_files if fn.startswith('images_') and fn.endswith('.csv')]
+     image_list_files = \
+         [os.path.join(download_folder,fn) for fn in image_list_files]
+     print('Found {} image list files'.format(len(image_list_files)))
+
+
+     ##%% Read lists of images by deployment
+
+     image_id_to_image_records = defaultdict(list)
+
+     # image_list_file = image_list_files[0]
+     for image_list_file in image_list_files:
+
+         print('Reading images from list file {}'.format(
+             os.path.basename(image_list_file)))
+
+         df = pd.read_csv(image_list_file)
+
+         # i_row = 0; row = df.iloc[i_row]
+         for i_row,row in tqdm(df.iterrows(),total=len(df)):
+
+             row_dict = row.to_dict()
+             image_id = row_dict['image_id']
+             image_id_to_image_records[image_id].append(row_dict)
+
+         # ...for each image
+
+     # ...for each list file
+
+     deployment_ids = set()
+     for image_id in image_id_to_image_records:
+         image_records = image_id_to_image_records[image_id]
+         for image_record in image_records:
+             deployment_ids.add(image_record['deployment_id'])
+
+     print('Found {} rows in {} deployments'.format(
+         len(image_id_to_image_records),
+         len(deployment_ids)))
+
+     return image_id_to_image_records
+
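A short usage sketch, assuming a hypothetical local folder containing an extracted WI download bundle:

# Hypothetical path to an extracted WI download bundle
download_folder = '/data/wi-download'

image_id_to_records = read_images_from_download_bundle(download_folder)

# Count images that have not yet been human-reviewed
n_unidentified = sum(
    1 for records in image_id_to_records.values()
    if all(record_is_unidentified(r) for r in records))
print('{} of {} images are unidentified'.format(
    n_unidentified, len(image_id_to_records)))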
+
+ def find_images_in_identify_tab(download_folder_with_identify,download_folder_excluding_identify):
+     """
+     Based on extracted download packages with and without the "exclude images in 'identify' tab"
+     checkbox checked, figure out which images are in the identify tab. Returns a list of dicts (one
+     per image).
+
+     Args:
+         download_folder_with_identify (str): the folder containing the download bundle that
+             includes images from the "identify" tab
+         download_folder_excluding_identify (str): the folder containing the download bundle that
+             excludes images from the "identify" tab
+
+     Returns:
+         list of dict: list of image records that are present in the identify tab
+     """
+
+     ##%% Read data (~30 seconds)
+
+     image_id_to_image_records_with_identify = \
+         read_images_from_download_bundle(download_folder_with_identify)
+     image_id_to_image_records_excluding_identify = \
+         read_images_from_download_bundle(download_folder_excluding_identify)
+
+
+     ##%% Find images that have not been identified
+
+     all_image_ids_with_identify = set(image_id_to_image_records_with_identify.keys())
+     all_image_ids_excluding_identify = set(image_id_to_image_records_excluding_identify.keys())
+
+     image_ids_in_identify_tab = all_image_ids_with_identify.difference(all_image_ids_excluding_identify)
+
+     assert len(image_ids_in_identify_tab) == \
+         len(all_image_ids_with_identify) - len(all_image_ids_excluding_identify)
+
+     print('Found {} images with identify, {} in identify tab, {} excluding'.format(
+         len(all_image_ids_with_identify),
+         len(image_ids_in_identify_tab),
+         len(all_image_ids_excluding_identify)))
+
+     image_records_in_identify_tab = []
+     deployment_ids_for_downloaded_images = set()
+
+     for image_id in image_ids_in_identify_tab:
+         image_records_this_image = image_id_to_image_records_with_identify[image_id]
+         assert len(image_records_this_image) > 0
+         image_records_in_identify_tab.extend(image_records_this_image)
+         for image_record in image_records_this_image:
+             deployment_ids_for_downloaded_images.add(image_record['deployment_id'])
+
+     print('Found {} records for {} unique images in {} deployments'.format(
+         len(image_records_in_identify_tab),
+         len(image_ids_in_identify_tab),
+         len(deployment_ids_for_downloaded_images)))
+
+     return image_records_in_identify_tab
+
+ # ...def find_images_in_identify_tab(...)
+
+
+ def write_download_commands(image_records_to_download,
+                             download_dir_base,
+                             force_download=False,
+                             n_download_workers=25,
+                             download_command_file_base=None):
+     """
+     Given a list of dicts with at least the field 'location' (a gs:// URL), prepare a set of "gcloud
+     storage" commands to download images, and write those to a series of .sh scripts, along with one
+     .sh script that runs all the others and blocks.
+
+     gcloud commands will use relative paths.
+
+     image_records_to_download can also be a dict mapping IDs to lists of records.
+
+     Args:
+         image_records_to_download (list of dict): list of dicts with at least the field 'location'
+         download_dir_base (str): local destination folder
+         force_download (bool, optional): include download commands even if the target file exists
+         n_download_workers (int, optional): number of scripts to write (that's our hacky way
+             of controlling parallelization)
+         download_command_file_base (str, optional): path of the main .sh script we should write,
+             defaults to "download_wi_images.sh" in the destination folder
+     """
+
+     if isinstance(image_records_to_download,dict):
+
+         all_image_records = []
+         for k in image_records_to_download:
+             records_this_image = image_records_to_download[k]
+             all_image_records.extend(records_this_image)
+         return write_download_commands(all_image_records,
+                                        download_dir_base=download_dir_base,
+                                        force_download=force_download,
+                                        n_download_workers=n_download_workers,
+                                        download_command_file_base=download_command_file_base)
+
+     ##%% Make list of gcloud storage commands
+
+     if download_command_file_base is None:
+         download_command_file_base = os.path.join(download_dir_base,'download_wi_images.sh')
+
+     commands = []
+     skipped_urls = []
+     downloaded_urls = set()
+
+     # image_record = image_records_to_download[0]
+     for image_record in tqdm(image_records_to_download):
+
+         url = image_record['location']
+         if url in downloaded_urls:
+             continue
+
+         assert url.startswith('gs://')
+
+         relative_path = url.replace('gs://','')
+         abs_path = os.path.join(download_dir_base,relative_path)
+
+         # Skip files that already exist
+         if (not force_download) and (os.path.isfile(abs_path)):
+             skipped_urls.append(url)
+             continue
+
+         # command = 'gsutil cp "{}" "./{}"'.format(url,relative_path)
+         command = 'gcloud storage cp --no-clobber "{}" "./{}"'.format(url,relative_path)
+         commands.append(command)
+
+     print('Generated {} commands for {} image records'.format(
+         len(commands),len(image_records_to_download)))
+
+     print('Skipped {} URLs'.format(len(skipped_urls)))
+
+
+     ##%% Write those commands out to n .sh files
+
+     commands_by_script = split_list_into_n_chunks(commands,n_download_workers)
+
+     local_download_commands = []
+
+     output_dir = os.path.dirname(download_command_file_base)
+     os.makedirs(output_dir,exist_ok=True)
+
+     # Write out the download script for each chunk
+     # i_script = 0
+     for i_script in range(0,n_download_workers):
+         download_command_file = insert_before_extension(download_command_file_base,str(i_script).zfill(2))
+         local_download_commands.append(os.path.basename(download_command_file))
+         with open(download_command_file,'w',newline='\n') as f:
+             for command in commands_by_script[i_script]:
+                 f.write(command + '\n')
+
+     # Write out the main download script
+     with open(download_command_file_base,'w',newline='\n') as f:
+         for local_download_command in local_download_commands:
+             f.write('./' + local_download_command + ' &\n')
+         f.write('wait\n')
+         f.write('echo done\n')
+
+ # ...def write_download_commands(...)
+
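Putting the download pieces together, a sketch with hypothetical paths; the generated scripts use relative paths, so they should be made executable and run from download_dir_base:

records = find_images_in_identify_tab('/data/wi-with-identify',
                                      '/data/wi-excluding-identify')
write_download_commands(records,
                        download_dir_base='/data/wi-images',
                        n_download_workers=10)
# Then, from /data/wi-images (after chmod +x *.sh):
#
#   bash download_wi_images.sh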
+
+ #%% Functions and constants related to pushing results to the DB
+
+ # Sample payload for validation
+ sample_update_payload = {
+
+     "predictions": [
+         {
+             "project_id": "1234",
+             "ignore_data_file_checks": True,
+             "prediction": "f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank",
+             "prediction_score": 0.81218224763870239,
+             "classifications": {
+                 "classes": [
+                     "f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank",
+                     "b1352069-a39c-4a84-a949-60044271c0c1;aves;;;;;bird",
+                     "90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent",
+                     "f2d233e3-80e3-433d-9687-e29ecc7a467a;mammalia;;;;;mammal",
+                     "ac068717-6079-4aec-a5ab-99e8d14da40b;mammalia;rodentia;sciuridae;dremomys;rufigenis;red-cheeked squirrel"
+                 ],
+                 "scores": [
+                     0.81218224763870239,
+                     0.1096673980355263,
+                     0.02707692421972752,
+                     0.00771023565903306,
+                     0.0049269795417785636
+                 ]
+             },
+             "detections": [
+                 {
+                     "category": "1",
+                     "label": "animal",
+                     "conf": 0.181,
+                     "bbox": [
+                         0.02421,
+                         0.35823999999999989,
+                         0.051560000000000009,
+                         0.070826666666666746
+                     ]
+                 }
+             ],
+             "model_version": "3.1.2",
+             "prediction_source": "manual_update",
+             "data_file_id": "2ea1d2b2-7f84-43f9-af1f-8be0e69c7015"
+         }
+     ]
+ }
+
+ blank_prediction_string = 'f1856211-cfb7-4a5b-9158-c0f72fd09ee6;;;;;;blank'
+ no_cv_result_prediction_string = 'f2efdae9-efb8-48fb-8a91-eccf79ab4ffb;no cv result;no cv result;no cv result;no cv result;no cv result;no cv result'
+ rodent_prediction_string = '90d950db-2106-4bd9-a4c1-777604c3eada;mammalia;rodentia;;;;rodent'
+ mammal_prediction_string = 'f2d233e3-80e3-433d-9687-e29ecc7a467a;mammalia;;;;;mammal'
+ animal_prediction_string = '1f689929-883d-4dae-958c-3d57ab5b6c16;;;;;;animal'
+ human_prediction_string = '990ae9dd-7a59-4344-afcb-1b7b21368000;mammalia;primates;hominidae;homo;sapiens;human'
+
+ process_cv_response_url = 'https://placeholder'
+
+
+ def prepare_data_update_auth_headers(auth_token_file):
+     """
+     Read the authorization token from a text file and prepare http headers.
+
+     Args:
+         auth_token_file (str): a single-line text file containing a write-enabled
+             API token.
+
+     Returns:
+         dict: http headers, with fields 'Authorization' and 'Content-Type'
+     """
+
+     with open(auth_token_file,'r') as f:
+         auth_token = f.read()
+
+     headers = {
+         'Authorization': 'Bearer ' + auth_token,
+         'Content-Type': 'application/json'
+     }
+
+     return headers
+
+
+ def push_results_for_images(payload,
+                             headers,
+                             url=process_cv_response_url,
+                             verbose=False):
+     """
+     Push results for one or more images represented in [payload] to the
+     process_cv_response API, to write to the WI DB.
+
+     Args:
+         payload (dict): payload to upload to the API
+         headers (dict): authorization headers, see prepare_data_update_auth_headers
+         url (str, optional): API URL
+         verbose (bool, optional): enable additional debug output
+
+     Returns:
+         int: response status code
+     """
+
+     if verbose:
+         print('Sending header {} to URL {}'.format(
+             headers,url))
+
+     response = requests.post(url, headers=headers, json=payload)
+
+     # Check the response status code
+     if response.status_code in (200,201):
+         if verbose:
+             print('Successfully pushed results for {} images'.format(len(payload['predictions'])))
+             print(response.headers)
+             print(str(response))
+     else:
+         print(f'Error: {response.status_code} {response.text}')
+
+     return response.status_code
+
+
+ def parallel_push_results_for_images(payloads,
+                                      headers,
+                                      url=process_cv_response_url,
+                                      verbose=False,
+                                      pool_type='thread',
+                                      n_workers=10):
+     """
+     Push results for the list of payloads in [payloads] to the process_cv_response API,
+     parallelized over multiple workers.
+
+     Args:
+         payloads (list of dict): payloads to upload to the API
+         headers (dict): authorization headers, see prepare_data_update_auth_headers
+         url (str, optional): API URL
+         verbose (bool, optional): enable additional debug output
+         pool_type (str, optional): 'thread' or 'process'
+         n_workers (int, optional): number of parallel workers
+
+     Returns:
+         list of int: list of http response codes, one per payload
+     """
+
+     if n_workers == 1:
+
+         results = []
+         for payload in payloads:
+             results.append(push_results_for_images(payload,
+                                                    headers=headers,
+                                                    url=url,
+                                                    verbose=verbose))
+         return results
+
+     else:
+
+         assert pool_type in ('thread','process')
+
+         if pool_type == 'thread':
+             pool_string = 'thread'
+             pool = ThreadPool(n_workers)
+         else:
+             pool_string = 'process'
+             pool = Pool(n_workers)
+
+         print('Created a {} pool of {} workers'.format(
+             pool_string,n_workers))
+
+         results = list(tqdm(pool.imap(
+             partial(push_results_for_images,headers=headers,url=url,verbose=verbose),payloads),
+             total=len(payloads)))
+
+         assert len(results) == len(payloads)
+         return results
+
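A usage sketch for the push path. The token path and IDs here are placeholders, and note that the module-level process_cv_response_url is itself a placeholder that must be pointed at the real endpoint:

headers = prepare_data_update_auth_headers('/secrets/wi_token.txt')  # hypothetical path

payloads = [generate_blank_prediction_payload(
    '70ede9c6-d056-4dd1-9a0b-3098d8113e0e','1234')]  # IDs illustrative

status_codes = parallel_push_results_for_images(payloads,headers,n_workers=10)
assert all(code in (200,201) for code in status_codes)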
+
+ def generate_payload_with_replacement_detections(wi_result,
+                                                  detections,
+                                                  prediction_score=0.9,
+                                                  model_version='3.1.2',
+                                                  prediction_source='manual_update'):
+     """
+     Generate a payload for a single image that keeps the classifications from
+     [wi_result], but replaces the detections with the MD-formatted list [detections].
+
+     Args:
+         wi_result (dict): dict representing a WI prediction result, with at least the
+             fields in the constant wi_result_fields
+         detections (list): list of MD-formatted detection dicts (with fields 'category' and 'conf')
+         prediction_score (float, optional): confidence value to use for the combined prediction
+         model_version (str, optional): model version string to include in the payload
+         prediction_source (str, optional): prediction source string to include in the payload
+
+     Returns:
+         dict: dictionary suitable for uploading via push_results_for_images
+     """
+
+     payload_detections = []
+
+     # detection = detections[0]
+     for detection in detections:
+         detection_out = detection.copy()
+         detection_out['label'] = md_category_id_to_name[detection['category']]
+         if detection_out['conf'] < min_md_output_confidence:
+             detection_out['conf'] = min_md_output_confidence
+         payload_detections.append(detection_out)
+
+     prediction_string = wi_result_to_prediction_string(wi_result)
+
+     prediction = {}
+     prediction['ignore_data_file_checks'] = True
+     prediction['prediction'] = prediction_string
+     prediction['prediction_score'] = prediction_score
+
+     classifications = {}
+     classifications['classes'] = [prediction_string]
+     classifications['scores'] = [prediction_score]
+
+     prediction['classifications'] = classifications
+     prediction['detections'] = payload_detections
+     prediction['model_version'] = model_version
+     prediction['prediction_source'] = prediction_source
+     prediction['data_file_id'] = wi_result['image_id']
+     prediction['project_id'] = str(wi_result['project_id'])
+     payload = {}
+     payload['predictions'] = [prediction]
+
+     return payload
+
+
+ def generate_blank_prediction_payload(data_file_id,
+                                       project_id,
+                                       blank_confidence=0.9,
+                                       model_version='3.1.2',
+                                       prediction_source='manual_update'):
+     """
+     Generate a payload that will set a single image to the blank classification, with
+     no detections. Suitable for upload via push_results_for_images.
+
+     Args:
+         data_file_id (str): unique identifier for this image used in the WI DB
+         project_id (int): WI project ID
+         blank_confidence (float, optional): confidence value to associate with this
+             prediction
+         model_version (str, optional): model version string to include in the payload
+         prediction_source (str, optional): prediction source string to include in the payload
+
+     Returns:
+         dict: dictionary suitable for uploading via push_results_for_images
+     """
+
+     prediction = {}
+     prediction['ignore_data_file_checks'] = True
+     prediction['prediction'] = blank_prediction_string
+     prediction['prediction_score'] = blank_confidence
+     prediction['classifications'] = {}
+     prediction['classifications']['classes'] = [blank_prediction_string]
+     prediction['classifications']['scores'] = [blank_confidence]
+     prediction['detections'] = []
+     prediction['model_version'] = model_version
+     prediction['prediction_source'] = prediction_source
+     prediction['data_file_id'] = data_file_id
+     prediction['project_id'] = project_id
+     payload = {}
+     payload['predictions'] = [prediction]
+
+     return payload
+
+
+ def generate_no_cv_result_payload(data_file_id,
+                                   project_id,
+                                   no_cv_confidence=0.9,
+                                   model_version='3.1.2',
+                                   prediction_source='manual_update'):
+     """
+     Generate a payload that will set a single image to the "no CV result" classification,
+     with no detections. Suitable for uploading via push_results_for_images.
+
+     Args:
+         data_file_id (str): unique identifier for this image used in the WI DB
+         project_id (int): WI project ID
+         no_cv_confidence (float, optional): confidence value to associate with this
+             prediction
+         model_version (str, optional): model version string to include in the payload
+         prediction_source (str, optional): prediction source string to include in the payload
+
+     Returns:
+         dict: dictionary suitable for uploading via push_results_for_images
+     """
+
+     prediction = {}
+     prediction['ignore_data_file_checks'] = True
+     prediction['prediction'] = no_cv_result_prediction_string
+     prediction['prediction_score'] = no_cv_confidence
+     prediction['classifications'] = {}
+     prediction['classifications']['classes'] = [no_cv_result_prediction_string]
+     prediction['classifications']['scores'] = [no_cv_confidence]
+     prediction['detections'] = []
+     prediction['model_version'] = model_version
+     prediction['prediction_source'] = prediction_source
+     prediction['data_file_id'] = data_file_id
+     prediction['project_id'] = project_id
+     payload = {}
+     payload['predictions'] = [prediction]
+
+     return payload
+
+
+ def generate_payload_for_prediction_string(data_file_id,
+                                            project_id,
+                                            prediction_string,
+                                            prediction_confidence=0.8,
+                                            detections=None,
+                                            model_version='3.1.2',
+                                            prediction_source='manual_update'):
+     """
+     Generate a payload that will set a single image to a particular prediction, optionally
+     including detections. Suitable for uploading via push_results_for_images.
+
+     Args:
+         data_file_id (str): unique identifier for this image used in the WI DB
+         project_id (int): WI project ID
+         prediction_string (str): WI-formatted prediction string to include in the payload
+         prediction_confidence (float, optional): confidence value to associate with this
+             prediction
+         detections (list, optional): list of MD-formatted detection dicts, with fields
+             'category' and 'conf'
+         model_version (str, optional): model version string to include in the payload
+         prediction_source (str, optional): prediction source string to include in the payload
+
+     Returns:
+         dict: dictionary suitable for uploading via push_results_for_images
+     """
+
+     assert is_valid_prediction_string(prediction_string), \
+         'Invalid prediction string: {}'.format(prediction_string)
+
+     payload_detections = []
+
+     if detections is not None:
+         # detection = detections[0]
+         for detection in detections:
+             detection_out = detection.copy()
+             detection_out['label'] = md_category_id_to_name[detection['category']]
+             if detection_out['conf'] < min_md_output_confidence:
+                 detection_out['conf'] = min_md_output_confidence
+             payload_detections.append(detection_out)
+
+     prediction = {}
+     prediction['ignore_data_file_checks'] = True
+     prediction['prediction'] = prediction_string
+     prediction['prediction_score'] = prediction_confidence
+     prediction['classifications'] = {}
+     prediction['classifications']['classes'] = [prediction_string]
+     prediction['classifications']['scores'] = [prediction_confidence]
+     prediction['detections'] = payload_detections
+     prediction['model_version'] = model_version
+     prediction['prediction_source'] = prediction_source
+     prediction['data_file_id'] = data_file_id
+     prediction['project_id'] = project_id
+
+     payload = {}
+     payload['predictions'] = [prediction]
+
+     return payload
+
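A sketch tying the payload helpers together; the IDs are illustrative (a data_file_id is a 36-character GUID), and validate_payload, defined just below, raises on malformed payloads:

payload = generate_payload_for_prediction_string(
    data_file_id='2ea1d2b2-7f84-43f9-af1f-8be0e69c7015',
    project_id='1234',
    prediction_string=rodent_prediction_string,
    prediction_confidence=0.85,
    detections=[{'category':'1','conf':0.6,'bbox':[0.1,0.1,0.2,0.2]}])
validate_payload(payload)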
+
+ def validate_payload(payload):
+     """
+     Verifies that the dict [payload] is compatible with the ProcessCVResponse() API. Throws an
+     error if [payload] is invalid.
+
+     Args:
+         payload (dict): payload in the format expected by push_results_for_images.
+
+     Returns:
+         bool: successful validation; this is just future-proofing, currently never returns False
+     """
+
+     assert isinstance(payload,dict)
+     assert len(payload.keys()) == 1 and 'predictions' in payload
+
+     # prediction = payload['predictions'][0]
+     for prediction in payload['predictions']:
+
+         assert 'project_id' in prediction
+         if not isinstance(prediction['project_id'],int):
+             _ = int(prediction['project_id'])
+         assert 'ignore_data_file_checks' in prediction and \
+             isinstance(prediction['ignore_data_file_checks'],bool)
+         assert 'prediction' in prediction and \
+             isinstance(prediction['prediction'],str) and \
+             len(prediction['prediction'].split(';')) == 7
+         assert 'prediction_score' in prediction and \
+             isinstance(prediction['prediction_score'],float)
+         assert 'model_version' in prediction and \
+             isinstance(prediction['model_version'],str)
+         assert 'data_file_id' in prediction and \
+             isinstance(prediction['data_file_id'],str) and \
+             len(prediction['data_file_id']) == 36
+         assert 'classifications' in prediction and \
+             isinstance(prediction['classifications'],dict)
+         classifications = prediction['classifications']
+         assert 'classes' in classifications and isinstance(classifications['classes'],list)
+         assert 'scores' in classifications and isinstance(classifications['scores'],list)
+         assert len(classifications['classes']) == len(classifications['scores'])
+         for c in classifications['classes']:
+             assert is_valid_prediction_string(c)
+         for score in classifications['scores']:
+             assert isinstance(score,float) and score >= 0 and score <= 1.0
+         assert 'detections' in prediction and isinstance(prediction['detections'],list)
+
+         for detection in prediction['detections']:
+
+             assert isinstance(detection,dict)
+             assert 'category' in detection and detection['category'] in ('1','2','3')
+             assert 'label' in detection and detection['label'] in ('animal','person','vehicle')
+             assert 'conf' in detection and \
+                 isinstance(detection['conf'],float) and \
+                 detection['conf'] >= 0 and detection['conf'] <= 1.0
+             assert 'bbox' in detection and \
+                 isinstance(detection['bbox'],list) and \
+                 len(detection['bbox']) == 4
+
+         # ...for each detection
+
+     # ...for each prediction
+
+     return True
+
+ # ...def validate_payload(...)
+
+
+ #%% Validate constants
+
+ # This is executed at the time this module gets imported.
+
+ blank_payload = generate_blank_prediction_payload('70ede9c6-d056-4dd1-9a0b-3098d8113e0e','1234')
+ validate_payload(sample_update_payload)
+ validate_payload(blank_payload)
+
+
+ #%% Functions and constants related to working with batch predictions
+
+ def get_kingdom(prediction_string):
+     """
+     Return the first taxonomic token from a WI prediction string (per wi_result_fields
+     this is the "class" field; "kingdom" is used loosely here).
+
+     Args:
+         prediction_string (str): a string in the semicolon-delimited prediction string format
+
+     Returns:
+         str: the first taxonomic field from the input string
+     """
+
+     tokens = prediction_string.split(';')
+     return tokens[1]
+
+
+ def is_human_classification(prediction_string):
+     """
+     Determines whether the input string represents a human classification, which includes a variety
+     of common names (hiker, person, etc.)
+
+     Args:
+         prediction_string (str): a string in the semicolon-delimited prediction string format
+
+     Returns:
+         bool: whether this string corresponds to a human category
+     """
+
+     return prediction_string == human_prediction_string or 'homo;sapiens' in prediction_string
+
+
+ def is_animal_classification(prediction_string):
+     """
+     Determines whether the input string represents an animal classification, which excludes, e.g.,
+     humans, blanks, vehicles, and unknowns.
+
+     Args:
+         prediction_string (str): a string in the semicolon-delimited prediction string format
+
+     Returns:
+         bool: whether this string corresponds to an animal category
+     """
+
+     if prediction_string == animal_prediction_string:
+         return True
+     if prediction_string == human_prediction_string or 'homo;sapiens' in prediction_string:
+         return False
+     if prediction_string == blank_prediction_string:
+         return False
+     if prediction_string == no_cv_result_prediction_string:
+         return False
+     if len(get_kingdom(prediction_string)) == 0:
+         return False
+     return True
+
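For instance, under these definitions (expected outputs shown as comments):

is_animal_classification(rodent_prediction_string)   # True
is_animal_classification(human_prediction_string)    # False
is_animal_classification(blank_prediction_string)    # False

# Any prediction string containing 'homo;sapiens' counts as human, regardless
# of the common name in the last field
is_human_classification(
    '990ae9dd-7a59-4344-afcb-1b7b21368000;mammalia;primates;hominidae;homo;sapiens;hiker')  # True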
+
+ def generate_md_results_from_predictions_json(predictions_json_file,md_results_file,base_folder=None):
+     """
+     Generate an MD-formatted .json file from a predictions.json file. Typically,
+     MD results files use relative paths, and predictions.json files use absolute paths, so
+     this function optionally removes the leading string [base_folder] from all file names.
+
+     Currently just applies the top classification category to every detection. If the top
+     classification is "blank", writes an empty detection list.
+
+     wi_to_md.py is a command-line driver for this function.
+
+     Args:
+         predictions_json_file (str): path to a predictions.json file
+         md_results_file (str): path to which we should write an MD-formatted .json file
+         base_folder (str, optional): leading string to remove from each path in the
+             predictions.json file
+     """
+
+     # Read predictions file
+     with open(predictions_json_file,'r') as f:
+         predictions = json.load(f)
+     predictions = predictions['predictions']
+     assert isinstance(predictions,list)
+
+     from megadetector.utils.ct_utils import is_list_sorted
+
+     detection_category_id_to_name = {}
+     classification_category_name_to_id = {}
+
+     # Keep track of detections that don't have an assigned detection category; these
+     # are fake detections we create for non-blank images with empty detection lists.
+     # We need to go back later and give them a legitimate detection category ID.
+     all_unknown_detections = []
+
+     # Create the output images list
+     images_out = []
+
+     # im_in = predictions[0]
+     for im_in in predictions:
+
+         im_out = {}
+
+         fn = im_in['filepath']
+         if base_folder is not None:
+             if fn.startswith(base_folder):
+                 fn = fn.replace(base_folder,'',1)
+
+         im_out['file'] = fn
+
+         if 'failures' in im_in:
+
+             im_out['failure'] = str(im_in['failures'])
+             im_out['detections'] = None
+
+         else:
+
+             im_out['detections'] = []
+
+             if 'detections' in im_in:
+
+                 if len(im_in['detections']) == 0:
+                     im_out['detections'] = []
+                 else:
+                     # det_in = im_in['detections'][0]
+                     for det_in in im_in['detections']:
+                         if det_in['category'] in detection_category_id_to_name:
+                             assert detection_category_id_to_name[det_in['category']] == det_in['label']
+                         else:
+                             detection_category_id_to_name[det_in['category']] = det_in['label']
+                         det_out = {}
+                         for s in ['category','conf','bbox']:
+                             det_out[s] = det_in[s]
+                         im_out['detections'].append(det_out)
+
+             # ...if detections are present
+
+             class_to_assign = None
+             class_confidence = None
+
+             if 'classifications' in im_in:
+
+                 classifications = im_in['classifications']
+                 assert len(classifications['scores']) == len(classifications['classes'])
+                 assert is_list_sorted(classifications['scores'],reverse=True)
+                 class_to_assign = classifications['classes'][0]
+                 class_confidence = classifications['scores'][0]
+
+             if 'prediction' in im_in:
+
+                 class_to_assign = im_in['prediction']
+                 class_confidence = im_in['prediction_score']
+
+             if class_to_assign is not None:
+
+                 if class_to_assign == blank_prediction_string:
+
+                     # This is a scenario that's not captured well by the MD format: a blank prediction
+                     # with detections present. But, for now, don't do anything special here, just making
+                     # a note of this.
+                     if len(im_out['detections']) > 0:
+                         pass
+
+                 else:
+
+                     assert not class_to_assign.endswith('blank')
+
+                     # This is a scenario that's not captured well by the MD format: no detections present,
+                     # but a non-blank prediction. For now, create a fake detection to handle this prediction.
+                     if len(im_out['detections']) == 0:
+
+                         print('Warning: creating fake detection for non-blank whole-image classification')
+                         det_out = {}
+                         all_unknown_detections.append(det_out)
+
+                         # We will change this to a string-int later
+                         det_out['category'] = 'unknown'
+                         det_out['conf'] = class_confidence
+                         det_out['bbox'] = [0,0,1,1]
+                         im_out['detections'].append(det_out)
+
+                 # ...if this is/isn't a blank classification
+
+                 # Attach that classification to each detection
+
+                 # Create a new category ID if necessary
+                 if class_to_assign in classification_category_name_to_id:
+                     classification_category_id = classification_category_name_to_id[class_to_assign]
+                 else:
+                     classification_category_id = str(len(classification_category_name_to_id))
+                     classification_category_name_to_id[class_to_assign] = classification_category_id
+
+                 for det in im_out['detections']:
+                     det['classifications'] = []
+                     det['classifications'].append([classification_category_id,class_confidence])
+
+             # ...if we have some type of classification for this image
+
+         # ...if this is/isn't a failure
+
+         images_out.append(im_out)
+
+     # ...for each image
+
+     # Fix the 'unknown' category
+
+     if len(all_unknown_detections) > 0:
+
+         max_detection_category_id = max([int(x) for x in detection_category_id_to_name.keys()])
+         unknown_category_id = str(max_detection_category_id + 1)
+         detection_category_id_to_name[unknown_category_id] = 'unknown'
+
+         for det in all_unknown_detections:
+             assert det['category'] == 'unknown'
+             det['category'] = unknown_category_id
+
+     # Sort by filename
+
+     images_out = sort_list_of_dicts_by_key(images_out,'file')
+
+     # Prepare friendly classification names
+
+     classification_category_descriptions = invert_dictionary(classification_category_name_to_id)
+     classification_categories_out = {}
+     for category_id in classification_category_descriptions.keys():
+         category_name = classification_category_descriptions[category_id].split(';')[-1]
+         classification_categories_out[category_id] = category_name
+
+     # Prepare the output dict
+
+     detection_categories_out = detection_category_id_to_name
+     info = {}
+     info['format_version'] = 1.4
+     info['detector'] = 'converted_from_predictions_json'
+
+     output_dict = {}
+     output_dict['info'] = info
+     output_dict['detection_categories'] = detection_categories_out
+     output_dict['classification_categories'] = classification_categories_out
+     output_dict['classification_category_descriptions'] = classification_category_descriptions
+     output_dict['images'] = images_out
+
+     with open(md_results_file,'w') as f:
+         json.dump(output_dict,f,indent=1)
+
+     validation_options = ValidateBatchResultsOptions()
+     validation_options.raise_errors = True
+     _ = validate_batch_results(md_results_file, options=validation_options)
+
+ # ...def generate_md_results_from_predictions_json(...)
+
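A usage sketch, with hypothetical paths; base_folder strips the absolute prefix so the MD-formatted file ends up with relative paths:

generate_md_results_from_predictions_json(
    predictions_json_file='/results/predictions.json',
    md_results_file='/results/md_results.json',
    base_folder='/data/images/')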
+
+ def generate_predictions_json_from_md_results(md_results_file,predictions_json_file,base_folder=None):
+     """
+     Generate a predictions.json file from the MD-formatted .json file [md_results_file]. Typically,
+     MD results files use relative paths, and predictions.json files use absolute paths, so
+     this function optionally prepends [base_folder]. Does not handle classification results in
+     MD format, since this is intended to prepare data for passing through the WI classifier.
+
+     md_to_wi.py is a command-line driver for this function.
+
+     Args:
+         md_results_file (str): path to an MD-formatted .json file
+         predictions_json_file (str): path to which we should write a predictions.json file
+         base_folder (str, optional): folder name to prepend to each path in md_results_file,
+             to convert relative paths to absolute paths.
+     """
+
+     # Validate the input file
+     validation_options = ValidateBatchResultsOptions()
+     validation_options.raise_errors = True
+     validation_options.return_data = True
+     md_results = validate_batch_results(md_results_file, options=validation_options)
+     category_id_to_name = md_results['detection_categories']
+
+     output_dict = {}
+     output_dict['predictions'] = []
+
+     # im = md_results['images'][0]
+     for im in md_results['images']:
+
+         prediction = {}
+         fn = im['file']
+         if base_folder is not None:
+             fn = os.path.join(base_folder,fn)
+         fn = fn.replace('\\','/')
+         prediction['filepath'] = fn
+         if 'failure' in im and im['failure'] is not None:
+             prediction['failures'] = ['DETECTOR']
+         else:
+             assert 'detections' in im and im['detections'] is not None
+             detections = []
+             for det in im['detections']:
+                 output_det = deepcopy(det)
+                 output_det['label'] = category_id_to_name[det['category']]
+                 detections.append(output_det)
+
+             # detections *must* be sorted in descending order by confidence
+             detections = sort_list_of_dicts_by_key(detections,'conf',reverse=True)
+             prediction['detections'] = detections
+
+         assert len(prediction.keys()) >= 2
+         output_dict['predictions'].append(prediction)
+
+     # ...for each image
+
+     os.makedirs(os.path.dirname(predictions_json_file),exist_ok=True)
+     with open(predictions_json_file,'w') as f:
+         json.dump(output_dict,f,indent=1)
+
+ # ...def generate_predictions_json_from_md_results(...)
+
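And the reverse direction, again with hypothetical paths; note that the os.makedirs call above assumes predictions_json_file includes a directory component:

generate_predictions_json_from_md_results(
    md_results_file='/results/md_results.json',
    predictions_json_file='/results/predictions.json',
    base_folder='/data/images')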
+
+ def generate_instances_json_from_folder(folder,
+                                         country=None,
+                                         lat=None,
+                                         lon=None,
+                                         output_file=None,
+                                         filename_replacements=None):
+     """
+     Generate an instances.json record that contains all images in [folder], optionally
+     including location information, in a format suitable for run_model.py. Optionally writes
+     the results to [output_file].
+
+     Args:
+         folder (str): the folder to recursively search for images
+         country (str, optional): a three-letter country code
+         lat (float, optional): latitude to associate with all images
+         lon (float, optional): longitude to associate with all images
+         output_file (str, optional): .json file to which we should write instance records
+         filename_replacements (dict, optional): str --> str dict indicating filename substrings
+             that should be replaced with other strings. Replacement occurs *after* converting
+             backslashes to forward slashes.
+
+     Returns:
+         dict: dict with at least the field "instances"
+     """
+
+     assert os.path.isdir(folder)
+
+     image_files_abs = find_images(folder,recursive=True,return_relative_paths=False)
+
+     instances = []
+
+     # image_fn_abs = image_files_abs[0]
+     for image_fn_abs in image_files_abs:
+         instance = {}
+         instance['filepath'] = image_fn_abs.replace('\\','/')
+         if filename_replacements is not None:
+             for s in filename_replacements:
+                 instance['filepath'] = instance['filepath'].replace(s,filename_replacements[s])
+         if country is not None:
+             instance['country'] = country
+         if lat is not None:
+             assert lon is not None, 'Latitude provided without longitude'
+             instance['latitude'] = lat
+         if lon is not None:
+             assert lat is not None, 'Longitude provided without latitude'
+             instance['longitude'] = lon
+         instances.append(instance)
+
+     to_return = {'instances':instances}
+
+     if output_file is not None:
+         os.makedirs(os.path.dirname(output_file),exist_ok=True)
+         with open(output_file,'w') as f:
+             json.dump(to_return,f,indent=1)
+
+     return to_return
+
+ # ...def generate_instances_json_from_folder(...)
+
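For example (hypothetical folder and coordinates):

instances = generate_instances_json_from_folder(
    '/data/images',
    country='GTM',
    lat=15.5, lon=-90.25,
    output_file='/data/instances.json')
print('Wrote {} instances'.format(len(instances['instances'])))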
+
+ #%% Functions related to geofencing and taxonomy mapping
+
+ # This maps a taxonomy string (e.g. mammalia;cetartiodactyla;cervidae;odocoileus;virginianus) to
+ # a dict with keys taxon_id, common_name, kingdom, phylum, class, order, family, genus, species
+ taxonomy_string_to_taxonomy_info = None
+ binomial_name_to_taxonomy_info = None
+ common_name_to_taxonomy_info = None
+
+ def taxonomy_info_to_taxonomy_string(taxonomy_info):
+     """
+     Convert a taxonomy record in dict format to a semicolon-delimited string.
+
+     Args:
+         taxonomy_info (dict): dict in the format stored in, e.g., taxonomy_string_to_taxonomy_info
+
+     Returns:
+         str: string in the format used as keys in, e.g., taxonomy_string_to_taxonomy_info
+     """
+
+     return taxonomy_info['class'] + ';' + \
+         taxonomy_info['order'] + ';' + \
+         taxonomy_info['family'] + ';' + \
+         taxonomy_info['genus'] + ';' + \
+         taxonomy_info['species']
+
+
+ def initialize_taxonomy_info(taxonomy_file,force_init=False,encoding='cp1252'):
+     """
+     Load WI taxonomy information from a .json file. Stores information in the global
+     dicts [taxonomy_string_to_taxonomy_info], [binomial_name_to_taxonomy_info], and
+     [common_name_to_taxonomy_info].
+
+     Args:
+         taxonomy_file (str): .json file containing WI taxonomy information
+         force_init (bool, optional): if the output dicts already exist, should we
+             re-initialize anyway?
+         encoding (str, optional): character encoding to use when opening the .json file
+     """
+
+     global taxonomy_string_to_taxonomy_info
+     global binomial_name_to_taxonomy_info
+     global common_name_to_taxonomy_info
+
+     if (taxonomy_string_to_taxonomy_info is not None) and (not force_init):
+         return
+
+     """
+     Taxonomy keys are taxonomy strings, e.g.:
+
+     'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'
+
+     Taxonomy values are extended strings w/Taxon IDs and common names, e.g.:
+
+     '5c7ce479-8a45-40b3-ae21-7c97dfae22f5;mammalia;cetartiodactyla;cervidae;odocoileus;virginianus;white-tailed deer'
+     """
+
+     with open(taxonomy_file,encoding=encoding,errors='ignore') as f:
+         taxonomy_table = json.load(f,strict=False)
+
+     # Right now I'm punting on some unusual-character issues, but here is some scrap that
+     # might help address this in the future
+     if False:
+         import codecs
+         with codecs.open(taxonomy_file,'r',encoding=encoding,errors='ignore') as f:
+             s = f.read()
+         import unicodedata
+         s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore')
+         taxonomy_table = json.loads(s,strict=False)
+
+     taxonomy_string_to_taxonomy_info = {}
+     binomial_name_to_taxonomy_info = {}
+     common_name_to_taxonomy_info = {}
+
+     # taxonomy_string = next(iter(taxonomy_table.keys()))
+     for taxonomy_string in taxonomy_table.keys():
+
+         taxonomy_string = taxonomy_string.lower()
+
+         taxon_info = {}
+         extended_string = taxonomy_table[taxonomy_string]
+         tokens = extended_string.split(';')
+         assert len(tokens) == 7
+         taxon_info['taxon_id'] = tokens[0]
+         assert len(taxon_info['taxon_id']) == 36
+         taxon_info['kingdom'] = 'animal'
+         taxon_info['phylum'] = 'chordata'
+         taxon_info['class'] = tokens[1]
+         taxon_info['order'] = tokens[2]
+         taxon_info['family'] = tokens[3]
+         taxon_info['genus'] = tokens[4]
+         taxon_info['species'] = tokens[5]
+         taxon_info['common_name'] = tokens[6]
+
+         if taxon_info['common_name'] != '':
+             common_name_to_taxonomy_info[taxon_info['common_name']] = taxon_info
+
+         taxonomy_string_to_taxonomy_info[taxonomy_string] = taxon_info
+         if tokens[4] == '' or tokens[5] == '':
+             # print('Warning: no binomial name for {}'.format(taxonomy_string))
+             pass
+         else:
+             binomial_name = tokens[4].strip() + ' ' + tokens[5].strip()
+             binomial_name_to_taxonomy_info[binomial_name] = taxon_info
+
+ # ...def initialize_taxonomy_info(...)
+
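A lookup sketch, assuming a hypothetical local copy of the WI taxonomy file. Access the lookup tables through the module object, since initialize_taxonomy_info rebinds the module-level globals:

from megadetector.utils import wi_utils

wi_utils.initialize_taxonomy_info('/data/wi_taxonomy.json')  # hypothetical path
info = wi_utils.binomial_name_to_taxonomy_info['odocoileus virginianus']
info['common_name']                              # 'white-tailed deer'
wi_utils.taxonomy_info_to_taxonomy_string(info)  # 'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'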
+
+ #%% Geofencing functions
+
+ # Dict mapping semicolon-delimited taxonomy strings to geofencing rules
+ taxonomy_string_to_geofencing_rules = None
+
+ # Maps lower-case country names to upper-case country codes
+ country_to_country_code = None
+
+ # Maps upper-case country codes to lower-case country names
+ country_code_to_country = None
+
+ def initialize_geofencing(geofencing_file,country_code_file,force_init=False):
+     """
+     Load geofencing information from a .json file, and country code mappings from
+     a .csv file. Stores results in the global tables [taxonomy_string_to_geofencing_rules],
+     [country_to_country_code], and [country_code_to_country].
+
+     Args:
+         geofencing_file (str): .json file with geofencing rules
+         country_code_file (str): .csv file with country code mappings
+         force_init (bool, optional): if the output dicts already exist, should we
+             re-initialize anyway?
+     """
+
+     global taxonomy_string_to_geofencing_rules
+     global country_to_country_code
+     global country_code_to_country
+
+     if (country_to_country_code is not None) and \
+         (country_code_to_country is not None) and \
+         (taxonomy_string_to_geofencing_rules is not None) and \
+         (not force_init):
+         return
+
+     # Read country code information
+     country_code_df = pd.read_csv(country_code_file)
+     country_to_country_code = {}
+     country_code_to_country = {}
+     for i_row,row in country_code_df.iterrows():
+         country_to_country_code[row['name'].lower()] = row['alpha-3'].upper()
+         country_code_to_country[row['alpha-3'].upper()] = row['name'].lower()
+
+     # Read geofencing information
+     with open(geofencing_file,'r',encoding='utf-8') as f:
+         taxonomy_string_to_geofencing_rules = json.load(f)
+
+     """
+     Geofencing keys are taxonomy strings, e.g.:
+
+     'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'
+
+     Geofencing values are tables mapping allow/block to country codes, optionally
+     including region/state codes, e.g.:
+
+     {'allow': {
+         'ALA': [],
+         'ARG': [],
+         ...
+         'SUR': [],
+         'TTO': [],
+         'USA': ['AL',
+                 'AR',
+                 'AZ',
+                 ...
+     }
+     """
+
+     # Validate
+
+     # species_string = next(iter(taxonomy_string_to_geofencing_rules.keys()))
+     for species_string in taxonomy_string_to_geofencing_rules.keys():
+
+         species_rules = taxonomy_string_to_geofencing_rules[species_string]
+
+         # Every country should *either* have allow rules or block rules; no countries
+         # currently have both
+         assert len(species_rules.keys()) == 1
+         rule_type = list(species_rules.keys())[0]
+         assert rule_type in ('allow','block')
+
+         all_country_rules_this_species = species_rules[rule_type]
+         for country_code in all_country_rules_this_species.keys():
+
+             assert country_code in country_code_to_country
+
+             region_rules = all_country_rules_this_species[country_code]
+
+             # Right now we only have regional rules for the USA; these may be part of
+             # allow or block rules.
+             if len(region_rules) > 0:
+                 assert country_code == 'USA'
+
+     # ...for each species
+
+ # ...def initialize_geofencing(...)
+
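A usage sketch (file paths hypothetical); species_allowed_in_country, defined below, accepts a common name, a binomial name, or a taxonomy string:

wi_utils.initialize_taxonomy_info('/data/wi_taxonomy.json')
wi_utils.initialize_geofencing('/data/geofence_rules.json',
                               '/data/country_codes.csv')

wi_utils.species_allowed_in_country('white-tailed deer','USA',state='WA')
wi_utils.species_allowed_in_country('didelphis marsupialis','guatemala',
                                    return_status=True)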
1430
+ def species_allowed_in_country(species,country,state=None,return_status=False):
+     """
+     Determines whether [species] is allowed in [country], according to
+     already-initialized geofencing rules.
+
+     Args:
+         species (str): can be a common name, a binomial name, or a species string
+         country (str): country name or three-letter code
+         state (str, optional): two-letter US state code
+         return_status (bool, optional): by default, this function returns a bool;
+             if you want to know *why* [species] is allowed/not allowed, setting
+             return_status to True will return additional information.
+
+     Returns:
+         bool or str: typically returns True if [species] is allowed in [country], else
+         False. Returns a more detailed string if return_status is set.
+     """
+
+     assert taxonomy_string_to_geofencing_rules is not None, \
+         'Initialize geofencing prior to species lookup'
+     assert taxonomy_string_to_taxonomy_info is not None, \
+         'Initialize taxonomy lookup prior to species lookup'
+
+     # species = 'mammalia;cetartiodactyla;cervidae;odocoileus;virginianus'
+     # species = 'didelphis marsupialis'
+     # country = 'Guatemala'
+
+     # species = 'common opossum'
+
+     species = species.lower()
+
+     # Turn "species" into a taxonomy string
+
+     # If this is already a taxonomy string...
+     if len(species.split(';')) == 5:
+         taxonomy_string = species
+     # If this is a binomial name...
+     elif len(species.split(' ')) == 2 and (species in binomial_name_to_taxonomy_info):
+         taxonomy_info = binomial_name_to_taxonomy_info[species]
+         taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
+     # If this is a common name...
+     elif species in common_name_to_taxonomy_info:
+         taxonomy_info = common_name_to_taxonomy_info[species]
+         taxonomy_string = taxonomy_info_to_taxonomy_string(taxonomy_info)
+     else:
+         raise ValueError('Could not find taxonomic information for {}'.format(species))
+
+
+     # Normalize [state]
+
+     if state is not None:
+         state = state.upper()
+         assert len(state) == 2
+
+     # Turn "country" into a country code
+
+     if len(country) == 3:
+         assert country.upper() in country_code_to_country
+         country = country.upper()
+     else:
+         assert country.lower() in country_to_country_code
+         country = country_to_country_code[country.lower()]
+
+     country_code = country.upper()
+
+     # Species with no rules are allowed everywhere
+     if taxonomy_string not in taxonomy_string_to_geofencing_rules:
+         status = 'allow_by_default'
+         if return_status:
+             return status
+         else:
+             return True
+
+     geofencing_rules_this_species = taxonomy_string_to_geofencing_rules[taxonomy_string]
+     allowed_countries = []
+     blocked_countries = []
+
+     assert len(geofencing_rules_this_species.keys()) == 1
+     rule_type = list(geofencing_rules_this_species.keys())[0]
+     assert rule_type in ('allow','block')
+
+     if rule_type == 'allow':
+         allowed_countries = list(geofencing_rules_this_species['allow'])
+     else:
+         assert rule_type == 'block'
+         blocked_countries = list(geofencing_rules_this_species['block'])
+
+     status = None
+     if country_code in blocked_countries:
+         status = 'blocked'
+     elif country_code in allowed_countries:
+         status = 'allowed'
+     else:
+         # The convention is that if allow rules exist, any country not on that list
+         # is blocked.
+         assert len(allowed_countries) > 0
+         status = 'not_on_country_allow_list'
+         if return_status:
+             return status
+         else:
+             return False
+
+     # Now let's see whether we have to deal with any regional rules
+     if state is None:
+
+         # If state-level rules exist for this country, we can't make a
+         # determination without a state
+         if country_code == 'USA':
+             state_list = geofencing_rules_this_species[rule_type][country_code]
+             if len(state_list) > 0:
+                 raise ValueError('Cannot determine status for a species with ' +
+                                  'state-level rules when no state is provided')
+
+     else:
+
+         # Right now state-level rules only exist for the US
+         assert country_code == 'USA'
+         state_list = geofencing_rules_this_species[rule_type][country_code]
+
+         if state in state_list:
+             # If the state is on the list, do what the list says
+             if rule_type == 'allow':
+                 status = 'allow_on_state_allow_list'
+             else:
+                 status = 'block_on_state_block_list'
+         else:
+             # If the state is not on the list, do the opposite of what the list says
+             if rule_type == 'allow':
+                 status = 'block_not_on_state_allow_list'
+             else:
+                 status = 'allow_not_on_state_block_list'
+
+     if return_status:
+         return status
+     else:
+         if status.startswith('allow'):
+             return True
+         else:
+             assert status.startswith('block')
+             return False
+
+ # ...def species_allowed_in_country(...)
+
+
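+ # An illustrative-only sketch of the semantics above, using a hypothetical in-memory
+ # rule set rather than the real geofencing file (and assuming the taxonomy and
+ # country code tables have already been initialized):
+ #
+ # taxonomy_string_to_geofencing_rules = \
+ #     {'mammalia;carnivora;felidae;lynx;canadensis': {'allow': {'USA': ['WA','MT']}}}
+ # species_allowed_in_country('mammalia;carnivora;felidae;lynx;canadensis',
+ #                            'USA',state='WA')  # True: on the state allow list
+ # species_allowed_in_country('mammalia;carnivora;felidae;lynx;canadensis',
+ #                            'USA',state='CA')  # False: not on the state allow list
+
+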
+ #%% Interactive driver(s)
+
+ if False:
+
+     pass
+
+     #%% instances.json generation test
+
+     from megadetector.utils.wi_utils import generate_instances_json_from_folder # noqa
+
+     instances_file = r'g:\temp\water-hole\instances.json'
+
+     _ = generate_instances_json_from_folder(folder=r'g:\temp\water-hole',
+                                             country='NAM',
+                                             lat=None,
+                                             lon=None,
+                                             output_file=instances_file,
+                                             filename_replacements={'g:/temp':'/mnt/g/temp'})
+
+     # from megadetector.utils.path_utils import open_file; open_file(instances_file)
+
+
+     #%% MD --> prediction conversion test
+
+     from megadetector.utils.wi_utils import generate_predictions_json_from_md_results # noqa
+     md_results_file = r'G:\temp\md-test-images\mdv5a.relpaths.json'
+     predictions_json_file = r'\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\mdv5a.abspaths.predictions-format.json'
+     generate_predictions_json_from_md_results(md_results_file,predictions_json_file,
+                                               base_folder='/home/dmorris/tmp/md-test-images/')
+
+     from megadetector.utils.wi_utils import generate_predictions_json_from_md_results # noqa
+     md_results_file = r"G:\temp\water-hole\md_results.json"
+     predictions_json_file = r"G:\temp\water-hole\md_results-prediction_format.json"
+     generate_predictions_json_from_md_results(md_results_file,predictions_json_file,
+                                               base_folder='/mnt/g/temp/water-hole')
+
+
+     #%% Geofencing tests
+
+     geofencing_file = r'g:\temp\geofence_mapping.json'
+     country_code_file = r'G:/temp/country-codes.csv'
+     encoding = 'cp1252'; taxonomy_file = r'g:\temp\taxonomy_mapping-' + encoding + '.json'
+
+     initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)
+     initialize_geofencing(geofencing_file, country_code_file, force_init=True)
+
+     species = 'didelphis marsupialis'
+     print(binomial_name_to_taxonomy_info[species])
+     country = 'Guatemala'
+     assert species_allowed_in_country(species, country)
+
+     species = 'virginia opossum'
+     print(common_name_to_taxonomy_info[species])
+     country = 'USA'
+     assert species_allowed_in_country(species, country)
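+
+     # Illustrative check of return_status, re-using the tables initialized above;
+     # the status strings are the ones defined in species_allowed_in_country()
+     status = species_allowed_in_country(species, country, return_status=True)
+     print('Status for {} in {}: {}'.format(species, country, status))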
+
+
+     #%% Test several species
+
+     geofencing_file = r'g:\temp\geofence_mapping.json'
+     country_code_file = r'G:/temp/country-codes.csv'
+     encoding = 'cp1252'; taxonomy_file = r'g:\temp\taxonomy_mapping-' + encoding + '.json'
+
+     initialize_taxonomy_info(taxonomy_file, force_init=True, encoding=encoding)
+     initialize_geofencing(geofencing_file, country_code_file, force_init=True)
+
+     if True:
+
+         # Make sure some Guatemalan species are allowed in Guatemala
+         all_species = [
+             'didelphis marsupialis',
+             'didelphis virginiana',
+             'dasypus novemcinctus',
+             'urocyon cinereoargenteus',
+             'nasua narica',
+             'eira barbara',
+             'conepatus semistriatus',
+             'leopardus wiedii',
+             'leopardus pardalis',
+             'puma concolor',
+             'panthera onca',
+             'tapirus bairdii',
+             'pecari tajacu',
+             'tayassu pecari',
+             'mazama temama',
+             'mazama pandora',
+             'odocoileus virginianus',
+             'dasyprocta punctata',
+             'tinamus major',
+             'crax rubra',
+             'meleagris ocellata',
+             'gulo gulo' # Consistency check; this species should be blocked
+         ]
+
+         country = 'guatemala'
+         state = None
+
+     if True:
+
+         # Make sure some PNW species are allowed in the right states
+         all_species = \
+             ['Taxidea taxus',
+              'Martes americana',
+              'Ovis canadensis',
+              'Ursus americanus',
+              'Lynx rufus',
+              'Lynx canadensis',
+              'Puma concolor',
+              'Canis latrans',
+              'Cervus canadensis',
+              'Canis lupus',
+              'Ursus arctos',
+              'Marmota caligata',
+              'Alces alces',
+              'Oreamnos americanus',
+              'Odocoileus hemionus',
+              'Vulpes vulpes',
+              'Lepus americanus',
+              'Mephitis mephitis',
+              'Odocoileus virginianus',
+              'Marmota flaviventris',
+              'tapirus bairdii' # Consistency check; this species should be blocked
+             ]
+
+         all_species = [s.lower() for s in all_species]
+
+         country = 'USA'
+         state = 'WA'
+         # state = 'MT'
+
+     if True:
+
+         all_species = ['ammospermophilus harrisii']
+         country = 'USA'
+         state = 'CA'
+
+     for species in all_species:
+
+         taxonomy_info = binomial_name_to_taxonomy_info[species]
+         allowed = species_allowed_in_country(species, country, state=state, return_status=True)
+         state_string = ''
+         if state is not None:
+             state_string = ' ({})'.format(state)
+         print('{} ({}) for {}{}: {}'.format(taxonomy_info['common_name'],species,
+                                             country,state_string,allowed))
+
+
+     #%% Test conversion from predictions.json to MD format
+
+     import os # noqa
+     from megadetector.utils.wi_utils import generate_md_results_from_predictions_json # noqa
+
+     # detector_source = 'speciesnet'
+     detector_source = 'md'
+
+     if False:
+         image_folder = r'g:\temp\md-test-images'
+         base_folder = '/home/dmorris/tmp/md-test-images/'
+         if detector_source == 'speciesnet':
+             predictions_json_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output.json"
+             md_results_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-md-format.json"
+         else:
+             assert detector_source == 'md'
+             predictions_json_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-from-md-results.json"
+             md_results_file = r"\\wsl$\Ubuntu\home\dmorris\tmp\speciesnet-tests\ensemble-output-md-format-from-md-results.json"
+     else:
+         image_folder = r'g:\temp\water-hole'
+         base_folder = '/mnt/g/temp/water-hole/'
+         if detector_source == 'speciesnet':
+             predictions_json_file = r'g:\temp\water-hole\ensemble-output.json'
+             md_results_file = r'g:\temp\water-hole\ensemble-output.md_format.json'
+         else:
+             assert detector_source == 'md'
+             predictions_json_file = r'g:\temp\water-hole\ensemble-output-md.json'
+             md_results_file = r'g:\temp\water-hole\ensemble-output-md.md_format.json'
+
+     generate_md_results_from_predictions_json(predictions_json_file=predictions_json_file,
+                                               md_results_file=md_results_file,
+                                               base_folder=base_folder)
+
+     # from megadetector.utils.path_utils import open_file; open_file(md_results_file)
+
+     assert os.path.isdir(image_folder)
+
+
+     #%% Preview
+
+     from megadetector.postprocessing.postprocess_batch_results import \
+         PostProcessingOptions, process_batch_results
+     from megadetector.utils import path_utils
+
+     render_animals_only = False
+
+     options = PostProcessingOptions()
+     options.image_base_dir = image_folder
+     options.include_almost_detections = True
+     options.num_images_to_sample = None
+     options.confidence_threshold = 0.2
+     options.almost_detection_confidence_threshold = options.confidence_threshold - 0.05
+     options.ground_truth_json_file = None
+     options.separate_detections_by_category = True
+     options.sample_seed = 0
+     options.max_figures_per_html_file = 5000
+
+     options.parallelize_rendering = True
+     options.parallelize_rendering_n_cores = 10
+     options.parallelize_rendering_with_threads = True
+     options.sort_classification_results_by_count = True
+
+     if render_animals_only:
+         # Omit some pages from the output, useful when animals are rare
+         options.rendering_bypass_sets = ['detections_person','detections_vehicle',
+                                          'detections_person_vehicle','non_detections']
+
+     output_base = r'g:\temp\preview' + '_' + detector_source
+     if render_animals_only:
+         output_base = output_base + '_render_animals_only'
+     os.makedirs(output_base, exist_ok=True)
+
+     print('Writing preview to {}'.format(output_base))
+
+     options.md_results_file = md_results_file
+     options.output_dir = output_base
+     ppresults = process_batch_results(options)
+     html_output_file = ppresults.output_html_file
+
+     path_utils.open_file(html_output_file,attempt_to_open_in_wsl_host=True,browser_name='chrome')
+     # import clipboard; clipboard.copy(html_output_file)