megadetector-5.0.20-py3-none-any.whl → megadetector-5.0.22-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of megadetector has been flagged as potentially problematic.

Files changed (41)
  1. megadetector/data_management/cct_json_utils.py +143 -7
  2. megadetector/data_management/cct_to_md.py +12 -5
  3. megadetector/data_management/databases/integrity_check_json_db.py +83 -77
  4. megadetector/data_management/importers/osu-small-animals-to-json.py +4 -4
  5. megadetector/data_management/importers/raic_csv_to_md_results.py +416 -0
  6. megadetector/data_management/importers/zamba_results_to_md_results.py +1 -2
  7. megadetector/data_management/lila/create_lila_test_set.py +25 -11
  8. megadetector/data_management/lila/download_lila_subset.py +9 -2
  9. megadetector/data_management/lila/generate_lila_per_image_labels.py +3 -2
  10. megadetector/data_management/lila/test_lila_metadata_urls.py +5 -1
  11. megadetector/data_management/read_exif.py +10 -14
  12. megadetector/data_management/rename_images.py +1 -1
  13. megadetector/data_management/yolo_output_to_md_output.py +18 -5
  14. megadetector/detection/process_video.py +14 -3
  15. megadetector/detection/pytorch_detector.py +15 -3
  16. megadetector/detection/run_detector.py +4 -3
  17. megadetector/detection/run_inference_with_yolov5_val.py +121 -13
  18. megadetector/detection/video_utils.py +40 -17
  19. megadetector/postprocessing/classification_postprocessing.py +1 -1
  20. megadetector/postprocessing/combine_api_outputs.py +1 -1
  21. megadetector/postprocessing/compare_batch_results.py +931 -142
  22. megadetector/postprocessing/detector_calibration.py +565 -0
  23. megadetector/postprocessing/md_to_coco.py +85 -19
  24. megadetector/postprocessing/postprocess_batch_results.py +32 -21
  25. megadetector/postprocessing/validate_batch_results.py +174 -64
  26. megadetector/taxonomy_mapping/map_new_lila_datasets.py +15 -12
  27. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  28. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +3 -1
  29. megadetector/utils/ct_utils.py +64 -2
  30. megadetector/utils/md_tests.py +15 -13
  31. megadetector/utils/path_utils.py +153 -37
  32. megadetector/utils/process_utils.py +9 -3
  33. megadetector/utils/write_html_image_list.py +21 -6
  34. megadetector/visualization/visualization_utils.py +329 -102
  35. megadetector/visualization/visualize_db.py +104 -63
  36. {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/LICENSE +0 -0
  37. {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/METADATA +143 -142
  38. {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/RECORD +40 -39
  39. {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/WHEEL +1 -1
  40. {megadetector-5.0.20.dist-info → megadetector-5.0.22.dist-info}/top_level.txt +0 -0
  41. megadetector/data_management/importers/prepare-noaa-fish-data-for-lila.py +0 -359
megadetector/taxonomy_mapping/map_new_lila_datasets.py

@@ -15,15 +15,17 @@ import json
 # Created by get_lila_category_list.py
 input_lila_category_list_file = os.path.expanduser('~/lila/lila_categories_list/lila_dataset_to_categories.json')
 
-output_file = os.path.expanduser('~/lila/lila_additions_2024.10.05.csv')
+output_file = os.path.expanduser('~/lila/lila_additions_2024.12.31.csv')
 
 datasets_to_map = [
-    'Ohio Small Animals'
+    'Seattle(ish) Camera Traps'
 ]
 
 
 #%% Initialize taxonomic lookup
 
+# Takes ~2 mins
+
 from megadetector.taxonomy_mapping.species_lookup import \
     initialize_taxonomy_lookup, get_preferred_taxonomic_match
 
@@ -39,27 +41,27 @@ lila_datasets = set()
 for dataset_name in input_lila_categories.keys():
     # The script that generates this dictionary creates a separate entry for bounding box
-    # metadata files, but those don't represent new dataset names
+    # metadata files, but those don't represent new dataset names, so we ignore them here.
     lila_datasets.add(dataset_name.replace('_bbox',''))
-
+
 for s in datasets_to_map:
     assert s in lila_datasets
-
-
+
+
 #%% Find all categories
 
 category_mappings = []
 
 # dataset_name = datasets_to_map[0]
 for dataset_name in datasets_to_map:
-
+
     ds_categories = input_lila_categories[dataset_name]
     for category in ds_categories:
        category_name = category['name']
        assert ':' not in category_name
        mapping_name = dataset_name + ':' + category_name
        category_mappings.append(mapping_name)
-
+
 print('Need to create {} mappings'.format(len(category_mappings)))
 
 
@@ -128,22 +130,23 @@ output_df.to_csv(output_file, index=None, header=True)
 
 if False:
 
-    #%%
-
+    #%% You probably want to open the .csv file first
+
     from megadetector.utils.path_utils import open_file
     open_file(output_file)
+
 
     #%%
 
     # q = 'white-throated monkey'
     # q = 'cingulata'
     # q = 'notamacropus'
-    q = 'thamnophis saurita saurita'
+    q = 'insects'
     taxonomy_preference = 'inat'
     m = get_preferred_taxonomic_match(q,taxonomy_preference)
     # print(m.scientific_name); import clipboard; clipboard.copy(m.scientific_name)
 
-    if m is None:
+    if (m is None) or (len(m.taxonomy_string) == 0):
        print('No match')
    else:
        if m.source != taxonomy_preference:

megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py

@@ -89,7 +89,7 @@ if False:
     'genus',
     'species','subspecies','variety']
 
-    levels_to_exclude = ['stateofmatter','zoosection','parvorder','complex']
+    levels_to_exclude = ['stateofmatter','zoosection','parvorder','complex','epifamily']
 
    for s in levels_to_exclude:
        assert s not in levels_to_include

megadetector/taxonomy_mapping/preview_lila_taxonomy.py

@@ -16,7 +16,7 @@ import os
 import pandas as pd
 
 # lila_taxonomy_file = r"c:\git\agentmorrisprivate\lila-taxonomy\lila-taxonomy-mapping.csv"
-lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2024.10.05.csv')
+lila_taxonomy_file = os.path.expanduser('~/lila/lila_additions_2024.12.31.csv')
 
 preview_base = os.path.expanduser('~/lila/lila_taxonomy_preview')
 os.makedirs(preview_base,exist_ok=True)
@@ -399,6 +399,8 @@ images_per_query = 15
 min_valid_images_per_query = 3
 min_valid_image_size = 3000
 
+# TODO: parallelize this loop
+#
 # i_row = 0; row = df.iloc[i_row]
 for i_row,row in df.iterrows():
 

megadetector/utils/ct_utils.py

@@ -12,6 +12,7 @@ import inspect
 import json
 import math
 import os
+import builtins
 
 import jsonpickle
 import numpy as np
@@ -613,6 +614,50 @@ def is_empty(v):
     return False
 
 
+def min_none(a,b):
+    """
+    Returns the minimum of a and b. If both are None, returns None. If one is None,
+    returns the other.
+
+    Args:
+        a (numeric): the first value to compare
+        b (numeric): the second value to compare
+
+    Returns:
+        numeric: the minimum of a and b, or None
+    """
+    if a is None and b is None:
+        return None
+    elif a is None:
+        return b
+    elif b is None:
+        return a
+    else:
+        return min(a,b)
+
+
+def max_none(a,b):
+    """
+    Returns the maximum of a and b. If both are None, returns None. If one is None,
+    returns the other.
+
+    Args:
+        a (numeric): the first value to compare
+        b (numeric): the second value to compare
+
+    Returns:
+        numeric: the maximum of a and b, or None
+    """
+    if a is None and b is None:
+        return None
+    elif a is None:
+        return b
+    elif b is None:
+        return a
+    else:
+        return max(a,b)
+
+
 def isnan(v):
     """
     Returns True if v is a nan-valued float, otherwise returns False.
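
The new min_none/max_none helpers treat None as "no value" rather than raising; a minimal sketch of the behavior (assuming the wheel is installed, so ct_utils is importable):

    from megadetector.utils.ct_utils import min_none, max_none

    # A None argument just yields the other value...
    assert min_none(None, 3) == 3
    assert max_none(None, 3) == 3

    # ...two real values behave like min()/max()...
    assert min_none(2, 3) == 2
    assert max_none(2, 3) == 3

    # ...and only all-None input produces None
    assert min_none(None, None) is None
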
@@ -645,7 +690,24 @@ def sets_overlap(set1, set2):
     return not set(set1).isdisjoint(set(set2))
 
 
-
+def is_function_name(s,calling_namespace):
+    """
+    Determines whether [s] is a callable function in the global or local scope, or a
+    built-in function.
+
+    Args:
+        s (str): the string to test for function-ness
+        calling_namespace (dict): typically pass the output of locals()
+    """
+
+    assert isinstance(s,str), 'Input is not a string'
+
+    return callable(globals().get(s)) or \
+           callable(locals().get(s)) or \
+           callable(calling_namespace.get(s)) or \
+           callable(getattr(builtins, s, None))
+
+
 #%% Test drivers
 
 if False:
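
Because globals()/locals() inside ct_utils can't see the caller's scope, is_function_name takes the calling namespace explicitly; a short sketch (the helper name here is hypothetical):

    from megadetector.utils.ct_utils import is_function_name

    def my_helper():
        pass

    # Found via the namespace the caller passes in
    assert is_function_name('my_helper', locals())

    # Builtins are checked as well
    assert is_function_name('print', locals())

    # Unknown names are not callable anywhere in scope
    assert not is_function_name('no_such_helper', locals())
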
@@ -678,4 +740,4 @@ if False:
     L = [{'a':5},{'a':0},{'a':10}]
     k = 'a'
     sort_list_of_dicts_by_key(L, k, reverse=True)
-
+

megadetector/utils/md_tests.py

@@ -29,10 +29,6 @@ import subprocess
 import argparse
 import inspect
 
-#: IoU threshold used to determine whether boxes in two detection files likely correspond
-#: to the same box.
-iou_threshold_for_file_comparison = 0.9
-
 
 #%% Classes
 
@@ -106,6 +102,10 @@ class MDTestOptions:
         #: PYTHONPATH to set for CLI tests; if None, inherits from the parent process. Only
         #: impacts the called functions, not the parent process.
         self.cli_test_pythonpath = None
+
+        #: IoU threshold used to determine whether boxes in two detection files likely correspond
+        #: to the same box.
+        self.iou_threshold_for_file_comparison = 0.85
 
 # ...class MDTestOptions()
 
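
Since the IoU threshold is now an option rather than a module constant, each test run can tune how strictly boxes are matched across files; a small sketch, assuming MDTestOptions can be constructed with no arguments as the attribute defaults above suggest:

    from megadetector.utils.md_tests import MDTestOptions

    options = MDTestOptions()

    # Loosen the criterion for deciding that two boxes are "the same" detection
    options.iou_threshold_for_file_comparison = 0.7
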
@@ -410,7 +410,7 @@ def compare_detection_lists(detections_a,detections_b,options,bidirectional_comp
             iou = get_iou(det_a['bbox'],b_det['bbox'])
 
             # Is this likely the same detection as det_a?
-            if iou >= iou_threshold_for_file_comparison and iou > highest_iou:
+            if iou >= options.iou_threshold_for_file_comparison and iou > highest_iou:
                 matching_det_b = b_det
                 highest_iou = iou
 
@@ -529,12 +529,14 @@ def compare_results(inference_output_file,expected_results_file,options):
     if not options.warning_mode:
 
         assert max_conf_error <= options.max_conf_error, \
-            'Confidence error {} is greater than allowable ({}), on file:\n{}'.format(
-            max_conf_error,options.max_conf_error,max_conf_error_file)
+            'Confidence error {} is greater than allowable ({}), on file:\n{} ({},{})'.format(
+            max_conf_error,options.max_conf_error,max_conf_error_file,
+            inference_output_file,expected_results_file)
 
         assert max_coord_error <= options.max_coord_error, \
-            'Coord error {} is greater than allowable ({}), on file:\n{}'.format(
-            max_coord_error,options.max_coord_error,max_coord_error_file)
+            'Coord error {} is greater than allowable ({}), on file:\n{} ({},{})'.format(
+            max_coord_error,options.max_coord_error,max_coord_error_file,
+            inference_output_file,expected_results_file)
 
     print('Max conf error: {} (file {})'.format(
         max_conf_error,max_conf_error_file))
@@ -847,7 +849,7 @@ def run_python_tests(options):
     video_options.frame_rendering_folder = os.path.join(options.scratch_dir,'video_scratch/rendered_frame_folder')
     video_options.render_output_video = True
     # video_options.keep_rendered_frames = False
-    # video_options.keep_rendered_frames = False
+    # video_options.keep_extracted_frames = False
     video_options.force_extracted_frame_folder_deletion = True
     video_options.force_rendered_frame_folder_deletion = True
     # video_options.reuse_results_if_available = False
@@ -887,7 +889,7 @@ def run_python_tests(options):
     video_options.frame_rendering_folder = os.path.join(options.scratch_dir,'video_scratch/rendered_frame_folder')
     video_options.render_output_video = False
     video_options.keep_rendered_frames = False
-    video_options.keep_rendered_frames = False
+    video_options.keep_extracted_frames = False
     video_options.force_extracted_frame_folder_deletion = True
     video_options.force_rendered_frame_folder_deletion = True
     video_options.reuse_results_if_available = False
@@ -1208,7 +1210,7 @@ def run_cli_tests(options):
     cmd += ' --overwrite_handling overwrite'
     cmd_results = execute_and_print(cmd)
 
-    # Run again with checkpointing, make sure the output are identical
+    # Run again with checkpointing, make sure the outputs are identical
     cmd += ' --checkpoint_frequency 5'
     inference_output_file_yolo_val_checkpoint = \
         os.path.join(options.scratch_dir,'folder_inference_output_yolo_val_checkpoint.json')
@@ -1353,7 +1355,7 @@ if False:
     # options.cli_working_dir = r'c:\git\MegaDetector'
     # options.yolo_working_dir = r'c:\git\yolov5-md'
     options.cli_working_dir = os.path.expanduser('~')
-    options.yolo_working_dir = '/mnt/c/git/yolov5-md'
+    # options.yolo_working_dir = '/mnt/c/git/yolov5-md'
     options = download_test_data(options)
 
     #%%

megadetector/utils/path_utils.py

@@ -17,6 +17,7 @@ import platform
 import string
 import json
 import shutil
+import hashlib
 import unicodedata
 import zipfile
 import tarfile
@@ -31,6 +32,8 @@ from functools import partial
 from shutil import which
 from tqdm import tqdm
 
+from megadetector.utils.ct_utils import is_iterable
+
 # Should all be lower-case
 IMG_EXTENSIONS = ('.jpg', '.jpeg', '.gif', '.png', '.tif', '.tiff', '.bmp')
 
@@ -236,6 +239,30 @@ def path_is_abs(p):
     return (len(p) > 1) and (p[0] == '/' or p[1] == ':' or p[0] == '\\')
 
 
+def safe_create_link(link_exists,link_new):
+    """
+    Creates a symlink at [link_new] pointing to [link_exists].
+
+    If [link_new] already exists, make sure it's a link (not a file),
+    and if it has a different target than [link_exists], removes and re-creates
+    it.
+
+    Errors if [link_new] already exists but it's not a link.
+
+    Args:
+        link_exists (str): the source of the (possibly-new) symlink
+        link_new (str): the target of the (possibly-new) symlink
+    """
+
+    if os.path.exists(link_new) or os.path.islink(link_new):
+        assert os.path.islink(link_new)
+        if not os.readlink(link_new) == link_exists:
+            os.remove(link_new)
+            os.symlink(link_exists,link_new)
+    else:
+        os.symlink(link_exists,link_new)
+
+
 def top_level_folder(p):
     r"""
     Gets the top-level folder from the path *p*.
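
safe_create_link is effectively idempotent: re-running it with the same arguments is a no-op, and a changed target replaces the old link. A sketch with hypothetical paths:

    from megadetector.utils.path_utils import safe_create_link

    # Creates /data/current -> /data/releases/v2
    safe_create_link('/data/releases/v2', '/data/current')

    # Same target again: no-op
    safe_create_link('/data/releases/v2', '/data/current')

    # Different target: the old link is removed and re-created
    safe_create_link('/data/releases/v3', '/data/current')
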
@@ -296,31 +323,6 @@ if False:
     p = r'c:/foo'; s = top_level_folder(p); print(s); assert s == 'c:/foo'
     p = r'c:\foo/bar'; s = top_level_folder(p); print(s); assert s == 'c:\\foo'
 
-    #%%
-
-    def safe_create_link(link_exists,link_new):
-        """
-        Creates a symlink at [link_new] pointing to [link_exists].
-
-        If [link_new] already exists, make sure it's a link (not a file),
-        and if it has a different target than [link_exists], removes and re-creates
-        it.
-
-        Errors if [link_new] already exists but it's not a link.
-
-        Args:
-            link_exists (str): the source of the (possibly-new) symlink
-            link_new (str): the target of the (possibly-new) symlink
-        """
-
-        if os.path.exists(link_new) or os.path.islink(link_new):
-            assert os.path.islink(link_new)
-            if not os.readlink(link_new) == link_exists:
-                os.remove(link_new)
-                os.symlink(link_exists,link_new)
-        else:
-            os.symlink(link_exists,link_new)
-
 
 #%% Image-related path functions
 
@@ -598,7 +600,9 @@ def open_file(filename, attempt_to_open_in_wsl_host=False, browser_name=None):
 
         opener = 'xdg-open'
         subprocess.call([opener, filename])
-
+
+# ...def open_file(...)
+
 
 #%% File list functions
 
@@ -649,8 +653,12 @@ def _copy_file(input_output_tuple,overwrite=True,verbose=False):
     target_fn = input_output_tuple[1]
     if (not overwrite) and (os.path.isfile(target_fn)):
         if verbose:
-            print('Skipping existing file {}'.format(target_fn))
-        return
+            print('Skipping existing target file {}'.format(target_fn))
+        return
+
+    if verbose:
+        print('Copying to target file {}'.format(target_fn))
+
     os.makedirs(os.path.dirname(target_fn),exist_ok=True)
     shutil.copyfile(source_fn,target_fn)
 
@@ -667,7 +675,7 @@ def parallel_copy_files(input_file_to_output_file, max_workers=16,
         use_threads (bool, optional): whether to use threads (True) or processes (False) for
             parallel copying; ignored if max_workers <= 1
         overwrite (bool, optional): whether to overwrite existing destination files
-        verbose (bool, optional): enable additionald debug output
+        verbose (bool, optional): enable additional debug output
     """
 
     n_workers = min(max_workers,len(input_file_to_output_file))
@@ -750,7 +758,7 @@ def parallel_get_file_sizes(filenames,
         max_workers (int, optional): number of concurrent workers; set to <=1 to disable parallelism
         use_threads (bool, optional): whether to use threads (True) or processes (False) for
             parallel copying; ignored if max_workers <= 1
-        verbose (bool, optional): enable additionald debug output
+        verbose (bool, optional): enable additional debug output
         recursive (bool, optional): enumerate recursively, only relevant if [filenames] is a folder.
         convert_slashes (bool, optional): convert backslashes to forward slashes
         return_relative_paths (bool, optional): return relative paths; only relevant if [filenames]
@@ -764,16 +772,21 @@ def parallel_get_file_sizes(filenames,
 
     folder_name = None
 
-    if verbose:
-        print('Enumerating files')
-
-    if isinstance(filenames,str) and os.path.isdir(filenames):
-
+    if isinstance(filenames,str):
+
         folder_name = filenames
+        assert os.path.isdir(filenames), 'Could not find folder {}'.format(folder_name)
 
+        if verbose:
+            print('Enumerating files in {}'.format(folder_name))
+
         # Enumerate absolute paths here, we'll convert to relative later if requested
-        filenames = recursive_file_list(filenames,recursive=recursive,return_relative_paths=False)
+        filenames = recursive_file_list(folder_name,recursive=recursive,return_relative_paths=False)
 
+    else:
+
+        assert is_iterable(filenames), '[filenames] argument is neither a folder nor an iterable'
+
     if verbose:
         print('Creating worker pool')
 
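
With this change, a string argument must be a real folder and anything else must be iterable; a sketch with a hypothetical folder, assuming the function returns a dict mapping each filename to its size (per the to_return value later in this function):

    from megadetector.utils.path_utils import parallel_get_file_sizes

    # Folder input: files are enumerated (recursively by default), then sized in parallel
    size_by_file = parallel_get_file_sizes('/data/camera-traps', max_workers=8, verbose=True)

    # Iterable input: sizes exactly the files given
    some_files = sorted(size_by_file.keys())[:10]
    subset_sizes = parallel_get_file_sizes(some_files)
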
@@ -804,6 +817,8 @@
 
     return to_return
 
+# ...def parallel_get_file_sizes(...)
+
 
 #%% Zip functions
 
@@ -932,7 +947,7 @@ def zip_folder(input_folder, output_fn=None, overwrite=False, verbose=False, com
         output_fn (str, optional): output filename; if this is None, we'll write to [input_folder].zip
         overwrite (bool, optional): whether to overwrite an existing .tar file
         verbose (bool, optional): enable additional debug console output
-        compresslevel (int, optional): compression level to use, between 0 and 9
+        compresslevel (int, optional): compression level to use, between 0 and 9
 
     Returns:
         str: the output zipfile, whether we created it or determined that it already exists
@@ -1075,3 +1090,104 @@ def unzip_file(input_file, output_folder=None):
 
     with zipfile.ZipFile(input_file, 'r') as zf:
         zf.extractall(output_folder)
+
+
+#%% File hashing functions
+
+def compute_file_hash(file_path, algorithm='sha256', allow_failures=True):
+    """
+    Compute the hash of a file.
+
+    Adapted from:
+
+    https://www.geeksforgeeks.org/python-program-to-find-hash-of-file/
+
+    Args:
+        file_path (str): the file to hash
+        algorithm (str, optional): the hashing algorithm to use (e.g. md5, sha256)
+
+    Returns:
+        str: the hash value for this file
+    """
+
+    try:
+
+        hash_func = hashlib.new(algorithm)
+
+        with open(file_path, 'rb') as file:
+            while chunk := file.read(8192): # Read the file in chunks of 8192 bytes
+                hash_func.update(chunk)
+
+        return str(hash_func.hexdigest())
+
+    except Exception:
+
+        if allow_failures:
+            return None
+        else:
+            raise
+
+# ...def compute_file_hash(...)
+
+
+def parallel_compute_file_hashes(filenames,
+                                 max_workers=16,
+                                 use_threads=True,
+                                 recursive=True,
+                                 algorithm='sha256',
+                                 verbose=False):
+    """
+    Compute file hashes for a list or folder of images.
+
+    Args:
+        filenames (list or str): a list of filenames or a folder
+        max_workers (int, optional): the number of parallel workers to use; set to <=1 to disable
+            parallelization
+        use_threads (bool, optional): whether to use threads (True) or processes (False) for
+            parallelization
+        algorithm (str, optional): the hashing algorithm to use (e.g. md5, sha256)
+        recursive (bool, optional): if [filenames] is a folder, whether to enumerate recursively.
+            Ignored if [filenames] is a list.
+        verbose (bool, optional): enable additional debug output
+
+    Returns:
+        dict: a dict mapping filenames to hash values; values will be None for files that fail
+        to load.
+    """
+
+    if isinstance(filenames,str) and os.path.isdir(filenames):
+        if verbose:
+            print('Enumerating files in {}'.format(filenames))
+        filenames = recursive_file_list(filenames,recursive=recursive,return_relative_paths=False)
+
+    n_workers = min(max_workers,len(filenames))
+
+    if verbose:
+        print('Computing hashes for {} files on {} workers'.format(len(filenames),n_workers))
+
+    if n_workers <= 1:
+
+        results = []
+        for filename in filenames:
+            results.append(compute_file_hash(filename,algorithm=algorithm,allow_failures=True))
+
+    else:
+
+        if use_threads:
+            pool = ThreadPool(n_workers)
+        else:
+            pool = Pool(n_workers)
+
+        results = list(tqdm(pool.imap(
+            partial(compute_file_hash,algorithm=algorithm,allow_failures=True),
+            filenames), total=len(filenames)))
+
+    assert len(filenames) == len(results), 'Internal error in parallel_compute_file_hashes'
+
+    to_return = {}
+    for i_file,filename in enumerate(filenames):
+        to_return[filename] = results[i_file]
+
+    return to_return
+
+# ...def parallel_compute_file_hashes(...)
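
A quick sketch of the two new hashing helpers, with a hypothetical folder; note that allow_failures=True (the default) maps unreadable files to None rather than raising:

    from megadetector.utils.path_utils import compute_file_hash, parallel_compute_file_hashes

    # Hash one file
    h = compute_file_hash('/data/camera-traps/img_0001.jpg', algorithm='md5')

    # Hash a folder on 8 worker threads; returns a filename-to-hash dict
    hashes = parallel_compute_file_hashes('/data/camera-traps', max_workers=8, verbose=True)
    failed_files = [fn for fn in hashes if hashes[fn] is None]
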
megadetector/utils/process_utils.py

@@ -59,8 +59,13 @@ def execute(cmd,encoding=None,errors=None,env=None,verbose=False):
     return return_code
 
 
-def execute_and_print(cmd,print_output=True,encoding=None,errors=None,
-                      env=None,verbose=False,catch_exceptions=True,
+def execute_and_print(cmd,
+                      print_output=True,
+                      encoding=None,
+                      errors=None,
+                      env=None,
+                      verbose=False,
+                      catch_exceptions=True,
                       echo_command=False):
     """
     Run [cmd] (a single string) in a shell, capturing and printing output. Returns
@@ -73,7 +78,8 @@ def execute_and_print(cmd,print_output=True,encoding=None,errors=None,
 
     Args:
         cmd (str): command to run
-        print_output (bool, optional): whether to print output from [cmd]
+        print_output (bool, optional): whether to print output from [cmd] (stdout is
+            captured regardless of the value of print_output)
         encoding (str, optional): stdout encoding, see Popen() documentation
         errors (str, optional): error handling, see Popen() documentation
         env (dict, optional): environment variables, see Popen() documentation
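
The clarified docstring is worth underlining: stdout is captured whether or not it is printed, so callers can run quietly and still inspect output. A sketch of typical use (the exact structure of the return value isn't shown in this diff):

    from megadetector.utils.process_utils import execute_and_print

    # Echo the command itself, but don't echo its output
    cmd_results = execute_and_print('python --version',
                                    print_output=False,
                                    echo_command=True)
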
megadetector/utils/write_html_image_list.py

@@ -42,7 +42,9 @@ def write_html_image_list(filename=None,images=None,options=None):
     options (dict, optional): a dict with one or more of the following fields:
 
         - fHtml (file pointer to write to, used for splitting write operations over multiple calls)
+        - pageTitle (HTML page title)
         - headerHtml (html text to include before the image list)
+        - subPageHeaderHtml (html text to include before the images when images are broken into pages)
         - trailerHtml (html text to include after the image list)
         - defaultImageStyle (default css style for images)
         - defaultTextStyle (default css style for image titles)
@@ -60,11 +62,17 @@
     if 'fHtml' not in options:
         options['fHtml'] = -1
 
+    if 'pageTitle' not in options or options['pageTitle'] is None:
+        options['pageTitle'] = ''
+
     if 'headerHtml' not in options or options['headerHtml'] is None:
-        options['headerHtml'] = ''
+        options['headerHtml'] = ''
 
+    if 'subPageHeaderHtml' not in options or options['subPageHeaderHtml'] is None:
+        options['subPageHeaderHtml'] = ''
+
     if 'trailerHtml' not in options or options['trailerHtml'] is None:
-        options['trailerHtml'] = ''
+        options['trailerHtml'] = ''
 
     if 'defaultTextStyle' not in options or options['defaultTextStyle'] is None:
         options['defaultTextStyle'] = \
@@ -114,7 +122,7 @@
         # You can't supply your own file handle in this case
         if options['fHtml'] != -1:
             raise ValueError(
-                'You can''t supply your own file handle if we have to page the image set')
+                "You can't supply your own file handle if we have to page the image set")
 
         figureFileStartingIndices = list(range(0,nImages,options['maxFiguresPerHtmlFile']))
 
@@ -124,7 +132,10 @@
         fMeta = open(filename,'w')
 
         # Write header stuff
-        fMeta.write('<html><body>\n')
+        titleString = '<title>Index page</title>'
+        if len(options['pageTitle']) > 0:
+            titleString = '<title>Index page for: {}</title>'.format(options['pageTitle'])
+        fMeta.write('<html><head>{}</head><body>\n'.format(titleString))
         fMeta.write(options['headerHtml'])
         fMeta.write('<table border = 0 cellpadding = 2>\n')
 
@@ -145,7 +156,7 @@
             localImages = images[iStart:iEnd+1]
 
             localOptions = options.copy();
-            localOptions['headerHtml'] = '';
+            localOptions['headerHtml'] = options['subPageHeaderHtml'];
             localOptions['trailerHtml'] = '';
 
             # Make a recursive call for this image set
@@ -170,7 +181,11 @@
     else:
         fHtml = options['fHtml']
 
-    fHtml.write('<html><body>\n')
+    titleString = ''
+    if len(options['pageTitle']) > 0:
+        titleString = '<title>{}</title>'.format(options['pageTitle'])
+
+    fHtml.write('<html>{}<body>\n'.format(titleString))
 
     fHtml.write(options['headerHtml'])
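
A sketch of the new title and paging options; the filenames are hypothetical, and maxFiguresPerHtmlFile is the existing paging option referenced above:

    from megadetector.utils.write_html_image_list import write_html_image_list

    options = {
        'pageTitle': 'Detection previews',
        'headerHtml': '<h1>Detection previews</h1>',
        # Used in place of headerHtml on each sub-page when the list is paged
        'subPageHeaderHtml': '<h2>Detection previews (continued)</h2>',
        'maxFiguresPerHtmlFile': 500
    }

    write_html_image_list(filename='preview/index.html',
                          images=['im_0001.jpg', 'im_0002.jpg'],
                          options=options)
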