megadetector 10.0.6__py3-none-any.whl → 10.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -2,12 +2,8 @@
 
 convert_output_format.py
 
-Converts between file formats output by our batch processing API. Currently
-supports json <--> csv conversion, but this should be the landing place for any
-conversion - including between hypothetical alternative .json versions - that we support
-in the future.
-
-The .csv format is largely obsolete, don't use it unless you're super-duper sure you need it.
+Converts between .json and .csv representations of MD output. The .csv format is
+largely obsolete; don't use it unless you're super-duper sure you need it.
 
 """
 
@@ -15,13 +11,16 @@ The .csv format is largely obsolete, don't use it unless you're super-duper sure
 
 import argparse
 import json
-import csv
 import sys
 import os
 
 from tqdm import tqdm
+from collections import defaultdict
+
+import pandas as pd
 
 from megadetector.postprocessing.load_api_results import load_api_results_csv
+from megadetector.utils.wi_taxonomy_utils import load_md_or_speciesnet_file
 from megadetector.data_management.annotations import annotation_constants
 from megadetector.utils import ct_utils
 
@@ -35,16 +34,13 @@ def convert_json_to_csv(input_path,
                         min_confidence=None,
                         omit_bounding_boxes=False,
                         output_encoding=None,
-                        overwrite=True):
+                        overwrite=True,
+                        verbose=False):
     """
     Converts a MD results .json file to a totally non-standard .csv format.
 
     If [output_path] is None, will convert x.json to x.csv.
 
-    TODO: this function should obviously be using Pandas or some other sensible structured
-    representation of tabular data. Even a list of dicts. This implementation is quite
-    brittle and depends on adding fields to every row in exactly the right order.
-
     Args:
         input_path (str): the input .json file to convert
         output_path (str, optional): the output .csv file to generate; if this is None, uses
@@ -57,7 +53,7 @@ def convert_json_to_csv(input_path,
         output_encoding (str, optional): encoding to use for the .csv file
         overwrite (bool, optional): whether to overwrite an existing .csv file; if this is False and
             the output file exists, no-ops and returns
-
+        verbose (bool, optional): enable additional debug output
     """
 
     if output_path is None:
@@ -68,36 +64,28 @@ def convert_json_to_csv(input_path,
         return
 
     print('Loading json results from {}...'.format(input_path))
-    json_output = json.load(open(input_path))
-
-    rows = []
+    json_output = load_md_or_speciesnet_file(input_path,
+                                             verbose=verbose)
 
-    fixed_columns = ['image_path', 'max_confidence', 'detections']
+    def clean_category_name(s):
+        return s.replace(',','_').replace(' ','_').lower()
 
-    # We add an output column for each class other than 'empty',
-    # containing the maximum probability of that class for each image
-    # n_non_empty_detection_categories = len(annotation_constants.annotation_bbox_categories) - 1
-    n_non_empty_detection_categories = annotation_constants.NUM_DETECTOR_CATEGORIES
-    detection_category_column_names = []
-    assert annotation_constants.detector_bbox_category_id_to_name[0] == 'empty'
-    for cat_id in range(1,n_non_empty_detection_categories+1):
-        cat_name = annotation_constants.detector_bbox_category_id_to_name[cat_id]
-        detection_category_column_names.append('max_conf_' + cat_name)
+    # Create column names for max detection confidences
+    detection_category_id_to_max_conf_column_name = {}
+    for category_id in json_output['detection_categories'].keys():
+        category_name = clean_category_name(json_output['detection_categories'][category_id])
+        detection_category_id_to_max_conf_column_name[category_id] = \
+            'max_conf_' + category_name
 
-    n_classification_categories = 0
+    classification_category_id_to_max_conf_column_name = {}
 
+    # Create column names for max classification confidences (if necessary)
     if 'classification_categories' in json_output.keys():
-        classification_category_id_to_name = json_output['classification_categories']
-        classification_category_ids = list(classification_category_id_to_name.keys())
-        classification_category_id_to_column_number = {}
-        classification_category_column_names = []
-        for i_category,category_id in enumerate(classification_category_ids):
-            category_name = classification_category_id_to_name[category_id].\
-                replace(' ','_').replace(',','')
-            classification_category_column_names.append('max_classification_conf_' + category_name)
-            classification_category_id_to_column_number[category_id] = i_category
-
-        n_classification_categories = len(classification_category_ids)
+
+        for category_id in json_output['classification_categories'].keys():
+            category_name = clean_category_name(json_output['classification_categories'][category_id])
+            classification_category_id_to_max_conf_column_name[category_id] = \
+                'max_classification_conf_' + category_name
 
     # There are several .json fields for which we add .csv columns; other random bespoke fields
     # will be ignored.
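As a concrete illustration of the column-name construction above (this snippet is editorial, not part of the package; the category mapping shown is the standard MD animal/person/vehicle one):

    def clean_category_name(s):
        return s.replace(',','_').replace(' ','_').lower()

    detection_categories = {'1': 'animal', '2': 'person', '3': 'vehicle'}
    columns = {cat_id: 'max_conf_' + clean_category_name(name)
               for cat_id, name in detection_categories.items()}
    # {'1': 'max_conf_animal', '2': 'max_conf_person', '3': 'max_conf_vehicle'}
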
@@ -117,26 +105,43 @@ def convert_json_to_csv(input_path,
     if len(optional_fields_present) > 0:
         print('Found {} optional fields'.format(len(optional_fields_present)))
 
-    expected_row_length = len(fixed_columns) + len(detection_category_column_names) + \
-        n_classification_categories + len(optional_fields_present)
-
     print('Formatting results...')
 
+    output_records = []
+
     # i_image = 0; im = json_output['images'][i_image]
     for im in tqdm(json_output['images']):
 
-        image_id = im['file']
+        output_record = {}
+        output_records.append(output_record)
+
+        output_record['image_path'] = im['file']
+        output_record['max_confidence'] = ''
+        output_record['detections'] = ''
+
+        for field_name in optional_fields_present:
+            output_record[field_name] = ''
+            if field_name in im:
+                output_record[field_name] = im[field_name]
+
+        for detection_category_id in detection_category_id_to_max_conf_column_name:
+            column_name = detection_category_id_to_max_conf_column_name[detection_category_id]
+            output_record[column_name] = 0
+
+        for classification_category_id in classification_category_id_to_max_conf_column_name:
+            column_name = classification_category_id_to_max_conf_column_name[classification_category_id]
+            output_record[column_name] = 0
 
         if 'failure' in im and im['failure'] is not None:
-            row = [image_id, 'failure', im['failure']]
-            rows.append(row)
+            output_record['max_confidence'] = 'failure'
+            output_record['detections'] = im['failure']
             # print('Skipping failed image {} ({})'.format(im['file'],im['failure']))
             continue
 
         max_conf = ct_utils.get_max_conf(im)
+        detection_category_id_to_max_conf = defaultdict(float)
+        classification_category_id_to_max_conf = defaultdict(float)
         detections = []
-        max_detection_category_probabilities = [None] * n_non_empty_detection_categories
-        max_classification_category_probabilities = [0] * n_classification_categories
 
         # d = im['detections'][0]
         for d in im['detections']:
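The defaultdict(float) initialization above is what makes the per-category maxima in the next hunk work without pre-seeding every category ID: a missing key reads as 0.0, so the first confidence seen for a category always survives the comparison. A minimal editorial sketch:

    from collections import defaultdict

    category_id_to_max_conf = defaultdict(float)
    for category_id, conf in [('1', 0.8), ('2', 0.3), ('1', 0.95)]:
        if conf > category_id_to_max_conf[category_id]:
            category_id_to_max_conf[category_id] = conf

    # defaultdict(<class 'float'>, {'1': 0.95, '2': 0.3})
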
@@ -155,31 +160,24 @@ def convert_json_to_csv(input_path,
             xmax = input_bbox[0] + input_bbox[2]
             ymax = input_bbox[1] + input_bbox[3]
             output_detection = [ymin, xmin, ymax, xmax]
-
             output_detection.append(d['conf'])
-
-            # Category 0 is empty, for which we don't have a column, so the max
-            # confidence for category N goes in column N-1
-            detection_category_id = int(d['category'])
-            assert detection_category_id > 0 and detection_category_id <= \
-                n_non_empty_detection_categories
-            detection_category_column = detection_category_id - 1
-            detection_category_max = max_detection_category_probabilities[detection_category_column]
-            if detection_category_max is None or d['conf'] > detection_category_max:
-                max_detection_category_probabilities[detection_category_column] = d['conf']
-
-            output_detection.append(detection_category_id)
+            output_detection.append(int(d['category']))
             detections.append(output_detection)
 
+            detection_category_id = d['category']
+            detection_category_max = detection_category_id_to_max_conf[detection_category_id]
+            if d['conf'] > detection_category_max:
+                detection_category_id_to_max_conf[detection_category_id] = d['conf']
+
             if 'classifications' in d:
-                assert n_classification_categories > 0,\
-                    'Oops, I have classification results, but no classification metadata'
+
                 for c in d['classifications']:
-                    category_id = c[0]
-                    p = c[1]
-                    category_index = classification_category_id_to_column_number[category_id]
-                    if (max_classification_category_probabilities[category_index] < p):
-                        max_classification_category_probabilities[category_index] = p
+                    classification_category_id = c[0]
+                    classification_conf = c[1]
+                    classification_category_max = \
+                        classification_category_id_to_max_conf[classification_category_id]
+                    if classification_conf > classification_category_max:
+                        classification_category_id_to_max_conf[classification_category_id] = classification_conf
 
                 # ...for each classification
 
@@ -191,40 +189,36 @@ def convert_json_to_csv(input_path,
         if not omit_bounding_boxes:
             detection_string = json.dumps(detections)
 
-        row = [image_id, max_conf, detection_string]
-        row.extend(max_detection_category_probabilities)
-        row.extend(max_classification_category_probabilities)
+        output_record['detections'] = detection_string
+        output_record['max_confidence'] = max_conf
 
-        for field_name in optional_fields_present:
-            if field_name not in im:
-                row.append('')
-            else:
-                row.append(str(im[field_name]))
+        for detection_category_id in detection_category_id_to_max_conf_column_name:
+            column_name = detection_category_id_to_max_conf_column_name[detection_category_id]
+            output_record[column_name] = \
+                detection_category_id_to_max_conf[detection_category_id]
 
-        assert len(row) == expected_row_length
-        rows.append(row)
+        for classification_category_id in classification_category_id_to_max_conf_column_name:
+            column_name = classification_category_id_to_max_conf_column_name[classification_category_id]
+            output_record[column_name] = \
+                classification_category_id_to_max_conf[classification_category_id]
 
     # ...for each image
 
     print('Writing to csv...')
 
-    with open(output_path, 'w', newline='', encoding=output_encoding) as f:
-        writer = csv.writer(f, delimiter=',')
-        header = fixed_columns
-        header.extend(detection_category_column_names)
-        if n_classification_categories > 0:
-            header.extend(classification_category_column_names)
-        for field_name in optional_fields_present:
-            header.append(field_name)
-        writer.writerow(header)
-        writer.writerows(rows)
+    df = pd.DataFrame(output_records)
+
+    if omit_bounding_boxes:
+        df = df.drop('detections',axis=1)
+    df.to_csv(output_path,index=False,header=True,encoding=output_encoding)
 
 # ...def convert_json_to_csv(...)
 
 
 def convert_csv_to_json(input_path,output_path=None,overwrite=True):
     """
-    Convert .csv to .json. If output_path is None, will convert x.csv to x.json.
+    Convert .csv to .json. If output_path is None, will convert x.csv to x.json. This
+    supports a largely obsolete .csv format; there's almost no reason you should need to do this.
 
     Args:
         input_path (str): .csv filename to convert to .json
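Taken together, convert_json_to_csv now accumulates one dict per image and delegates header construction and quoting to pandas. A hedged usage sketch (the module path follows the file shown above; file names are hypothetical):

    from megadetector.postprocessing.convert_output_format import convert_json_to_csv

    # Writes md_results.csv next to the input file
    convert_json_to_csv('md_results.json',
                        min_confidence=0.2,
                        omit_bounding_boxes=True,
                        verbose=True)
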
@@ -83,6 +83,9 @@ class SubsetJsonDetectorOutputOptions:
     def __init__(self):
 
         #: Only process files containing the token 'query'
+        #:
+        #: Does not support general regexes, but supports ^ as a special case
+        #: regex-like notation for "starts with"
         self.query = None
 
         #: Replace 'query' with 'replacement' if 'replacement' is not None. If 'query' is None,
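The semantics described by the new comment are plain substring matching, with a leading ^ anchoring the query to the start of the filename. An editorial sketch of those semantics, not the package's actual matching code:

    def query_matches(filename, query):
        # '^' is the only supported regex-like token: it anchors the query
        # to the start of the filename; otherwise this is a substring test
        if query.startswith('^'):
            return filename.startswith(query[1:])
        return query in filename

    assert query_matches('camera1/img001.jpg', 'camera1')
    assert query_matches('backup/camera1/img001.jpg', 'camera1')
    assert not query_matches('backup/camera1/img001.jpg', '^camera1')
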
@@ -21,7 +21,7 @@ from megadetector.utils.path_utils import is_image_file
 
 #%% Directory enumeration functions
 
-def create_plain_index(root, dirs, files, dirname=None):
+def _create_plain_index(root, dirs, files, dirname=None):
     """
     Creates the fairly plain HTML folder index including a preview of a single image file,
     if any is present.
@@ -40,6 +40,7 @@ def create_plain_index(root, dirs, files, dirname=None):
 
     if dirname is None:
         dirname = root or '/'
+    dirname = dirname.replace('\\','/')
 
     html = "<!DOCTYPE html>\n"
     html += "<html lang='en'><head>"
@@ -104,13 +105,14 @@ def create_plain_index(root, dirs, files, dirname=None):
     html += "</body></html>\n"
     return html
 
-# ...def create_plain_index(...)
+# ...def _create_plain_index(...)
 
 
-def traverse_and_create_index(dir,
-                              overwrite_files=False,
-                              template_fun=create_plain_index,
-                              basepath=None):
+def create_html_index(dir,
+                      overwrite=False,
+                      template_fun=_create_plain_index,
+                      basepath=None,
+                      recursive=True):
     """
     Recursively traverses the local directory [dir] and generates an index
     file for each folder using [template_fun] to generate the HTML output.
@@ -118,12 +120,13 @@ def traverse_and_create_index(dir,
 
     Args:
         dir (str): directory to process
-        overwrite_files (bool, optional): whether to over-write existing index file
+        overwrite (bool, optional): whether to overwrite existing index files
         template_fun (func, optional): function taking three arguments (string,
             list of string, list of string) representing the current root, the list of folders,
             and the list of files. Should return the HTML source of the index file.
        basepath (str, optional): if not None, the name used for each subfolder in [dir]
            in the output files will be relative to [basepath]
+        recursive (bool, optional): recurse into subfolders
     """
 
     print('Traversing {}'.format(dir))
@@ -141,7 +144,7 @@ def traverse_and_create_index(dir,
         # Output is written to file *root*/index.html
         output_file = os.path.join(root, "index.html")
 
-        if not overwrite_files and os.path.isfile(output_file):
+        if (not overwrite) and os.path.isfile(output_file):
             print('Skipping {}, file exists'.format(output_file))
             continue
 
@@ -157,7 +160,10 @@
         with open(output_file, 'wt') as fi:
             fi.write(html)
 
-# ...def traverse_and_create_index(...)
+        if not recursive:
+            break
+
+# ...def create_html_index(...)
 
 
 #%% Command-line driver
@@ -171,7 +177,7 @@ def main(): # noqa
     parser.add_argument("--basepath", type=str,
                         help='Folder names will be printed relative to basepath, if specified',
                         default=None)
-    parser.add_argument("--enable_overwrite", action='store_true', default=False,
+    parser.add_argument("--overwrite", action='store_true', default=False,
                         help='If set, the script will overwrite existing index.html files.')
 
     if len(sys.argv[1:]) == 0:
@@ -182,9 +188,9 @@ def main(): # noqa
 
     assert os.path.isdir(args.directory), "{} is not a valid directory".format(args.directory)
 
-    traverse_and_create_index(args.directory,
-                              overwrite_files=args.enable_overwrite,
-                              basepath=args.basepath)
+    create_html_index(args.directory,
+                      overwrite=args.overwrite,
+                      basepath=args.basepath)
 
 if __name__ == '__main__':
     main()
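A hedged usage sketch for the renamed entry point (the module path is an assumption, since the diff does not show the containing file; the folder path is hypothetical):

    # Module path is an assumption; the diff doesn't name the file
    from megadetector.utils.directory_listing import create_html_index

    create_html_index('/data/previews',
                      overwrite=True,    # replaces overwrite_files / --enable_overwrite
                      recursive=False)   # new option: index only the top-level folder
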
@@ -528,7 +528,8 @@ def find_images(dirname,
 def clean_filename(filename,
                    allow_list=VALID_FILENAME_CHARS,
                    char_limit=CHAR_LIMIT,
-                   force_lower= False):
+                   force_lower=False,
+                   remove_trailing_leading_whitespace=True):
     r"""
     Removes non-ASCII and other invalid filename characters (on any
     reasonable OS) from a filename, then optionally trims to a maximum length.
@@ -544,11 +545,27 @@ def clean_filename(filename,
         char_limit (int, optional): maximum allowable filename length, if None will skip this
             step
         force_lower (bool, optional): convert the resulting filename to lowercase
-
+        remove_trailing_leading_whitespace (bool, optional): remove trailing and
+            leading whitespace from each component of a path, e.g. does not allow
+            a/b/c /d.jpg
     Returns:
         str: cleaned version of [filename]
     """
 
+    if remove_trailing_leading_whitespace:
+
+        # Best effort to preserve the original separator
+        separator = '/'
+        if '\\' in filename:
+            separator = '\\'
+
+        filename = filename.replace('\\','/')
+        components = filename.split('/')
+        clean_components = [c.strip() for c in components]
+        filename = separator.join(clean_components)
+        if separator == '\\':
+            filename = filename.replace('/','\\')
+
     # keep only valid ascii chars
     cleaned_filename = (unicodedata.normalize('NFKD', filename)
                         .encode('ASCII', 'ignore').decode())
@@ -565,7 +582,8 @@
 def clean_path(pathname,
                allow_list=VALID_PATH_CHARS,
                char_limit=CHAR_LIMIT,
-               force_lower=False):
+               force_lower=False,
+               remove_trailing_leading_whitespace=True):
     """
     Removes non-ASCII and other invalid path characters (on any reasonable
     OS) from a path, then optionally trims to a maximum length.
@@ -576,13 +594,20 @@ def clean_path(pathname,
         char_limit (int, optional): maximum allowable filename length, if None will skip this
             step
         force_lower (bool, optional): convert the resulting filename to lowercase
+        remove_trailing_leading_whitespace (bool, optional): remove trailing and
+            leading whitespace from each component of a path, e.g. does not allow
+            a/b/c /d.jpg
 
     Returns:
         str: cleaned version of [filename]
     """
 
-    return clean_filename(pathname, allow_list=allow_list,
-                          char_limit=char_limit, force_lower=force_lower)
+    return clean_filename(pathname,
+                          allow_list=allow_list,
+                          char_limit=char_limit,
+                          force_lower=force_lower,
+                          remove_trailing_leading_whitespace=\
+                              remove_trailing_leading_whitespace)
 
 
 def flatten_path(pathname,separator_chars=SEPARATOR_CHARS,separator_char_replacement='~'):
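Tracing the new component-wise strip logic gives results like the following; an editorial sketch, assuming clean_path lives in megadetector.utils.path_utils as the earlier hunk header suggests:

    from megadetector.utils.path_utils import clean_path

    # Each path component is stripped of leading/trailing whitespace by default
    clean_path('a/b/c /d.jpg')    # 'a/b/c/d.jpg'

    # The old behavior is still available
    clean_path('a/b/c /d.jpg', remove_trailing_leading_whitespace=False)    # 'a/b/c /d.jpg'

    # Backslash separators are preserved on a best-effort basis
    clean_path('a\\b \\c.jpg')    # 'a\\b\\c.jpg'
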
@@ -1553,6 +1578,7 @@ class TestPathUtils:
         """
 
         self.test_dir = make_test_folder(subfolder='megadetector/path_utils_tests')
+        print('Using temporary folder {} for path utils testing'.format(self.test_dir))
         os.makedirs(self.test_dir, exist_ok=True)
 
 
@@ -1776,7 +1802,11 @@ class TestPathUtils:
             ])
         folders_non_recursive_abs = folder_list(folder_list_dir, recursive=False,
                                                 return_relative_paths=False)
-        assert sorted(folders_non_recursive_abs) == expected_folders_non_recursive_abs
+        assert sorted(folders_non_recursive_abs) == expected_folders_non_recursive_abs, \
+            'Non-recursive folder list failed, expected:\n\n{}\n\nFound:\n\n{}'.format(
+                str(expected_folders_non_recursive_abs),
+                str(folders_non_recursive_abs)
+            )
 
         # Test non-recursive, relative paths
         expected_folders_non_recursive_rel = sorted(['subdir1', 'subdir2'])
@@ -2114,7 +2144,17 @@ class TestPathUtils:
         assert clean_filename("test*file?.txt", char_limit=10) == "testfile.t"
         assert clean_filename("TestFile.TXT", force_lower=True) == "testfile.txt"
         assert clean_filename("file:with<illegal>chars.txt") == "filewithillegalchars.txt"
-        assert clean_filename(" accented_name_éà.txt") == " accented_name_ea.txt"
+
+        s = " accented_name_éà.txt"
+
+        assert clean_filename(s,
+                              remove_trailing_leading_whitespace=False) == " accented_name_ea.txt", \
+            'clean_filename with remove_trailing_leading_whitespace=False: {}'.format(
+                clean_filename(s, remove_trailing_leading_whitespace=False))
+
+        assert clean_filename(s, remove_trailing_leading_whitespace=True) == "accented_name_ea.txt", \
+            'clean_filename with remove_trailing_leading_whitespace=True: {}'.format(
+                clean_filename(s, remove_trailing_leading_whitespace=True))
 
         # Separators are not allowed by default in clean_filename
         assert clean_filename("path/to/file.txt") == "pathtofile.txt"
@@ -2444,7 +2484,13 @@ class TestPathUtils:
         un_tar_dir = os.path.join(self.test_dir, "un_tar_contents")
         os.makedirs(un_tar_dir, exist_ok=True)
         with tarfile.open(output_tar_path, 'r:gz') as tf:
-            tf.extractall(path=un_tar_dir)
+            # The "filter" option was added as of Python 3.12, and *not* specifying
+            # filter=None will change behavior as of Python 3.14. We want the unmodified
+            # behavior, but we want to support Python <3.12, so we do a version check.
+            if sys.version_info >= (3, 12):
+                tf.extractall(path=un_tar_dir, filter=None)
+            else:
+                tf.extractall(path=un_tar_dir)
 
         expected_untarred_file1 = os.path.join(un_tar_dir, os.path.relpath(file1_path, self.test_dir))
         expected_untarred_file2 = os.path.join(un_tar_dir, os.path.relpath(file2_path, self.test_dir))
@@ -2618,7 +2664,9 @@ def test_path_utils():
 
     test_instance = TestPathUtils()
     test_instance.set_up()
+
     try:
+
         test_instance.test_is_image_file()
         test_instance.test_find_image_strings()
         test_instance.test_find_images()
@@ -2643,5 +2691,7 @@ def test_path_utils():
         test_instance.test_add_files_to_single_tar_file()
         test_instance.test_parallel_zip_individual_files_and_folders()
         test_instance.test_compute_file_hash()
+
     finally:
+
         test_instance.tear_down()
@@ -2,7 +2,7 @@
 
 url_utils.py
 
-Frequently-used functions for downloading or manipulating URLs
+Frequently-used functions for downloading, manipulating, or serving URLs
 
 """
 
@@ -16,6 +16,9 @@ import urllib.error
 import requests
 import shutil
 import pytest
+import socketserver
+import threading
+import http.server
 
 from functools import partial
 from tqdm import tqdm
@@ -453,6 +456,93 @@ def get_url_sizes(urls,n_workers=1,pool_type='thread',timeout=None,verbose=False
     return url_to_size
 
 
+#%% Singleton HTTP server
+
+class QuietHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
+    """
+    SimpleHTTPRequestHandler subclass that suppresses console printouts
+    """
+    def __init__(self, *args, directory=None, **kwargs):
+        super().__init__(*args, directory=directory, **kwargs)
+
+    def log_message(self, format, *args): # noqa
+        pass
+
+
+class SingletonHTTPServer:
+    """
+    HTTP server that runs on a local port, serving a particular local folder. Runs as a
+    singleton, so starting a server in a new folder closes the previous server. I use this
+    primarily to serve MD/SpeciesNet previews from manage_local_batch, which can exceed
+    the 260-character filename length limitation imposed by browsers on Windows, so really the
+    point here is just to remove characters from the URL.
+    """
+
+    _server = None
+    _thread = None
+
+    @classmethod
+    def start_server(cls, directory, port=8000, host='localhost'):
+        """
+        Start or restart the HTTP server with a specific directory
+
+        Args:
+            directory (str): the root folder served by the server
+            port (int, optional): the port on which to create the server
+            host (str, optional): the host on which to listen, typically
+                either "localhost" (default) or "0.0.0.0"
+
+        Returns:
+            str: URL to the running host
+        """
+
+        # Stop the existing server instance if necessary
+        cls.stop_server()
+
+        # Create new server
+        handler = partial(QuietHTTPRequestHandler, directory=directory)
+        cls._server = socketserver.TCPServer((host, port), handler)
+
+        # Start server in daemon thread (dies when parent process dies)
+        cls._thread = threading.Thread(target=cls._server.serve_forever)
+        cls._thread.daemon = True
+        cls._thread.start()
+
+        print(f"Serving {directory} at http://{host}:{port}")
+        return f"http://{host}:{port}"
+
+
+    @classmethod
+    def stop_server(cls):
+        """
+        Stop the current server (if one is running)
+        """
+
+        if cls._server:
+            cls._server.shutdown()
+            cls._server.server_close()
+            cls._server = None
+        if cls._thread:
+            cls._thread.join(timeout=1)
+            cls._thread = None
+
+
+    @classmethod
+    def is_running(cls):
+        """
+        Check whether the server is currently running.
+
+        Returns:
+            bool: True if the server is running
+        """
+
+        return (cls._server is not None) and \
+               (cls._thread is not None) and \
+               (cls._thread.is_alive())
+
+# ...class SingletonHTTPServer
+
+
 #%% Tests
 
 # Constants for tests
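A short usage sketch for the new server class (the folder path is hypothetical; the module path assumes url_utils.py lives in megadetector.utils like its path_utils sibling):

    from megadetector.utils.url_utils import SingletonHTTPServer

    # Serve a local preview folder; starting a second server stops the first
    base_url = SingletonHTTPServer.start_server('/data/previews', port=8000)
    assert SingletonHTTPServer.is_running()

    # ...browse base_url in a local browser...

    SingletonHTTPServer.stop_server()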