megadetector 10.0.10 → 10.0.11 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (80)
  1. megadetector/data_management/animl_to_md.py +5 -2
  2. megadetector/data_management/cct_json_utils.py +4 -2
  3. megadetector/data_management/cct_to_md.py +5 -4
  4. megadetector/data_management/cct_to_wi.py +5 -1
  5. megadetector/data_management/coco_to_yolo.py +3 -2
  6. megadetector/data_management/databases/combine_coco_camera_traps_files.py +4 -4
  7. megadetector/data_management/databases/integrity_check_json_db.py +2 -2
  8. megadetector/data_management/databases/subset_json_db.py +0 -3
  9. megadetector/data_management/generate_crops_from_cct.py +6 -4
  10. megadetector/data_management/get_image_sizes.py +5 -35
  11. megadetector/data_management/labelme_to_coco.py +10 -6
  12. megadetector/data_management/labelme_to_yolo.py +19 -28
  13. megadetector/data_management/lila/create_lila_test_set.py +22 -2
  14. megadetector/data_management/lila/generate_lila_per_image_labels.py +7 -5
  15. megadetector/data_management/lila/lila_common.py +2 -2
  16. megadetector/data_management/lila/test_lila_metadata_urls.py +0 -1
  17. megadetector/data_management/ocr_tools.py +6 -10
  18. megadetector/data_management/read_exif.py +59 -16
  19. megadetector/data_management/remap_coco_categories.py +1 -1
  20. megadetector/data_management/remove_exif.py +10 -5
  21. megadetector/data_management/rename_images.py +20 -13
  22. megadetector/data_management/resize_coco_dataset.py +10 -4
  23. megadetector/data_management/speciesnet_to_md.py +3 -3
  24. megadetector/data_management/yolo_output_to_md_output.py +3 -1
  25. megadetector/data_management/yolo_to_coco.py +28 -19
  26. megadetector/detection/change_detection.py +26 -18
  27. megadetector/detection/process_video.py +1 -1
  28. megadetector/detection/pytorch_detector.py +5 -5
  29. megadetector/detection/run_detector.py +34 -10
  30. megadetector/detection/run_detector_batch.py +2 -1
  31. megadetector/detection/run_inference_with_yolov5_val.py +3 -1
  32. megadetector/detection/run_md_and_speciesnet.py +215 -101
  33. megadetector/detection/run_tiled_inference.py +7 -7
  34. megadetector/detection/tf_detector.py +1 -1
  35. megadetector/detection/video_utils.py +9 -6
  36. megadetector/postprocessing/add_max_conf.py +4 -4
  37. megadetector/postprocessing/categorize_detections_by_size.py +3 -2
  38. megadetector/postprocessing/classification_postprocessing.py +7 -8
  39. megadetector/postprocessing/combine_batch_outputs.py +3 -2
  40. megadetector/postprocessing/compare_batch_results.py +49 -27
  41. megadetector/postprocessing/convert_output_format.py +8 -6
  42. megadetector/postprocessing/create_crop_folder.py +13 -4
  43. megadetector/postprocessing/generate_csv_report.py +22 -8
  44. megadetector/postprocessing/load_api_results.py +8 -4
  45. megadetector/postprocessing/md_to_coco.py +2 -3
  46. megadetector/postprocessing/md_to_labelme.py +12 -8
  47. megadetector/postprocessing/md_to_wi.py +2 -1
  48. megadetector/postprocessing/merge_detections.py +4 -6
  49. megadetector/postprocessing/postprocess_batch_results.py +4 -3
  50. megadetector/postprocessing/remap_detection_categories.py +6 -3
  51. megadetector/postprocessing/render_detection_confusion_matrix.py +18 -10
  52. megadetector/postprocessing/repeat_detection_elimination/remove_repeat_detections.py +1 -1
  53. megadetector/postprocessing/repeat_detection_elimination/repeat_detections_core.py +5 -3
  54. megadetector/postprocessing/separate_detections_into_folders.py +10 -4
  55. megadetector/postprocessing/subset_json_detector_output.py +1 -1
  56. megadetector/postprocessing/top_folders_to_bottom.py +22 -7
  57. megadetector/postprocessing/validate_batch_results.py +1 -1
  58. megadetector/taxonomy_mapping/map_new_lila_datasets.py +59 -3
  59. megadetector/taxonomy_mapping/prepare_lila_taxonomy_release.py +1 -1
  60. megadetector/taxonomy_mapping/preview_lila_taxonomy.py +26 -17
  61. megadetector/taxonomy_mapping/species_lookup.py +51 -2
  62. megadetector/utils/ct_utils.py +9 -4
  63. megadetector/utils/extract_frames_from_video.py +4 -0
  64. megadetector/utils/gpu_test.py +6 -6
  65. megadetector/utils/md_tests.py +21 -21
  66. megadetector/utils/path_utils.py +112 -44
  67. megadetector/utils/split_locations_into_train_val.py +0 -4
  68. megadetector/utils/url_utils.py +5 -3
  69. megadetector/utils/wi_taxonomy_utils.py +37 -8
  70. megadetector/utils/write_html_image_list.py +1 -2
  71. megadetector/visualization/plot_utils.py +31 -19
  72. megadetector/visualization/render_images_with_thumbnails.py +3 -0
  73. megadetector/visualization/visualization_utils.py +18 -7
  74. megadetector/visualization/visualize_db.py +9 -26
  75. megadetector/visualization/visualize_video_output.py +14 -2
  76. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/METADATA +1 -1
  77. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/RECORD +80 -80
  78. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/WHEEL +0 -0
  79. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/licenses/LICENSE +0 -0
  80. {megadetector-10.0.10.dist-info → megadetector-10.0.11.dist-info}/top_level.txt +0 -0
megadetector/data_management/animl_to_md.py

@@ -67,8 +67,11 @@ def animl_results_to_md_results(input_file,output_file=None):
  im['file'] = row['file']
  filename_to_results[im['file']] = im

- assert isinstance(row['category'],int),'Invalid category identifier in row {}'.format(im['file'])
- detection_category_id = str(row['category'])
+ # Pandas often reads integer columns as float64, so check integer-ness
+ # rather than just isinstance(..., int)
+ assert pd.notna(row['category']) and float(row['category']).is_integer(), \
+     'Invalid category identifier in row {} (file: {})'.format(i_row, im['file'])
+ detection_category_id = str(int(row['category']))

  assert detection_category_id in detection_category_id_to_name,\
      'Unrecognized detection category ID {}'.format(detection_category_id)

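Note: the motivation is easy to reproduce; a minimal sketch of the pandas behavior this change works around (toy column, not the real animl schema):

    import pandas as pd

    # A column containing any NaN is read back as float64, so integer category
    # IDs arrive as floats, and the old isinstance(..., int) check rejects
    # perfectly valid IDs (numpy scalar types are not Python ints anyway).
    df = pd.DataFrame({'category': [1.0, 2.0, float('nan')]})
    row = df.iloc[0]
    assert not isinstance(row['category'], int)   # it's numpy.float64
    assert pd.notna(row['category']) and float(row['category']).is_integer()
    assert str(int(row['category'])) == '1'       # usable as a category ID
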
megadetector/data_management/cct_json_utils.py

@@ -231,7 +231,6 @@ class IndexedJsonDb:

  # Image ID --> annotations
  # Each image can potentially multiple annotations, hence using lists
- self.image_id_to_annotations = {}
  self.image_id_to_annotations = defaultdict(list)
  for ann in self.db['annotations']:
      self.image_id_to_annotations[ann['image_id']].append(ann)

@@ -355,7 +354,9 @@ def parse_datetimes_from_cct_image_list(images,

  assert isinstance(images,list)

- for im in images:
+ print('Parsing datetimes from CCT image list...')
+
+ for im in tqdm(images):

      if 'datetime' not in im:
          continue

@@ -445,6 +446,7 @@ def create_sequences(image_info,options=None):
      to_return = image_info

  elif isinstance(image_info,str):
+     print('Reading image information from {}'.format(image_info))
      with open(image_info,'r') as f:
          d = json.load(f)
      to_return = d

megadetector/data_management/cct_to_md.py

@@ -64,7 +64,7 @@ def cct_to_md(input_filename,output_filename=None):
  d = json.load(f)

  for s in ['annotations','images','categories']:
-     assert s in d.keys(), 'Cannot find category {} in input file, is this a CCT file?'.format(s)
+     assert s in d.keys(), 'Cannot find key {} in input file, is this a CCT file?'.format(s)


  ## Prepare metadata

@@ -149,10 +149,11 @@ def cct_to_md(input_filename,output_filename=None):

  results['images'] = images_out

- with open(output_filename,'w') as f:
-     json.dump(results, f, indent=1)
+ if output_filename is not None:
+     with open(output_filename,'w') as f:
+         json.dump(results, f, indent=1)

- return output_filename
+ return results

  # ...cct_to_md()

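Note: this is a small API change: cct_to_md() now returns the MD-format results dict rather than the output path, and writing the file becomes optional. A hedged usage sketch (file names hypothetical):

    from megadetector.data_management.cct_to_md import cct_to_md

    results = cct_to_md('labels_cct.json')                     # in-memory only
    results = cct_to_md('labels_cct.json', 'results_md.json')  # also writes the file
    print(len(results['images']))
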
megadetector/data_management/cct_to_wi.py

@@ -263,8 +263,10 @@ def main(): # noqa
  else:
      row['number_of_objects'] = 1

+ assert isinstance(im['datetime'],str)
+
  row['uncertainty'] = None
- row['timestamp'] = im['datetime']; assert isinstance(im['datetime'],str)
+ row['timestamp'] = im['datetime']
  row['highlighted'] = 0
  row['age'] = None
  row['sex'] = None

@@ -276,6 +278,8 @@ def main(): # noqa
  assert len(row) == len(images_fields)
  rows.append(row)

+ # ...for each image
+
  df = pd.DataFrame(rows)

  df.to_csv(os.path.join(output_base,images_file_name),index=False)

megadetector/data_management/coco_to_yolo.py

@@ -81,7 +81,7 @@ def write_yolo_dataset_file(yolo_dataset_file,
  if val_folder_relative is not None:
      f.write('val: {}\n'.format(val_folder_relative))
  if test_folder_relative is not None:
-     f.write('val: {}\n'.format(test_folder_relative))
+     f.write('test: {}\n'.format(test_folder_relative))

  f.write('\n')

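Note: before this fix the test split was emitted under a second val: key; YAML loaders that keep the last duplicate key would then treat the test folder as the validation folder and lose the real one. An illustrative sketch of the corrected writer output (folder names hypothetical):

    # What the fixed code emits into the YOLO dataset file
    with open('dataset.yaml', 'w') as f:
        f.write('train: train_images\n')
        f.write('val: val_images\n')
        f.write('test: test_images\n')  # previously a duplicate 'val:' line
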
@@ -454,13 +454,14 @@ def coco_to_yolo(input_image_folder,
      # Category IDs should range from 0..N-1
      assert i_class in yolo_id_to_name
      f.write(yolo_id_to_name[i_class] + '\n')
+ else:
+     class_list_filename = None

  if image_id_to_output_image_json_file is not None:
      print('Writing image ID mapping to {}'.format(image_id_to_output_image_json_file))
      with open(image_id_to_output_image_json_file,'w') as f:
          json.dump(image_id_to_output_image_name,f,indent=1)

-
  if (output_folder == input_image_folder) and (not create_image_and_label_folders):
      print('Creating annotation files (not copying images, input and output folder are the same)')
  else:

megadetector/data_management/databases/combine_coco_camera_traps_files.py

@@ -130,7 +130,7 @@ def combine_cct_dictionaries(input_dicts, require_uniqueness=True):
  for im in input_dict['images']:

      if 'seq_id' in im:
-         im['seq_id'] = index_string + im['seq_id']
+         im['seq_id'] = index_string + str(im['seq_id'])
      if 'location' in im:
          im['location'] = index_string + im['location']

@@ -143,7 +143,7 @@ def combine_cct_dictionaries(input_dicts, require_uniqueness=True):
      print('Redundant image {}'.format(im_file))

  # Create a unique ID
- im['id'] = index_string + im['id']
+ im['id'] = index_string + str(im['id'])
  filename_to_image[im_file] = im

  # ...for each image

@@ -152,8 +152,8 @@ def combine_cct_dictionaries(input_dicts, require_uniqueness=True):
  # Same for annotations
  for ann in input_dict['annotations']:

-     ann['image_id'] = index_string + ann['image_id']
-     ann['id'] = index_string + ann['id']
+     ann['image_id'] = index_string + str(ann['image_id'])
+     ann['id'] = index_string + str(ann['id'])
      assert ann['category_id'] in old_cat_id_to_new_cat_id
      ann['category_id'] = old_cat_id_to_new_cat_id[ann['category_id']]

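Note: the str() coercions in this file matter because COCO Camera Traps IDs may legally be integers, and concatenating an int onto the string index prefix raises. A minimal sketch (prefix format hypothetical):

    index_string = '0001_'
    ann = {'id': 42, 'image_id': 7}

    # index_string + ann['id'] would raise:
    # TypeError: can only concatenate str (not "int") to str
    ann['id'] = index_string + str(ann['id'])              # '0001_42'
    ann['image_id'] = index_string + str(ann['image_id'])  # '0001_7'
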
megadetector/data_management/databases/integrity_check_json_db.py

@@ -111,11 +111,11 @@ def _check_image_existence_and_size(image,options=None):
  # width, height = Image.open(file_path).size
  try:
      pil_im = open_image(file_path)
+     width,height = pil_im.size
+     pil_im.close()
  except Exception as e:
      s = 'Error opening {}: {}'.format(file_path,str(e))
      return s
-
- width,height = pil_im.size
  if (not (width == image['width'] and height == image['height'])):
      s = 'Size mismatch for image {}: {} (reported {},{}, actual {},{})'.format(
          image['id'], file_path, image['width'], image['height'], width, height)

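Note: two problems are fixed at once here: reading pil_im.size outside the try meant a failure there escaped the error handling, and the image handle was never closed. A sketch of the same pattern written with a context manager (plain PIL shown; open_image() is MegaDetector's wrapper):

    from PIL import Image

    def _get_size_checked(file_path):
        # Returns (width, height), or an error string on failure.
        try:
            # The context manager closes the file handle even if reading
            # the size fails, mirroring the explicit close() in the fix
            with Image.open(file_path) as pil_im:
                return pil_im.size
        except Exception as e:
            return 'Error opening {}: {}'.format(file_path, str(e))
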
megadetector/data_management/databases/subset_json_db.py

@@ -12,7 +12,6 @@ subset_json_detector_output.py.

  #%% Constants and imports

- import os
  import sys
  import json
  import argparse

@@ -151,8 +150,6 @@ def subset_json_db(input_json,
  if output_json is not None:
      if verbose:
          print('Writing output .json to {}'.format(output_json))
-     output_dir = os.path.dirname(output_json)
-     os.makedirs(output_dir,exist_ok=True)
      ct_utils.write_json(output_json, output_data)

  if verbose:

megadetector/data_management/generate_crops_from_cct.py

@@ -72,7 +72,7 @@ def generate_crops_from_cct(cct_file,image_dir,output_dir,padding=0,flat_output=
  # im = d['images'][0]
  for im in tqdm(d['images']):

-     input_image_fn = os.path.join(os.path.join(image_dir,im['file_name']))
+     input_image_fn = os.path.join(image_dir,im['file_name'])
      assert os.path.isfile(input_image_fn), 'Could not find image {}'.format(input_image_fn)

      if im['id'] not in image_id_to_boxes:

@@ -102,15 +102,17 @@

  xmin = max(xmin,0)
  ymin = max(ymin,0)
- xmax = min(xmax,img.width-1)
- ymax = min(ymax,img.height-1)
+ # PIL's crop() method uses exclusive upper bounds for the right and lower
+ # edges, hence "img.width" rather than "img.width-1" here.
+ xmax = min(xmax,img.width)
+ ymax = min(ymax,img.height)

  crop = img.crop(box=[xmin, ymin, xmax, ymax])

  output_fn = os.path.splitext(im['file_name'])[0].replace('\\','/')
  if flat_output:
      output_fn = output_fn.replace('/','_')
- output_fn = output_fn + '_crop' + str(i_ann).zfill(3) + '_id_' + ann['id']
+ output_fn = output_fn + '_crop' + str(i_ann).zfill(3) + '_id_' + str(ann['id'])
  output_fn = output_fn + '.jpg'

  output_full_path = os.path.join(output_dir,output_fn)

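Note: PIL's crop() takes (left, upper, right, lower) with exclusive right/lower bounds, so the old clamp to width-1/height-1 silently shaved a pixel off boxes touching the image edge. A quick demonstration:

    from PIL import Image

    img = Image.new('RGB', (100, 50))

    # right/lower are exclusive, so this is the full image
    assert img.crop((0, 0, img.width, img.height)).size == (100, 50)

    # the old clamp dropped the last row and column of edge-touching boxes
    assert img.crop((0, 0, img.width - 1, img.height - 1)).size == (99, 49)
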
megadetector/data_management/get_image_sizes.py

@@ -75,7 +75,7 @@ def get_image_sizes(filenames,image_prefix=None,output_file=None,
  image_prefix (str, optional): optional prefix to add to images to get to full paths;
      useful when [filenames] contains relative files, in which case [image_prefix] is the
      base folder for the source images.
- output_file (str, optional): a .json file to write the imgae sizes
+ output_file (str, optional): a .json file to write the image sizes
  n_workers (int, optional): number of parallel workers to use, set to <=1 to
      disable parallelization
  use_threads (bool, optional): whether to use threads (True) or processes (False)

@@ -88,8 +88,10 @@ def get_image_sizes(filenames,image_prefix=None,output_file=None,
  """

  if output_file is not None:
-     assert os.path.isdir(os.path.dirname(output_file)), \
-         'Illegal output file {}, parent folder does not exist'.format(output_file)
+     output_dir = os.path.dirname(output_file)
+     if len(output_dir) > 0:
+         assert os.path.isdir(output_dir), \
+             'Illegal output file {}, parent folder does not exist'.format(output_file)

  if isinstance(filenames,str) and os.path.isfile(filenames):
      with open(filenames,'r') as f:

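Note: the extra guard is for bare filenames: os.path.dirname() returns an empty string when the output file has no directory component, and os.path.isdir('') is False, so the old assert rejected valid relative paths like 'sizes.json'. Concretely:

    import os

    assert os.path.dirname('sizes.json') == ''
    assert not os.path.isdir('')  # why the old assert failed on bare filenames

    output_dir = os.path.dirname('sizes.json')
    if len(output_dir) > 0:       # the new guard: only validate a real parent folder
        assert os.path.isdir(output_dir)
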
@@ -130,38 +132,6 @@ def get_image_sizes(filenames,image_prefix=None,output_file=None,
  return all_results


- #%% Interactive driver
-
- if False:
-
-     pass
-
-     #%%
-
-     # List images in a test folder
-     base_dir = r'c:\temp\test_images'
-     image_list_file = os.path.join(base_dir,'images.json')
-     relative_image_list_file = os.path.join(base_dir,'images_relative.json')
-     image_size_file = os.path.join(base_dir,'image_sizes.json')
-     from megadetector.utils import path_utils
-     image_names = path_utils.find_images(base_dir,recursive=True)
-
-     with open(image_list_file,'w') as f:
-         json.dump(image_names,f,indent=1)
-
-     relative_image_names = []
-     for s in image_names:
-         relative_image_names.append(os.path.relpath(s,base_dir))
-
-     with open(relative_image_list_file,'w') as f:
-         json.dump(relative_image_names,f,indent=1)
-
-
-     #%%
-
-     get_image_sizes(relative_image_list_file,image_size_file,image_prefix=base_dir,n_threads=4)
-
-
  #%% Command-line driver

  def main(): # noqa

megadetector/data_management/labelme_to_coco.py

@@ -292,7 +292,8 @@ def labelme_to_coco(input_folder,

  # Enumerate images
  print('Enumerating images in {}'.format(input_folder))
- image_filenames_relative = path_utils.find_images(input_folder,recursive=recursive,
+ image_filenames_relative = path_utils.find_images(input_folder,
+     recursive=recursive,
      return_relative_paths=True,
      convert_slashes=True)

@@ -352,9 +353,10 @@
          allow_new_categories=False
          ),image_filenames_relative), total=len(image_filenames_relative)))
  finally:
-     pool.close()
-     pool.join()
-     print("Pool closed and joined for labelme file processing")
+     if pool is not None:
+         pool.close()
+         pool.join()
+         print("Pool closed and joined for labelme file processing")

  images = []
  annotations = []

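Note: the same guard is applied in labelme_to_yolo.py and ocr_tools.py below. The pool is only created on the parallel code path, so if the worker count is 1 (or pool construction itself raises), the finally block must tolerate pool being None. A minimal, self-contained sketch of the pattern (names hypothetical):

    from multiprocessing.pool import ThreadPool

    def process_file(fn):
        return len(fn)

    filenames = ['a.json', 'bb.json']
    n_workers = 1

    pool = None
    try:
        if n_workers > 1:
            pool = ThreadPool(n_workers)
            results = pool.map(process_file, filenames)
        else:
            # Serial path: no pool is ever created...
            results = [process_file(fn) for fn in filenames]
    finally:
        # ...so cleanup must tolerate pool being None (the guard added here)
        if pool is not None:
            pool.close()
            pool.join()
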
@@ -423,7 +425,9 @@ def find_empty_labelme_files(input_folder,recursive=True):
  - images_with_non_empty_json_files: a list of images in [input_folder] associated with .json
    files that have at least one box
  """
- image_filenames_relative = path_utils.find_images(input_folder,recursive=True,
+
+ image_filenames_relative = path_utils.find_images(input_folder,
+     recursive=recursive,
      return_relative_paths=True)

  images_with_empty_json_files = []

@@ -500,7 +504,7 @@ if False:
  options.bFindUnusedImages = True
  options.bRequireLocation = False

- sortec_categories, _, error_info = integrity_check_json_db.integrity_check_json_db(output_file,options)
+ sorted_categories, _, error_info = integrity_check_json_db.integrity_check_json_db(output_file,options)


  #%% Preview

megadetector/data_management/labelme_to_yolo.py

@@ -107,10 +107,21 @@ def labelme_file_to_yolo_file(labelme_file,
  minx_abs = max(minx_abs,0.0)
  miny_abs = max(miny_abs,0.0)

- minx_rel = minx_abs / (im_width-1)
- maxx_rel = maxx_abs / (im_width-1)
- miny_rel = miny_abs / (im_height-1)
- maxy_rel = maxy_abs / (im_height-1)
+ # Handle degenerate cases where image is one pixel wide
+ if im_width == 1:
+     minx_rel = 0.0
+     maxx_rel = 0.0
+ else:
+     minx_rel = minx_abs / (im_width-1)
+     maxx_rel = maxx_abs / (im_width-1)
+
+ # Handle degenerate cases where image is one pixel tall
+ if im_height == 1:
+     miny_rel = 0.0
+     maxy_rel = 0.0
+ else:
+     miny_rel = miny_abs / (im_height-1)
+     maxy_rel = maxy_abs / (im_height-1)

  assert maxx_rel >= minx_rel
  assert maxy_rel >= miny_rel

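Note: without the special case, a one-pixel-wide or one-pixel-tall image makes the normalization denominator zero:

    im_width = 1
    minx_abs = 0.0

    # minx_rel = minx_abs / (im_width - 1)   # ZeroDivisionError
    minx_rel = 0.0 if im_width == 1 else minx_abs / (im_width - 1)
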
@@ -252,9 +263,10 @@ def labelme_folder_to_yolo(labelme_folder,
          valid_labelme_files_abs),
          total=len(valid_labelme_files_abs)))
  finally:
-     pool.close()
-     pool.join()
-     print('Pool closed and joined for labelme conversion to YOLO')
+     if pool is not None:
+         pool.close()
+         pool.join()
+         print('Pool closed and joined for labelme conversion to YOLO')

  assert len(valid_labelme_files_relative) == len(image_results)

@@ -270,27 +282,6 @@
  # ...def labelme_folder_to_yolo(...)


- #%% Interactive driver
-
- if False:
-
-     pass
-
-     #%%
-
-     labelme_file = os.path.expanduser('~/tmp/labels/x.json')
-     required_token = 'saved_by_labelme'
-     category_name_to_category_id = {'animal':0}
-     labelme_folder = os.path.expanduser('~/tmp/labels')
-
-     #%%
-
-     category_name_to_category_id = \
-         labelme_folder_to_yolo(labelme_folder,
-             category_name_to_category_id=category_name_to_category_id,
-             required_token=required_token,
-             overwrite_behavior='overwrite')
-
  #%% Command-line driver

  def main():

megadetector/data_management/lila/create_lila_test_set.py

@@ -16,6 +16,7 @@ import random
  from megadetector.data_management.lila.lila_common import \
      read_lila_metadata, read_metadata_file_for_dataset
  from megadetector.utils.url_utils import parallel_download_urls
+ from megadetector.utils.path_utils import open_file

  n_empty_images_per_dataset = 1
  n_non_empty_images_per_dataset = 1

@@ -50,6 +51,8 @@ for ds_name in metadata_table.keys():

  # Takes ~60 seconds

+ empty_category_names = ['empty','blank']
+
  # ds_name = (list(metadata_table.keys()))[0]
  for ds_name in metadata_table.keys():

@@ -65,10 +68,22 @@ for ds_name in metadata_table.keys():

  ## Find empty images

- if 'empty' not in category_name_to_id:
+ empty_category_present = False
+ for category_name in category_name_to_id:
+     if category_name in empty_category_names:
+         empty_category_present = True
+         break
+ if not empty_category_present:
      empty_annotations_to_download = []
  else:
-     empty_category_id = category_name_to_id['empty']
+     empty_category_id = None
+     for category_name in empty_category_names:
+         if category_name in category_name_to_id:
+             if empty_category_id is not None:
+                 print('Warning: multiple empty categories in dataset {}'.format(ds_name))
+             else:
+                 empty_category_id = category_name_to_id[category_name]
+     assert empty_category_id is not None
      empty_annotations = [ann for ann in d['annotations'] if ann['category_id'] == empty_category_id]
      try:
          empty_annotations_to_download = random.sample(empty_annotations,n_empty_images_per_dataset)

@@ -165,3 +180,8 @@ download_results = parallel_download_urls(url_to_target_file,
  # r = download_results[0]
  for r in download_results:
      assert r['status'] in ('skipped','success')
+
+
+ #%% Open the test test
+
+ open_file(output_dir)

megadetector/data_management/lila/generate_lila_per_image_labels.py

@@ -21,7 +21,7 @@ import os
  import json
  import pandas as pd
  import numpy as np
- import dateparser
+ import dateparser # type: ignore
  import csv

  from collections import defaultdict

@@ -148,7 +148,6 @@ with open(output_file,'w',encoding='utf-8',newline='') as f:
  data = json.load(f)

  categories = data['categories']
- category_ids = [c['id'] for c in categories]
  for c in categories:
      category_id_to_name = {c['id']:c['name'] for c in categories}

@@ -355,7 +354,7 @@ print('\nProcessed {} datasets'.format(len(metadata_table)))

  #%% Read the .csv back

- df = pd.read_csv(output_file)
+ df = pd.read_csv(output_file, low_memory=False)
  print('Read {} rows from {}'.format(len(df),output_file))

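Note: low_memory=False makes pandas read the whole file before inferring column dtypes; with the default chunked parser, a large mixed-content .csv like this one can trigger DtypeWarning and end up with inconsistent per-chunk dtypes. A hedged sketch (path hypothetical):

    import pandas as pd

    # Read in one pass so dtype inference sees the whole column at once
    df = pd.read_csv('lila_image_urls_and_labels.csv', low_memory=False)
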
@@ -426,6 +425,8 @@ os.makedirs(preview_folder,exist_ok=True)

  #%% Choose images to download

+ # Takes ~60 seconds
+
  np.random.seed(0)
  images_to_download = []

@@ -533,7 +534,7 @@ zipped_output_file = zip_file(output_file,verbose=True,overwrite=True)
  print('Zipped {} to {}'.format(output_file,zipped_output_file))


- #%% Convert to .json
+ #%% Experimental: convert to .json

  """
  The .csv file "output_file" (already loaded into the variable "df" at this point) has the following columns:

@@ -733,7 +734,8 @@ with open(output_file, 'r', encoding='utf-8') as csvfile:

  common_name = _clearnan(row['common_name'])

- frame_num = int(row['frame_num'])
+ # Convert to float first in case this appears in the .csv file as, e.g. "3.0"
+ frame_num = int(float(row['frame_num']))

  # Image data
  image_entry = {

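Note: the two-step conversion is needed because int() rejects float-formatted strings:

    # int('3.0') raises ValueError: invalid literal for int() with base 10
    assert int(float('3.0')) == 3
    assert int(float('3')) == 3
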
megadetector/data_management/lila/lila_common.py

@@ -65,7 +65,7 @@ def read_wildlife_insights_taxonomy_mapping(metadata_dir, force_download=False):

  wi_taxonomy_csv_path = os.path.join(metadata_dir,wildlife_insights_taxonomy_local_csv_filename)

- if os.path.exists(wi_taxonomy_csv_path):
+ if os.path.exists(wi_taxonomy_csv_path) and (not force_download):
      df = pd.read_csv(wi_taxonomy_csv_path)
  else:
      wi_taxonomy_json_path = os.path.join(metadata_dir,wildlife_insights_taxonomy_local_json_filename)

@@ -114,7 +114,7 @@ def read_lila_taxonomy_mapping(metadata_dir, force_download=False):
  download_url(lila_taxonomy_mapping_url, taxonomy_filename,
      force_download=force_download)

- df = pd.read_csv(lila_taxonomy_mapping_url)
+ df = pd.read_csv(taxonomy_filename)

  return df

megadetector/data_management/lila/test_lila_metadata_urls.py

@@ -162,4 +162,3 @@ for i_url,url in enumerate(urls_to_test):
      status_codes[i_url],url,url_to_source[url]))

  print('Tested {} URLs'.format(len(urls_to_test)))
-

megadetector/data_management/ocr_tools.py

@@ -271,11 +271,6 @@ def crop_to_solid_region(rough_crop,crop_location,options=None):
  w = max_x-min_x
  h = max_y-min_y

- x = min_x
- y = min_y
- w = max_x-min_x
- h = max_y-min_y
-
  # Crop the image
  crop_np = rough_crop_np[y:y+h,x:x+w]

@@ -650,9 +645,10 @@ def get_datetimes_for_folder(folder_name,output_file=None,n_to_sample=-1,options
          partial(try_get_datetime_from_image,options=options),image_file_names),
          total=len(image_file_names)))
  finally:
-     pool.close()
-     pool.join()
-     print("Pool closed and joined for datetime extraction")
+     if pool is not None:
+         pool.close()
+         pool.join()
+         print("Pool closed and joined for datetime extraction")

  filename_to_results = {}

@@ -728,8 +724,8 @@ if False:

  if 'text_results' not in results:
      raise Exception('no results available for {} ({})'.format(i_fn,fn))
-     print('Skipping {}, no results'.format(i_fn))
-     continue
+     # print('Skipping {}, no results'.format(i_fn))
+     # continue

  s = ' '.join([x[0] for x in results['text_results']])