edgefirst-validator 4.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. deepview/modelpack/utils/argmax.py +16 -0
  2. edgefirst/validator/__init__.py +1 -0
  3. edgefirst/validator/__main__.py +375 -0
  4. edgefirst/validator/datasets/__init__.py +118 -0
  5. edgefirst/validator/datasets/cache.py +296 -0
  6. edgefirst/validator/datasets/core.py +250 -0
  7. edgefirst/validator/datasets/darknet.py +446 -0
  8. edgefirst/validator/datasets/database.py +1067 -0
  9. edgefirst/validator/datasets/instance/__init__.py +4 -0
  10. edgefirst/validator/datasets/instance/core.py +222 -0
  11. edgefirst/validator/datasets/instance/detection.py +145 -0
  12. edgefirst/validator/datasets/instance/multitask.py +80 -0
  13. edgefirst/validator/datasets/instance/segmentation.py +120 -0
  14. edgefirst/validator/datasets/utils/fetch.py +682 -0
  15. edgefirst/validator/datasets/utils/readers.py +425 -0
  16. edgefirst/validator/datasets/utils/transformations.py +1695 -0
  17. edgefirst/validator/evaluators/__init__.py +17 -0
  18. edgefirst/validator/evaluators/callbacks/__init__.py +3 -0
  19. edgefirst/validator/evaluators/callbacks/core.py +192 -0
  20. edgefirst/validator/evaluators/callbacks/plots.py +900 -0
  21. edgefirst/validator/evaluators/callbacks/studio.py +234 -0
  22. edgefirst/validator/evaluators/core.py +257 -0
  23. edgefirst/validator/evaluators/detection.py +749 -0
  24. edgefirst/validator/evaluators/multitask.py +270 -0
  25. edgefirst/validator/evaluators/parameters/__init__.py +53 -0
  26. edgefirst/validator/evaluators/parameters/core.py +554 -0
  27. edgefirst/validator/evaluators/parameters/dataset.py +239 -0
  28. edgefirst/validator/evaluators/parameters/model.py +338 -0
  29. edgefirst/validator/evaluators/parameters/validation.py +528 -0
  30. edgefirst/validator/evaluators/segmentation.py +729 -0
  31. edgefirst/validator/evaluators/utils/__init__.py +3 -0
  32. edgefirst/validator/evaluators/utils/classify.py +292 -0
  33. edgefirst/validator/evaluators/utils/match.py +262 -0
  34. edgefirst/validator/evaluators/utils/timer.py +132 -0
  35. edgefirst/validator/metrics/__init__.py +9 -0
  36. edgefirst/validator/metrics/data/__init__.py +7 -0
  37. edgefirst/validator/metrics/data/label.py +668 -0
  38. edgefirst/validator/metrics/data/metrics.py +759 -0
  39. edgefirst/validator/metrics/data/plots.py +476 -0
  40. edgefirst/validator/metrics/data/stats.py +507 -0
  41. edgefirst/validator/metrics/detection.py +595 -0
  42. edgefirst/validator/metrics/segmentation.py +173 -0
  43. edgefirst/validator/metrics/utils/math.py +717 -0
  44. edgefirst/validator/publishers/__init__.py +3 -0
  45. edgefirst/validator/publishers/console.py +147 -0
  46. edgefirst/validator/publishers/studio.py +128 -0
  47. edgefirst/validator/publishers/tensorboard.py +119 -0
  48. edgefirst/validator/publishers/utils/logger.py +111 -0
  49. edgefirst/validator/publishers/utils/table.py +403 -0
  50. edgefirst/validator/runners/__init__.py +8 -0
  51. edgefirst/validator/runners/core.py +727 -0
  52. edgefirst/validator/runners/deepviewrt.py +177 -0
  53. edgefirst/validator/runners/hailo.py +263 -0
  54. edgefirst/validator/runners/keras.py +150 -0
  55. edgefirst/validator/runners/kinara.py +265 -0
  56. edgefirst/validator/runners/offline.py +228 -0
  57. edgefirst/validator/runners/onnx.py +241 -0
  58. edgefirst/validator/runners/processing/decode.py +320 -0
  59. edgefirst/validator/runners/processing/dvapi.py +4192 -0
  60. edgefirst/validator/runners/processing/nms.py +637 -0
  61. edgefirst/validator/runners/processing/outputs.py +507 -0
  62. edgefirst/validator/runners/tensorrt.py +321 -0
  63. edgefirst/validator/runners/tflite.py +221 -0
  64. edgefirst/validator/validate.py +843 -0
  65. edgefirst/validator/visualize/__init__.py +3 -0
  66. edgefirst/validator/visualize/detection.py +623 -0
  67. edgefirst/validator/visualize/segmentation.py +281 -0
  68. edgefirst/validator/visualize/utils/plots.py +635 -0
  69. edgefirst_validator-4.2.1.dist-info/METADATA +111 -0
  70. edgefirst_validator-4.2.1.dist-info/RECORD +73 -0
  71. edgefirst_validator-4.2.1.dist-info/WHEEL +5 -0
  72. edgefirst_validator-4.2.1.dist-info/entry_points.txt +2 -0
  73. edgefirst_validator-4.2.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,682 @@
1
+ """
2
+ This module contains functions for fetching dataset artifacts.
3
+ """
4
+
5
+ import os
6
+ import glob
7
+ import zipfile
8
+ from typing import Union
9
+
10
+ from edgefirst.validator.publishers.utils.logger import logger
11
+ from edgefirst.validator.datasets.utils.readers import read_yaml_file
12
+ from edgefirst.validator.datasets.utils.readers import read_labels_file
13
+
14
+
15
def get_image_files(
    directory_path: str,
    check_empty: bool = True,
    extensions: list = None
) -> list:
    """
    Gets the paths of the image files directly inside the specified directory.

    Parameters
    ----------
    directory_path: str
        The path to the directory containing the images. The value is
        joined with each pattern and handed to `glob.glob`, so it may
        itself contain glob wildcards.
    check_empty: bool
        If this is true, an error is raised when no images are
        found at the path provided.
    extensions: list
        A list of glob patterns for the image extensions to search.
        When omitted, case-insensitive patterns for PNG, JPG, and JPEG
        files are used.

    Returns
    -------
    list
        The sorted list of all image paths found.

    Raises
    ------
    ValueError
        Raised if `check_empty` is true and no images were found in the
        directory.
    """
    # Build the default per call instead of sharing one mutable list
    # object across every invocation.
    if extensions is None:
        extensions = ['*.[pP][nN][gG]',
                      '*.[jJ][pP][gG]', '*.[jJ][pP][eE][gG]']

    images = []
    for ext in extensions:
        images.extend(glob.glob(os.path.join(directory_path, ext)))

    if check_empty and not images:
        raise ValueError(
            f"There are no images found in {directory_path}"
        )
    return sorted(images)
55
+
56
+
57
def contains_annotations(annotations: list) -> bool:
    """
    Checks if the detected annotation files are actual Darknet annotations.

    A file counts as an annotation when its lowercased base name is not
    one of the known non-annotation files ("readme.txt", "labels.txt").

    Parameters
    ----------
    annotations: list
        This contains paths of annotation files.

    Returns
    -------
    bool
        This is true if at least one of the paths is an image
        annotation, else it is returned as False. An empty list
        returns False.
    """
    # For additional, extraneous non annotation files, add it here.
    non_annotation_files = {"readme.txt", "labels.txt"}

    # A single membership test covers any number of files, including
    # several non-annotation files gathered from different directories.
    return any(
        os.path.basename(annotation).lower() not in non_annotation_files
        for annotation in annotations
    )
91
+
92
+
93
def get_annotation_files(
    directory_path: str,
    check_empty: bool = True
) -> list:
    """
    Gets the paths of the annotation files within the specified directory.

    Text files are tried first and JSON files second. The first
    extension whose matches pass `contains_annotations` is used.

    Parameters
    ----------
    directory_path: str
        The path to the directory containing the text or JSON annotations.
        The value is joined with a pattern and handed to `glob.glob`.
    check_empty: bool
        If this is true, it will raise an error if there
        are no annotations found at the path provided.

    Returns
    -------
    list
        The sorted list of annotation paths found as either text or
        JSON files. The list is empty when neither extension yields
        annotation files.

    Raises
    ------
    FileNotFoundError
        Raised if `check_empty` is true and no annotation files were
        found in the directory.
    """
    annotations = []
    for ext in ['*.txt', '*.json']:
        candidates = glob.glob(os.path.join(directory_path, ext))
        if contains_annotations(candidates):
            # glob order is filesystem dependent; sort so that the
            # result is deterministic.
            annotations = sorted(candidates)
            break

    if check_empty and not annotations:
        raise FileNotFoundError(
            f"There are no text or JSON files found in {directory_path}"
        )
    return annotations
131
+
132
+
133
def get_numpy_files(
    directory_path: str,
    check_empty: bool = True
) -> list:
    """
    Gets the paths of the NumPy cube files within the specified directory.
    Only files matching `*.cube.npy` are collected; other `.npy` files
    are ignored.

    Parameters
    ----------
    directory_path: str
        The path to the directory containing the NumPy files.
    check_empty: bool
        If this is true, it will raise an error if there
        are no NumPy files found at the path provided.

    Returns
    -------
    list
        The sorted list of NumPy file paths found.

    Raises
    ------
    FileNotFoundError
        Raised if `check_empty` is true and no NumPy files were found
        in the directory.
    """
    # glob order is filesystem dependent; sort for a deterministic result.
    files = sorted(glob.glob(os.path.join(directory_path, "*.cube.npy")))
    if check_empty and not files:
        raise FileNotFoundError(
            f"There are no NumPy files found in {directory_path}")
    return files
164
+
165
+
166
def get_shape(shape: tuple) -> tuple:
    """
    Extracts the (height, width) pair from an image or tensor shape.

    Parameters
    ----------
    shape: tuple
        The input shape. A two element shape is taken to be
        (height, width) already. Otherwise the last dimension decides
        the layout: a value of 2, 3, or 4 is treated as the channel
        count (channels last), anything else as channels first. A four
        element shape is treated as having a leading batch dimension.

    Returns
    -------
    tuple
        The (height, width) of the image dimensions. A two element
        input is returned unchanged.
    """
    rank = len(shape)
    if rank == 2:
        # Nothing to strip, this is (height, width) as given.
        return shape

    has_batch = rank == 4
    # Channel counts for YUYV, RGB, RGBA.
    channels_last = shape[-1] in [2, 3, 4]

    if channels_last:
        # (1, height, width, channels) or (height, width, channels).
        start = 1 if has_batch else 0
    else:
        # (1, channels, height, width) or (channels, height, width).
        start = 2 if has_batch else 1

    height, width = shape[start:start + 2]
    return (height, width)
202
+
203
+
204
def validate_dataset_source(source: str) -> str:
    """
    Validates the existence of the source path.

    Parameters
    ----------
    source: str
        The path to the dataset. Trailing "/" and "*" characters are
        ignored for the existence check only.

    Returns
    -------
    str
        The validated path to the dataset, exactly as it was provided.

    Raises
    ------
    ValueError
        Raised if the provided source to the dataset is not a string.
    FileNotFoundError
        Raised if the provided source to the dataset does not exist.
    """
    if not isinstance(source, str):
        raise ValueError(
            "The provided path to the dataset is not a string. "
            f"Received type: {type(source)}")

    # Strip for radar datasets, in YAML files containing these characters for
    # their subdirectories.
    probe = source.rstrip("/*")
    # Stripping reduces the filesystem root (e.g. "/" or "/*/") to an
    # empty string; check the root itself in that case.
    if not probe and source.startswith("/"):
        probe = "/"

    if not os.path.exists(probe):
        raise FileNotFoundError(
            f"The given dataset path '{source}' does not exist.")
    return source
237
+
238
+
239
def find_yaml_file(source: str) -> Union[str, None]:
    """
    Walks a directory tree and returns the first file found with a
    ".yaml" extension.

    Parameters
    ----------
    source: str
        The path to the directory where the search starts.

    Returns
    -------
    Union[str, None]
        str
            The path to the first YAML file encountered by the walk.
        None
            No file with a ".yaml" extension exists under `source`.
    """
    # Lazily enumerate candidates so the walk stops at the first hit.
    yaml_paths = (
        os.path.join(folder, name)
        for folder, _, names in os.walk(source)
        for name in names
        if os.path.splitext(name)[1] == ".yaml"
    )
    return next(yaml_paths, None)
262
+
263
+
264
def find_labels_file(
    source: str,
    labels_path: str = None,
    labels_file: str = "labels.txt"
) -> list:
    """
    Finds and reads the labels file of a dataset.

    The labels file is located in this order: directly under `source`,
    then at the explicitly provided `labels_path`, then anywhere below
    `source` (the first match of the directory walk is used).

    Parameters
    ----------
    source: str
        The path to the directory to search for the labels file.
    labels_path: str
        The path to the labels file if known.
    labels_file: str
        The name of the labels file to search.

    Returns
    -------
    list
        The contents of the labels file as returned by
        `read_labels_file`. If the labels file is not found,
        an empty list is returned and a warning is logged.

    Raises
    ------
    ValueError, FileNotFoundError
        Propagated from `validate_dataset_source` when an explicit
        `labels_path` is used and fails validation.
    """
    direct_path = os.path.join(source, labels_file)

    # Check if the labels file is under /dataset_path (source)/.
    if os.path.exists(direct_path):
        labels_path = direct_path
    # Check if the labels file path is explicitly provided.
    elif labels_path is not None:
        labels_path = validate_dataset_source(labels_path)
    # If the labels file is not found, then search through the dataset.
    else:
        for root, _, files in os.walk(source):
            if labels_file in files:
                labels_path = os.path.join(root, labels_file)
                # Stop at the first match rather than walking the
                # remainder of a potentially large dataset.
                break

    # Continue validation without the label file.
    if labels_path is None:
        logger(f"The dataset '{labels_file}' file could not be found.",
               code="WARNING")
        return []

    return read_labels_file(labels_path)
312
+
313
+
314
def create_info(
    image_source: str,
    annotation_source: str,
    type: str = None,
    labels: list = None,
) -> dict:
    """
    This creates the info dataset which is a dictionary
    containing the dataset information. This dictionary is formatted
    based on contents of internal Au-Zone formatted dataset YAML files.

    Parameters
    ----------
    image_source: str
        This is the path to the images.
    annotation_source: str
        This is the path to the annotation files.
    type: str
        This is the type of the dataset ["darknet", "arrow"].
    labels: list
        This contains unique string labels. When omitted, a new empty
        list is used.

    Returns
    -------
    dict
        The info dataset with the keys "type", "classes", and
        "validation" (holding the "images" and "annotations" paths).
    """
    # A default of `[]` would be one list object shared by every call
    # and stored in every returned dictionary; create a fresh one instead.
    if labels is None:
        labels = []

    return {
        "type": type,
        "classes": labels,
        "validation": {
            "images": image_source,
            "annotations": annotation_source
        },
    }
349
+
350
+
351
def collect_tfrecord_files(
        source: str, labels: list = None) -> Union[dict, None]:
    """
    Searches the source directory provided to gather tfrecord files.

    Parameters
    ----------
    source: str
        The path to the directory to search for tfrecord files.
    labels: list
        The list of string labels to include in the dataset information.
        When omitted, a new empty list is used.

    Returns
    -------
    Union[dict, None]
        A dictionary with the labels under "classes" and the searched
        directory under "validation" -> "path". If no tfrecord files
        were found, then None is returned.
    """
    if not glob.glob(os.path.join(source, "*.tfrecord")):
        return None

    # A default of `[]` would be shared between calls and stored in the
    # returned dictionary; create a fresh list instead.
    if labels is None:
        labels = []

    # There are no polar yaml representations defined yet.
    return {
        "classes": labels,
        "validation": {"path": source},
    }
377
+
378
+
379
def collect_darknet_files(
        source: str, labels: list = None) -> Union[dict, None]:
    """
    Searches the source directory provided to gather images and text or JSON
    files from Darknet datasets.

    Images are looked up in `source` itself and then in the
    "images/validate" and "images/val" sub-locations; annotations are
    looked up in `source` and then in the "labels/validate" and
    "labels/val" sub-locations. The first location with matches is used.

    Parameters
    ----------
    source: str
        The path to the directory to search for
        images and annotation files.
    labels: list
        The list of string labels to include in the dataset information.
        When omitted, a new empty list is used.

    Returns
    -------
    Union[dict, None]
        This includes the paths found for the images and the annotation
        files and the labels. If no images were found,
        then None is returned.
    """
    for location in ["", "images/validate", "images/validate/**",
                     "images/val", "images/val/**"]:
        image_source = os.path.join(source, location)
        images = get_image_files(image_source, False)
        if images:
            break

    # Without images there is no Darknet dataset; skip the annotation
    # search altogether.
    if not images:
        return None

    for location in ["", "labels/validate", "labels/validate/**",
                     "labels/val", "labels/val/**"]:
        annotation_source = os.path.join(source, location)
        annotations = get_annotation_files(annotation_source, False)
        if annotations:
            break

    # Do not share one default list object between calls.
    if labels is None:
        labels = []

    return create_info(
        image_source,
        annotation_source,
        "darknet",
        labels
    )
422
+
423
+
424
def collect_edgefirst_files(
        source: str, labels: list = None) -> Union[dict, None]:
    """
    Searches the source directory provided to look for 'dataset.arrow'
    which indicates an edgefirst dataset.

    Parameters
    ----------
    source: str
        The path to the directory to search for 'dataset.arrow'.
    labels: list
        The list of string labels to include in the dataset information.
        When omitted, a new empty list is used.

    Returns
    -------
    Union[dict, None]
        This includes the paths found for the images and the 'dataset.arrow'
        file containing annotations and the labels. If the 'dataset.arrow' file
        was not found, then None is returned.
    """
    annotation_source = os.path.join(source, "dataset.arrow")
    if not os.path.exists(annotation_source):
        return None

    # Do not share one default list object between calls.
    if labels is None:
        labels = []

    # The images are expected under the dataset directory itself.
    return create_info(
        source,
        annotation_source,
        "edgefirst",
        labels
    )
455
+
456
+
457
def download_and_extract(url: str, download_path: str, extract_to: str = None):
    """
    Downloads a ZIP file from a URL and extracts it to a specified location.

    Parameters
    ----------
    url: str
        URL of the ZIP file to download.
    download_path: str
        Path where the ZIP file will be saved. Missing parent
        directories are created.
    extract_to: str, optional
        Directory where the ZIP contents will be extracted. If not specified,
        uses the directory of `download_path`.

    Raises
    ------
    requests.HTTPError
        Raised by `raise_for_status` when the server answers with an
        error status.
    """
    import requests

    # `dirname` is empty for a bare file name and `os.makedirs("")`
    # raises, so only create the directory when there is one.
    download_dir = os.path.dirname(download_path)
    if download_dir:
        os.makedirs(download_dir, exist_ok=True)

    logger(
        f"Downloading dataset from {url} to {download_path}...", code="INFO")
    # NOTE(review): no timeout is set, so a stalled server blocks
    # indefinitely; consider passing `timeout=` here.
    # The context manager releases the streamed connection even when
    # the status check or the write fails.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()

        with open(download_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

    logger("Download complete.", code="SUCCESS")

    # Fall back to the current directory when the archive was saved
    # under a bare file name.
    extract_path = extract_to or download_dir or os.curdir
    logger(f"Extracting to {extract_path}...", code="INFO")

    with zipfile.ZipFile(download_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

    logger("Extraction complete.", code="SUCCESS")
492
+
493
+
494
def download_file(url: str, download_path: str):
    """
    Downloads a file from a URL to the specified path.

    Parameters
    ----------
    url: str
        URL of the file to download.
    download_path: str
        Path to save the downloaded file. Missing parent directories
        are created.

    Raises
    ------
    requests.HTTPError
        Raised by `raise_for_status` when the server answers with an
        error status.
    """
    import requests

    # `dirname` is empty for a bare file name and `os.makedirs("")`
    # raises, so only create the directory when there is one.
    download_dir = os.path.dirname(download_path)
    if download_dir:
        os.makedirs(download_dir, exist_ok=True)

    logger(f"Downloading model from {url} to {download_path}...", code="INFO")
    # NOTE(review): no timeout is set, so a stalled server blocks
    # indefinitely; consider passing `timeout=` here.
    # The context manager releases the streamed connection even when
    # the status check or the write fails.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()

        with open(download_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

    logger("Download complete.", code="SUCCESS")
517
+
518
+
519
def classify_directory(source: str, labels_path: str = None) -> dict:
    """
    Builds the info dataset for a dataset stored as a directory.

    A YAML file found anywhere in the directory takes precedence and
    its contents are returned. Otherwise the directory is probed, in
    order, for tfrecord files, Darknet images and annotations, and an
    EdgeFirst 'dataset.arrow' file.

    Parameters
    ----------
    source: str
        The validated path to the dataset directory.
    labels_path: str
        The path to the labels.txt (optional).

    Returns
    -------
    dict
        The contents of the dataset YAML file if one exists, otherwise
        the dictionary produced by the first collector that recognizes
        the directory layout.

    Raises
    ------
    FileNotFoundError
        Raised if the dataset could not be parsed. Might be due
        to missing dataset file.
    """
    # AuZoneNet and AuZoneTFRecords formats ship a dataset yaml file
    # inside the directory.
    yaml_file = find_yaml_file(source)
    if yaml_file:
        return read_yaml_file(yaml_file)

    # Find and read the contents of the labels file.
    labels = find_labels_file(source, labels_path)

    # Standard TFRecord, then standard Darknet, then EdgeFirst datasets.
    collectors = (
        collect_tfrecord_files,
        collect_darknet_files,
        collect_edgefirst_files,
    )
    for collect in collectors:
        info_dataset = collect(source, labels)
        if info_dataset:
            return info_dataset

    raise FileNotFoundError(
        "The info_dataset returned None. " +
        f"Check if the path provided ({source}) contains " +
        "either tfrecord files or images and annotations files."
    )
576
+
577
+
578
def classify_file(source: str) -> dict:
    """
    Builds the info dataset for a dataset given as a single file.

    Parameters
    ----------
    source: str
        The validated path to the dataset file. A ".yaml" file is read
        and its validation paths are resolved; a ".db" file is
        reported as an LMDB cache.

    Returns
    -------
    dict
        For a YAML file, its contents with the validation "images" and
        "annotations" paths made relative to the YAML file's directory
        when they are not absolute. For a ".db" file,
        `{"type": "lmdb"}`.

    Raises
    ------
    NotImplementedError
        Reading certain dataset formats are currently not implemented.
    """
    extension = os.path.splitext(os.path.basename(source))[1]

    # Darknet dataset YAML file.
    if extension == ".yaml":
        contents = read_yaml_file(source)
        base_dir = os.path.dirname(source)

        # The validation section lives either under a "dataset" key or
        # at the top level of the YAML contents.
        try:
            validation = contents.get("dataset").get('validation')
            validation.get('images')
        except AttributeError:
            validation = contents.get('validation')

        # Relative paths are relative to the YAML file's directory.
        for key in ('images', 'annotations'):
            path = validation.get(key)
            if not os.path.isabs(path):
                path = os.path.join(base_dir, path)
            validation[key] = path
        return contents

    # Dataset cache LMDB file.
    if extension == ".db":
        return {"type": "lmdb"}

    if extension == ".txt":
        raise NotImplementedError(
            "Single text file is not currently supported.")

    if extension == ".deepview":
        raise NotImplementedError(
            "DeepView files are not currently supported.")

    raise NotImplementedError(
        "Parsing dataset '{}' is currently not supported.".format(source))
645
+
646
+
647
def classify_dataset(source: str, labels_path: str = None) -> dict:
    """
    Inspects the (.yaml) file contents if it exists.
    Otherwise it will search for either images with text
    annotations (Darknet) or tfrecord files (TFRecord Dataset).

    Parameters
    ----------
    source: str
        The path to the dataset.
        This can point to a YAML file or a directory containing
        tfrecords or images and text annotations.
    labels_path: str
        The path to the labels.txt (optional). Only used when `source`
        is a directory.

    Returns
    -------
    dict
        This dictionary contains the paths of the dataset files
        either the tfrecords or the images and the annotation files.
        This dictionary also contains the string labels if it exists.

    Raises
    ------
    ValueError
        Raised by the source validation if `source` is not a string.
    FileNotFoundError
        Raised by the source validation if `source` does not exist.
    NotImplementedError
        Reading certain dataset formats are currently not implemented.
    """
    source = validate_dataset_source(source)

    if os.path.isdir(source):
        return classify_directory(source, labels_path)
    if os.path.isfile(source):
        return classify_file(source)

    # The source passed validation but is neither a directory nor a
    # regular file (e.g. a path with trailing wildcards). Raise the
    # error instead of silently returning None.
    raise NotImplementedError(
        "Parsing dataset '{}' is currently not supported.".format(source))