edgefirst-validator 4.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepview/modelpack/utils/argmax.py +16 -0
- edgefirst/validator/__init__.py +1 -0
- edgefirst/validator/__main__.py +375 -0
- edgefirst/validator/datasets/__init__.py +118 -0
- edgefirst/validator/datasets/cache.py +296 -0
- edgefirst/validator/datasets/core.py +250 -0
- edgefirst/validator/datasets/darknet.py +446 -0
- edgefirst/validator/datasets/database.py +1067 -0
- edgefirst/validator/datasets/instance/__init__.py +4 -0
- edgefirst/validator/datasets/instance/core.py +222 -0
- edgefirst/validator/datasets/instance/detection.py +145 -0
- edgefirst/validator/datasets/instance/multitask.py +80 -0
- edgefirst/validator/datasets/instance/segmentation.py +120 -0
- edgefirst/validator/datasets/utils/fetch.py +682 -0
- edgefirst/validator/datasets/utils/readers.py +425 -0
- edgefirst/validator/datasets/utils/transformations.py +1695 -0
- edgefirst/validator/evaluators/__init__.py +17 -0
- edgefirst/validator/evaluators/callbacks/__init__.py +3 -0
- edgefirst/validator/evaluators/callbacks/core.py +192 -0
- edgefirst/validator/evaluators/callbacks/plots.py +900 -0
- edgefirst/validator/evaluators/callbacks/studio.py +234 -0
- edgefirst/validator/evaluators/core.py +257 -0
- edgefirst/validator/evaluators/detection.py +749 -0
- edgefirst/validator/evaluators/multitask.py +270 -0
- edgefirst/validator/evaluators/parameters/__init__.py +53 -0
- edgefirst/validator/evaluators/parameters/core.py +554 -0
- edgefirst/validator/evaluators/parameters/dataset.py +239 -0
- edgefirst/validator/evaluators/parameters/model.py +338 -0
- edgefirst/validator/evaluators/parameters/validation.py +528 -0
- edgefirst/validator/evaluators/segmentation.py +729 -0
- edgefirst/validator/evaluators/utils/__init__.py +3 -0
- edgefirst/validator/evaluators/utils/classify.py +292 -0
- edgefirst/validator/evaluators/utils/match.py +262 -0
- edgefirst/validator/evaluators/utils/timer.py +132 -0
- edgefirst/validator/metrics/__init__.py +9 -0
- edgefirst/validator/metrics/data/__init__.py +7 -0
- edgefirst/validator/metrics/data/label.py +668 -0
- edgefirst/validator/metrics/data/metrics.py +759 -0
- edgefirst/validator/metrics/data/plots.py +476 -0
- edgefirst/validator/metrics/data/stats.py +507 -0
- edgefirst/validator/metrics/detection.py +595 -0
- edgefirst/validator/metrics/segmentation.py +173 -0
- edgefirst/validator/metrics/utils/math.py +717 -0
- edgefirst/validator/publishers/__init__.py +3 -0
- edgefirst/validator/publishers/console.py +147 -0
- edgefirst/validator/publishers/studio.py +128 -0
- edgefirst/validator/publishers/tensorboard.py +119 -0
- edgefirst/validator/publishers/utils/logger.py +111 -0
- edgefirst/validator/publishers/utils/table.py +403 -0
- edgefirst/validator/runners/__init__.py +8 -0
- edgefirst/validator/runners/core.py +727 -0
- edgefirst/validator/runners/deepviewrt.py +177 -0
- edgefirst/validator/runners/hailo.py +263 -0
- edgefirst/validator/runners/keras.py +150 -0
- edgefirst/validator/runners/kinara.py +265 -0
- edgefirst/validator/runners/offline.py +228 -0
- edgefirst/validator/runners/onnx.py +241 -0
- edgefirst/validator/runners/processing/decode.py +320 -0
- edgefirst/validator/runners/processing/dvapi.py +4192 -0
- edgefirst/validator/runners/processing/nms.py +637 -0
- edgefirst/validator/runners/processing/outputs.py +507 -0
- edgefirst/validator/runners/tensorrt.py +321 -0
- edgefirst/validator/runners/tflite.py +221 -0
- edgefirst/validator/validate.py +843 -0
- edgefirst/validator/visualize/__init__.py +3 -0
- edgefirst/validator/visualize/detection.py +623 -0
- edgefirst/validator/visualize/segmentation.py +281 -0
- edgefirst/validator/visualize/utils/plots.py +635 -0
- edgefirst_validator-4.2.1.dist-info/METADATA +111 -0
- edgefirst_validator-4.2.1.dist-info/RECORD +73 -0
- edgefirst_validator-4.2.1.dist-info/WHEEL +5 -0
- edgefirst_validator-4.2.1.dist-info/entry_points.txt +2 -0
- edgefirst_validator-4.2.1.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,682 @@
|
|
|
1
|
+
"""
|
|
2
|
+
This module contains functions for fetching dataset artifacts.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import glob
|
|
7
|
+
import zipfile
|
|
8
|
+
from typing import Union
|
|
9
|
+
|
|
10
|
+
from edgefirst.validator.publishers.utils.logger import logger
|
|
11
|
+
from edgefirst.validator.datasets.utils.readers import read_yaml_file
|
|
12
|
+
from edgefirst.validator.datasets.utils.readers import read_labels_file
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_image_files(
    directory_path: str,
    check_empty: bool = True,
    extensions: list = ['*.[pP][nN][gG]',
                        '*.[jJ][pP][gG]', '*.[jJ][pP][eE][gG]']
) -> list:
    """
    Collects the image paths located directly inside a directory.

    Every glob pattern in `extensions` is joined onto `directory_path`
    and expanded. The matches of all patterns are merged into a single
    sorted list.

    Parameters
    ----------
    directory_path: str
        The path to the directory containing the images.
    check_empty: bool
        When true, an error is raised if none of the patterns
        matched any file.
    extensions: list
        The glob patterns of the image extensions to search for.

    Returns
    -------
    list
        The sorted paths of all the images that were matched.

    Raises
    ------
    ValueError
        Raised when `check_empty` is true and no images were found
        in the directory.
    """
    matches = [
        path
        for pattern in extensions
        for path in glob.glob(os.path.join(directory_path, pattern))
    ]

    if check_empty and not matches:
        raise ValueError(
            f"There are no images found in {directory_path}"
        )

    matches.sort()
    return matches
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def contains_annotations(annotations: list) -> bool:
    """
    Decides whether the detected files are actual Darknet annotations
    rather than auxiliary files such as a readme or a labels file.

    Parameters
    ----------
    annotations: list
        The paths of the candidate annotation files.

    Returns
    -------
    bool
        False if the list is empty or if it consists solely of the
        known non-annotation files (one of them, or exactly both).
        True otherwise.
    """
    # For additional, extraneous non-annotation files, add them here.
    # The list is sorted so it can be compared against sorted file names.
    ignored = sorted(["readme.txt", "labels.txt"])

    count = len(annotations)
    if count == 0:
        return False
    # Three or more files are always treated as annotations.
    if count > 2:
        return True

    # File names are compared case-insensitively.
    names = sorted(os.path.basename(path).lower() for path in annotations)
    if count == 1:
        return names[0] not in ignored
    # Two files: only the exact pair of non-annotation files is rejected.
    return names != ignored
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def get_annotation_files(
    directory_path: str,
    check_empty: bool = True
) -> list:
    """
    Collects the annotation paths located directly inside a directory.

    Text files are tried first and JSON files second. The first group
    accepted by `contains_annotations` is returned. If neither group is
    accepted, the result of the last search (JSON) is returned.

    Parameters
    ----------
    directory_path: str
        The path to the directory containing the text or JSON annotations.
    check_empty: bool
        When true, an error is raised if the resulting list is empty.

    Returns
    -------
    list
        The annotation paths found as either text or JSON files,
        in the order reported by `glob`.

    Raises
    ------
    FileNotFoundError
        Raised when `check_empty` is true and no annotation files
        were found in the directory.
    """
    found = []
    for pattern in ('*.txt', '*.json'):
        found = glob.glob(os.path.join(directory_path, pattern))
        if contains_annotations(found):
            break

    if check_empty and not found:
        raise FileNotFoundError(
            f"There are no text or JSON files found in {directory_path}"
        )
    return found
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def get_numpy_files(
    directory_path: str,
    check_empty: bool = True
) -> list:
    """
    Collects the paths of the NumPy cube files (`*.cube.npy`) located
    directly inside a directory. These are usually the radar data
    annotations.

    Parameters
    ----------
    directory_path: str
        The path to the directory containing the NumPy files.
    check_empty: bool
        When true, an error is raised if no NumPy files were matched.

    Returns
    -------
    list
        The NumPy file paths found, in the order reported by `glob`.

    Raises
    ------
    FileNotFoundError
        Raised when `check_empty` is true and no NumPy files
        were found in the directory.
    """
    pattern = os.path.join(directory_path, "*.cube.npy")
    cubes = glob.glob(pattern)

    if check_empty and not cubes:
        raise FileNotFoundError(
            f"There are no NumPy files found in {directory_path}")
    return cubes
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def get_shape(shape: tuple) -> tuple:
    """
    Extracts the (height, width) pair from an image or tensor shape.

    Parameters
    ----------
    shape: tuple
        The input shape. A two element shape is taken to be
        (height, width) already. Otherwise the last element decides
        the layout: a value of 2, 3, or 4 (YUYV, RGB, RGBA) is treated
        as a trailing channels dimension, anything else as a
        channels-first layout. A four element shape is assumed to
        carry a leading batch dimension.

    Returns
    -------
    tuple
        The (height, width) shape of the image dimensions. A two
        element input is returned unchanged.
    """
    rank = len(shape)

    # This will contain (height, width) already.
    if rank == 2:
        return shape

    # Locate the index of the height; the width directly follows it.
    has_batch = rank == 4
    if shape[-1] in (2, 3, 4):
        # Channels last: (1, height, width, channels) or
        # (height, width, channels).
        start = 1 if has_batch else 0
    else:
        # Channels first: (1, channels, height, width) or
        # (channels, height, width).
        start = 2 if has_batch else 1

    height, width = shape[start:start + 2]
    return (height, width)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def validate_dataset_source(source: str) -> str:
    """
    Validates the existence of the source path.

    Parameters
    ----------
    source: str
        The path to the dataset. Trailing "/" and "*" characters
        (such as the "/*/" suffix used in radar dataset YAML files to
        denote subdirectories) are ignored for the existence check.

    Returns
    -------
    str
        The validated path to the dataset, exactly as it was provided.

    Raises
    ------
    ValueError
        Raised if the provided source to the dataset is not a string.
    FileNotFoundError
        Raised if the provided source to the dataset does not exist.
    """
    if not isinstance(source, str):
        raise ValueError(
            "The provided path to the dataset is not a string. " +
            "Received type: {}".format(
                type(source)))

    # Strip for radar datasets, in YAML files containing these characters
    # for their subdirectories. Note that `rstrip` removes any trailing run
    # of the given characters, not a literal suffix.
    candidate = source.rstrip("/*")

    # Stripping an absolute path made up only of "/" and "*" (for example
    # the filesystem root "/") leaves an empty string, which never exists.
    # Restore the root so such paths are validated correctly.
    if not candidate and source.startswith("/"):
        candidate = "/"

    if not os.path.exists(candidate):
        raise FileNotFoundError(
            "The given dataset path '{}' does not exist.".format(source))
    return source
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def find_yaml_file(source: str) -> Union[str, None]:
    """
    Walks a directory tree looking for a file with the ".yaml" extension
    and returns the path of the first one encountered.

    Parameters
    ----------
    source: str
        The path to the directory where the search starts.

    Returns
    -------
    Union[str, None]
        str
            The path to the first YAML file encountered during the walk.
        None
            There are no YAML files found.
    """
    for folder, _, names in os.walk(source):
        # Only the ".yaml" extension is recognized.
        hit = next(
            (name for name in names
             if os.path.splitext(name)[1] == ".yaml"),
            None
        )
        if hit is not None:
            return os.path.join(folder, hit)
    return None
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def find_labels_file(
    source: str,
    labels_path: str = None,
    labels_file: str = "labels.txt"
) -> list:
    """
    Locates the labels file of a dataset and returns its contents.

    The lookup order is:

    1. `<source>/<labels_file>`, if it exists.
    2. The explicitly provided `labels_path`, which must exist.
    3. A walk through `source` looking for `labels_file`. If several
       files are found, the last one encountered is used.

    Parameters
    ----------
    source: str
        The path to the directory to search for the labels file.
    labels_path: str
        The path to the labels file if known.
    labels_file: str
        The name of the labels file to search.

    Returns
    -------
    list
        The contents of the labels file as returned by
        `read_labels_file`. If the labels file is not found,
        an empty list is returned.

    Raises
    ------
    ValueError, FileNotFoundError
        Propagated from `validate_dataset_source` when the explicitly
        provided `labels_path` is used and is invalid.
    """
    direct_path = os.path.join(source, labels_file)

    # Check if labels.txt is under /dataset_path (source)/labels.txt.
    if os.path.exists(direct_path):
        labels_path = direct_path
    # Check if labels.txt path is explicitly provided.
    elif labels_path is not None:
        labels_path = validate_dataset_source(labels_path)
    # If labels.txt is not found, then search through the dataset.
    else:
        for folder, _, names in os.walk(source):
            if labels_file in names:
                # No early exit: a later match replaces an earlier one.
                labels_path = os.path.join(folder, labels_file)
        # Continue validation without the label file.
        if labels_path is None:
            logger("The dataset 'labels.txt' file could not be found.",
                   code="WARNING")

    if labels_path is None:
        return []
    return read_labels_file(labels_path)
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def create_info(
    image_source: str,
    annotation_source: str,
    type: str = None,
    labels: list = None,
) -> dict:
    """
    This creates the info dataset which is a dictionary
    containing the dataset information. This dictionary is formatted
    based on contents of internal Au-Zone formatted dataset YAML files.

    Parameters
    ----------
    image_source: str
        This is the path to the images.
    annotation_source: str
        This is the path to the annotation files.
    type: str
        This is the type of the dataset, e.g. "darknet" or "edgefirst".
    labels: list
        This contains unique string labels. When omitted, a new empty
        list is used for every call.

    Returns
    -------
    dict
        The info dataset with the keys "type", "classes", and
        "validation" (holding the "images" and "annotations" paths).
    """
    # A list default would be created once and shared by every returned
    # dictionary; mutating one result's "classes" would then leak into
    # all later calls. Build a fresh list per call instead.
    if labels is None:
        labels = []

    return {
        "type": type,
        "classes": labels,
        "validation": {
            "images": image_source,
            "annotations": annotation_source,
        },
    }
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def collect_tfrecord_files(
        source: str, labels: list = None) -> Union[dict, None]:
    """
    Searches the source directory provided to gather tfrecord files.

    Parameters
    ----------
    source: str
        The path to the directory to search for tfrecord files.
    labels: list
        The list of string labels to include in the dataset information.
        When omitted, a new empty list is used for every call.

    Returns
    -------
    Union[dict, None]
        A dictionary with the labels under "classes" and the source
        directory under "validation" -> "path". If no tfrecord files
        were found directly inside the source, then None is returned.
    """
    if not glob.glob(os.path.join(source, "*.tfrecord")):
        return None

    # A list default would be shared by every returned dictionary, so a
    # mutation of one result's "classes" would leak into later calls.
    if labels is None:
        labels = []

    # There are no polar yaml representations defined yet.
    return {
        "classes": labels,
        "validation": {"path": source},
    }
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def collect_darknet_files(
        source: str, labels: list = None) -> Union[dict, None]:
    """
    Searches the source directory provided to gather images and text or JSON
    files from Darknet datasets.

    Images are looked up in the source itself, then in "images/validate",
    "images/val" and their immediate subdirectories. Annotations are looked
    up the same way under "labels/validate" and "labels/val". The first
    location with matches is used.

    Parameters
    ----------
    source: str
        The path to the directory to search for
        images and annotation files.
    labels: list
        The list of string labels to include in the dataset information.
        When omitted, a new empty list is used for every call.

    Returns
    -------
    Union[dict, None]
        This includes the paths found for the images and the annotation
        files and the labels. If no images were found,
        then None is returned.
    """
    # A list default would be shared by every returned dictionary, so a
    # mutation of one result's "classes" would leak into later calls.
    if labels is None:
        labels = []

    # Without `recursive=True`, glob treats "**" like "*", so these
    # patterns reach one directory level below validate/val.
    for location in ["", "images/validate", "images/validate/**",
                     "images/val", "images/val/**"]:
        image_source = os.path.join(source, location)
        images = get_image_files(image_source, False)
        if len(images) > 0:
            break

    # NOTE: if no location holds annotations, `annotation_source` is left
    # pointing at the last candidate ("labels/val/**").
    for location in ["", "labels/validate", "labels/validate/**",
                     "labels/val", "labels/val/**"]:
        annotation_source = os.path.join(source, location)
        annotations = get_annotation_files(annotation_source, False)
        if len(annotations) > 0:
            break

    if len(images) == 0:
        return None

    return create_info(
        image_source,
        annotation_source,
        "darknet",
        labels
    )
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def collect_edgefirst_files(
        source: str, labels: list = None) -> Union[dict, None]:
    """
    Searches the source directory provided to look for 'dataset.arrow'
    which indicates an edgefirst dataset.

    Parameters
    ----------
    source: str
        The path to the directory to search for 'dataset.arrow'.
    labels: list
        The list of string labels to include in the dataset information.
        When omitted, a new empty list is used for every call.

    Returns
    -------
    Union[dict, None]
        This includes the source directory as the images path, the path
        to the 'dataset.arrow' file as the annotations path, and the
        labels. If the 'dataset.arrow' file was not found, then None is
        returned.
    """
    annotation_source = os.path.join(source, "dataset.arrow")
    if not os.path.exists(annotation_source):
        return None

    # A list default would be shared by every returned dictionary, so a
    # mutation of one result's "classes" would leak into later calls.
    if labels is None:
        labels = []

    # The images path is the dataset directory itself.
    return create_info(
        source,
        annotation_source,
        "edgefirst",
        labels
    )
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def download_and_extract(url: str, download_path: str, extract_to: str = None):
    """
    Downloads a ZIP file from a URL and extracts it to a specified location.

    Parameters
    ----------
    url: str
        URL of the ZIP file to download.
    download_path: str
        Path where the ZIP file will be saved. Missing parent
        directories are created.
    extract_to: str, optional
        Directory where the ZIP contents will be extracted. If not specified,
        uses the directory of `download_path`.

    Raises
    ------
    requests.HTTPError
        Raised by `raise_for_status` if the server responds with an
        error status code.
    """
    import requests

    # `os.path.dirname` is "" for a bare file name and `os.makedirs("")`
    # raises, so fall back to the current directory in that case.
    download_dir = os.path.dirname(download_path) or "."
    os.makedirs(download_dir, exist_ok=True)

    logger(
        f"Downloading dataset from {url} to {download_path}...", code="INFO")

    # The context manager releases the streamed connection even when the
    # status check or the file write fails.
    # NOTE(review): no timeout is set, so a stalled server blocks forever.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()

        with open(download_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

    logger("Download complete.", code="SUCCESS")

    extract_path = extract_to or download_dir
    logger(f"Extracting to {extract_path}...", code="INFO")

    with zipfile.ZipFile(download_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

    logger("Extraction complete.", code="SUCCESS")
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def download_file(url: str, download_path: str):
    """
    Downloads a file from a URL to the specified path.

    Parameters
    ----------
    url: str
        URL of the file to download.
    download_path: str
        Path to save the downloaded file. Missing parent
        directories are created.

    Raises
    ------
    requests.HTTPError
        Raised by `raise_for_status` if the server responds with an
        error status code.
    """
    import requests

    # `os.path.dirname` is "" for a bare file name and `os.makedirs("")`
    # raises, so fall back to the current directory in that case.
    os.makedirs(os.path.dirname(download_path) or ".", exist_ok=True)

    logger(f"Downloading model from {url} to {download_path}...", code="INFO")

    # The context manager releases the streamed connection even when the
    # status check or the file write fails.
    # NOTE(review): no timeout is set, so a stalled server blocks forever.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()

        with open(download_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

    logger("Download complete.", code="SUCCESS")
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def classify_directory(source: str, labels_path: str = None) -> dict:
    """
    Inspects a dataset directory and builds the info dataset that
    describes it.

    The formats are tried in this order: a dataset YAML file anywhere
    inside the directory, tfrecord files, Darknet images and
    annotations, and finally an EdgeFirst 'dataset.arrow' file.

    Parameters
    ----------
    source: str
        The validated path to the dataset directory.
    labels_path: str
        The path to the labels.txt (optional).

    Returns
    -------
    dict
        The contents of the YAML file if one is found. Otherwise a
        dictionary with the paths of the dataset files, either the
        tfrecords or the images and the annotation files, together
        with the string labels if they exist.

    Raises
    ------
    FileNotFoundError
        Raised if the dataset could not be parsed. Might be due
        to missing dataset file.
    """
    # Handle AuZoneNet and AuZoneTFRecords format:
    # check if a dataset yaml file is inside the directory.
    yaml_path = find_yaml_file(source)
    if yaml_path:
        return read_yaml_file(yaml_path)

    # Find and read the contents of the labels file.
    labels = find_labels_file(source, labels_path)

    # Standard TFRecord, standard Darknet, then EdgeFirst datasets.
    collectors = (
        collect_tfrecord_files,
        collect_darknet_files,
        collect_edgefirst_files,
    )
    for collect in collectors:
        info_dataset = collect(source, labels)
        if info_dataset:
            return info_dataset

    raise FileNotFoundError(
        "The info_dataset returned None. " +
        f"Check if the path provided ({source}) contains " +
        "either tfrecord files or images and annotations files."
    )
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def classify_file(source: str) -> dict:
    """
    Inspects a dataset path that points to a single file.

    Parameters
    ----------
    source: str
        The validated path to the dataset file. A ".yaml" file is read
        and its validation paths are resolved; a ".db" file is reported
        as an LMDB cache. Any other file is rejected.

    Returns
    -------
    dict
        For a YAML file, its contents with the validation "images" and
        "annotations" paths made relative to the YAML file's directory
        when they are not absolute. The paths are read from
        "dataset" -> "validation" if present, otherwise from the top
        level "validation" entry. For a ".db" file, `{"type": "lmdb"}`.

    Raises
    ------
    NotImplementedError
        Reading certain dataset formats are currently not implemented.
    """
    extension = os.path.splitext(os.path.basename(source))[1]

    # Darknet dataset YAML file.
    if extension == ".yaml":
        contents = read_yaml_file(source)
        base_dir = os.path.dirname(source)

        def resolve(section):
            # Rewrite relative paths in place, anchored at the YAML file.
            for key in ('images', 'annotations'):
                path = section.get(key)
                if not os.path.isabs(path):
                    path = os.path.join(base_dir, path)
                section[key] = path

        try:
            resolve(contents.get("dataset").get('validation'))
        except AttributeError:
            # No nested "dataset" -> "validation" mapping; fall back to
            # the top level "validation" entry.
            resolve(contents.get('validation'))
        return contents

    # Dataset cache LMDB file.
    if extension == ".db":
        return {"type": "lmdb"}

    if extension == ".txt":
        raise NotImplementedError(
            "Single text file is not currently supported.")

    if extension == ".deepview":
        raise NotImplementedError(
            "DeepView files are not currently supported.")

    raise NotImplementedError(
        "Parsing dataset '{}' is currently not supported.".format(source))
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
def classify_dataset(source: str, labels_path: str = None) -> dict:
    """
    Validates the dataset path and dispatches to the directory or
    file classifier to build the dataset information.

    Parameters
    ----------
    source: str
        The path to the dataset.
        This can point to a YAML file or a directory containing
        tfrecords or images and text annotations.
    labels_path: str
        The path to the labels.txt (optional). Only used when the
        source is a directory.

    Returns
    -------
    dict
        This dictionary contains the paths of the dataset files
        either the tfrecords or the images and the annotation files.
        This dictionary also contains the string labels if it exists.

    Raises
    ------
    ValueError
        Raised by `validate_dataset_source` if the source is not a string.
    FileNotFoundError
        Raised by `validate_dataset_source` if the source does not exist.
    NotImplementedError
        Reading certain dataset formats are currently not implemented.
        Also raised when the source is neither a directory nor a file.
    """
    source = validate_dataset_source(source)

    if os.path.isdir(source):
        return classify_directory(source, labels_path)
    if os.path.isfile(source):
        return classify_file(source)

    # The exception was previously constructed but never raised, so an
    # unsupported source silently produced None instead of an error.
    raise NotImplementedError(
        "Parsing dataset '{}' is currently not supported.".format(source))
|