mapillary-tools 0.13.3__py3-none-any.whl → 0.14.0a1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- mapillary_tools/__init__.py +1 -1
- mapillary_tools/api_v4.py +106 -7
- mapillary_tools/authenticate.py +325 -64
- mapillary_tools/{geotag/blackvue_parser.py → blackvue_parser.py} +74 -54
- mapillary_tools/camm/camm_builder.py +55 -97
- mapillary_tools/camm/camm_parser.py +425 -177
- mapillary_tools/commands/__main__.py +2 -0
- mapillary_tools/commands/authenticate.py +8 -1
- mapillary_tools/commands/process.py +27 -51
- mapillary_tools/commands/process_and_upload.py +18 -5
- mapillary_tools/commands/sample_video.py +2 -3
- mapillary_tools/commands/upload.py +18 -9
- mapillary_tools/commands/video_process_and_upload.py +19 -5
- mapillary_tools/config.py +28 -12
- mapillary_tools/constants.py +46 -4
- mapillary_tools/exceptions.py +34 -35
- mapillary_tools/exif_read.py +158 -53
- mapillary_tools/exiftool_read.py +19 -5
- mapillary_tools/exiftool_read_video.py +12 -1
- mapillary_tools/exiftool_runner.py +77 -0
- mapillary_tools/geo.py +148 -107
- mapillary_tools/geotag/factory.py +298 -0
- mapillary_tools/geotag/geotag_from_generic.py +152 -11
- mapillary_tools/geotag/geotag_images_from_exif.py +43 -124
- mapillary_tools/geotag/geotag_images_from_exiftool.py +66 -70
- mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +32 -48
- mapillary_tools/geotag/geotag_images_from_gpx.py +41 -116
- mapillary_tools/geotag/geotag_images_from_gpx_file.py +15 -96
- mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -2
- mapillary_tools/geotag/geotag_images_from_video.py +46 -46
- mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +98 -92
- mapillary_tools/geotag/geotag_videos_from_gpx.py +140 -0
- mapillary_tools/geotag/geotag_videos_from_video.py +149 -181
- mapillary_tools/geotag/options.py +159 -0
- mapillary_tools/{geotag → gpmf}/gpmf_parser.py +194 -171
- mapillary_tools/history.py +3 -11
- mapillary_tools/mp4/io_utils.py +0 -1
- mapillary_tools/mp4/mp4_sample_parser.py +11 -3
- mapillary_tools/mp4/simple_mp4_parser.py +0 -10
- mapillary_tools/process_geotag_properties.py +151 -386
- mapillary_tools/process_sequence_properties.py +554 -202
- mapillary_tools/sample_video.py +8 -15
- mapillary_tools/telemetry.py +24 -12
- mapillary_tools/types.py +80 -22
- mapillary_tools/upload.py +311 -261
- mapillary_tools/upload_api_v4.py +55 -95
- mapillary_tools/uploader.py +396 -254
- mapillary_tools/utils.py +26 -0
- mapillary_tools/video_data_extraction/extract_video_data.py +17 -36
- mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +34 -19
- mapillary_tools/video_data_extraction/extractors/camm_parser.py +41 -17
- mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +4 -1
- mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +1 -2
- mapillary_tools/video_data_extraction/extractors/gopro_parser.py +37 -22
- {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info}/METADATA +3 -2
- mapillary_tools-0.14.0a1.dist-info/RECORD +78 -0
- {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info}/WHEEL +1 -1
- mapillary_tools/geotag/utils.py +0 -26
- mapillary_tools-0.13.3.dist-info/RECORD +0 -75
- /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
- /mapillary_tools/{geotag → gpmf}/gps_filter.py +0 -0
- {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info}/entry_points.txt +0 -0
- {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info/licenses}/LICENSE +0 -0
- {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info}/top_level.txt +0 -0
Diff of `mapillary_tools/process_sequence_properties.py` (+554 -202):

```diff
--- mapillary_tools/process_sequence_properties.py (0.13.3)
+++ mapillary_tools/process_sequence_properties.py (0.14.0a1)
@@ -4,61 +4,91 @@ import math
 import os
 import typing as T
 
-from . import constants, geo, types
-from .exceptions import MapillaryBadParameterError, MapillaryDuplicationError
+from . import constants, exceptions, geo, types, utils
 
 LOG = logging.getLogger(__name__)
 
 
-
-PointSequence = T.List[
+SeqItem = T.TypeVar("SeqItem")
+PointSequence = T.List[geo.PointLike]
 
 
-def cut_sequence_by_time_distance(
-    sequence:
-
-
-
-    sequences
+def split_sequence_by(
+    sequence: T.List[SeqItem],
+    should_split: T.Callable[[SeqItem, SeqItem], bool],
+) -> T.List[T.List[SeqItem]]:
+    """
+    Split a sequence into multiple sequences by should_split(prev, cur) => True
+    """
+    output_sequences: T.List[T.List[SeqItem]] = []
 
-
-
+    seq = iter(sequence)
+
+    prev = next(seq, None)
+    if prev is None:
+        return output_sequences
 
-
+    output_sequences.append([prev])
+
+    for cur in seq:
         # invariant: prev is processed
-
-            (
-
-
-
-            sequences.append([cur])
-            continue
-        time_diff = cur.time - prev.time
-        assert 0 <= time_diff, "sequence must be sorted by capture times"
-        if cutoff_time <= time_diff:
-            sequences.append([cur])
-            continue
-        sequences[-1].append(cur)
+        if should_split(prev, cur):
+            output_sequences.append([cur])
+        else:
+            output_sequences[-1].append(cur)
+        prev = cur
         # invariant: cur is processed
 
-
+    assert sum(len(s) for s in output_sequences) == len(sequence)
+
+    return output_sequences
+
+
+def split_sequence_by_agg(
+    sequence: T.List[SeqItem],
+    should_split_with_sequence_state: T.Callable[[SeqItem, T.Dict], bool],
+) -> T.List[T.List[SeqItem]]:
+    """
+    Split a sequence by should_split_with_sequence_state(cur, sequence_state) => True
+    """
+    output_sequences: T.List[T.List[SeqItem]] = []
+    sequence_state: T.Dict = {}
+
+    for cur in sequence:
+        start_new_sequence = should_split_with_sequence_state(cur, sequence_state)
+
+        if not output_sequences:
+            output_sequences.append([])
+
+        if start_new_sequence:
+            # DO NOT reset the state because it contains the information of current item
+            # sequence_state = {}
+            if output_sequences[-1]:
+                output_sequences.append([])
+
+        output_sequences[-1].append(cur)
+
+    assert sum(len(s) for s in output_sequences) == len(sequence)
+
+    return output_sequences
 
 
 def duplication_check(
     sequence: PointSequence,
-
-
+    max_duplicate_distance: float,
+    max_duplicate_angle: float,
 ) -> T.Tuple[PointSequence, T.List[types.ErrorMetadata]]:
     dedups: PointSequence = []
     dups: T.List[types.ErrorMetadata] = []
 
-
-    prev = next(
+    it = iter(sequence)
+    prev = next(it, None)
     if prev is None:
         return dedups, dups
+
     dedups.append(prev)
 
-    for cur in
+    for cur in it:
         # invariant: prev is processed
         distance = geo.gps_distance(
             (prev.lat, prev.lon),
@@ -70,21 +100,21 @@ def duplication_check(
         else:
             angle_diff = None
 
-        if distance <=
-            angle_diff is
+        if distance <= max_duplicate_distance and (
+            angle_diff is None or angle_diff <= max_duplicate_angle
         ):
-
-
-
-
-
-
-
-                    ),
-                    cur.filename,
-                    filetype=types.FileType.IMAGE,
+            msg = f"Duplicate of its previous image in terms of distance <= {max_duplicate_distance} and angle <= {max_duplicate_angle}"
+            dup = types.describe_error_metadata(
+                exceptions.MapillaryDuplicationError(
+                    msg,
+                    types.as_desc(cur),
+                    distance=distance,
+                    angle_diff=angle_diff,
                 ),
+                cur.filename,
+                filetype=types.FileType.IMAGE,
             )
+            dups.append(dup)
             # prev does not change
         else:
             dedups.append(cur)
@@ -94,86 +124,14 @@ def duplication_check(
     return dedups, dups
 
 
-def
-    sequence: T.List[types.ImageMetadata],
-    max_images: int,
-    max_sequence_filesize: int,
-    max_sequence_pixels: int,
-) -> T.List[T.List[types.ImageMetadata]]:
-    """
-    Cut a sequence into multiple sequences by max_images or max filesize
-    """
-    sequences: T.List[T.List[types.ImageMetadata]] = []
-    last_sequence_file_size = 0
-    last_sequence_pixels = 0
-
-    for image in sequence:
-        # decent default values if width/height not available
-        width = 1024 if image.width is None else image.width
-        height = 1024 if image.height is None else image.height
-
-        filesize = os.path.getsize(image.filename)
-
-        if len(sequences) == 0:
-            start_new_sequence = True
-        else:
-            if sequences[-1]:
-                if max_images < len(sequences[-1]):
-                    LOG.debug(
-                        "Cut the sequence because the current sequence (%s) reaches the max number of images (%s)",
-                        len(sequences[-1]),
-                        max_images,
-                    )
-                    start_new_sequence = True
-                elif max_sequence_filesize < last_sequence_file_size + filesize:
-                    LOG.debug(
-                        "Cut the sequence because the current sequence (%s) reaches the max filesize (%s)",
-                        last_sequence_file_size + filesize,
-                        max_sequence_filesize,
-                    )
-                    start_new_sequence = True
-                elif max_sequence_pixels < last_sequence_pixels + width * height:
-                    LOG.debug(
-                        "Cut the sequence because the current sequence (%s) reaches the max pixels (%s)",
-                        last_sequence_pixels + width * height,
-                        max_sequence_pixels,
-                    )
-                    start_new_sequence = True
-                else:
-                    start_new_sequence = False
-            else:
-                start_new_sequence = False
-
-        if start_new_sequence:
-            sequences.append([])
-            last_sequence_file_size = 0
-            last_sequence_pixels = 0
-
-        sequences[-1].append(image)
-        last_sequence_file_size += filesize
-        last_sequence_pixels += width * height
-
-    assert sum(len(s) for s in sequences) == len(sequence)
-
-    return sequences
-
-
-def _group_sort_images_by_folder(
+def _group_by(
     image_metadatas: T.List[types.ImageMetadata],
-
-
-
-    for
-
-
-
-    sequences = list(sequences_by_parent.values())
-    for sequence in sequences:
-        sequence.sort(
-            key=lambda metadata: metadata.sort_key(),
-        )
-
-    return sequences
+    group_key_func: T.Callable[[types.ImageMetadata], T.Hashable],
+) -> T.Dict[T.Hashable, T.List[types.ImageMetadata]]:
+    grouped: T.Dict[T.Hashable, T.List[types.ImageMetadata]] = {}
+    for metadata in image_metadatas:
+        grouped.setdefault(group_key_func(metadata), []).append(metadata)
+    return grouped
 
 
 def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
@@ -217,55 +175,423 @@ def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
 def _parse_filesize_in_bytes(filesize_str: str) -> int:
     filesize_str = filesize_str.strip().upper()
 
-
-
-
-
-
-
-
-
-
-
+    try:
+        if filesize_str.endswith("B"):
+            return int(filesize_str[:-1])
+        elif filesize_str.endswith("K"):
+            return int(filesize_str[:-1]) * 1024
+        elif filesize_str.endswith("M"):
+            return int(filesize_str[:-1]) * 1024 * 1024
+        elif filesize_str.endswith("G"):
+            return int(filesize_str[:-1]) * 1024 * 1024 * 1024
+        else:
+            return int(filesize_str)
+    except ValueError:
+        raise exceptions.MapillaryBadParameterError(
+            f"Expect valid file size that ends with B, K, M, or G, but got {filesize_str}"
+        )
 
 
 def _parse_pixels(pixels_str: str) -> int:
     pixels_str = pixels_str.strip().upper()
 
-
-
-
-
-
-
+    try:
+        if pixels_str.endswith("K"):
+            return int(pixels_str[:-1]) * 1000
+        elif pixels_str.endswith("M"):
+            return int(pixels_str[:-1]) * 1000 * 1000
+        elif pixels_str.endswith("G"):
+            return int(pixels_str[:-1]) * 1000 * 1000 * 1000
+        else:
+            return int(pixels_str)
+    except ValueError:
+        raise exceptions.MapillaryBadParameterError(
+            f"Expect valid number of pixels that ends with K, M, or G, but got {pixels_str}"
+        )
+
+
+def _avg_speed(sequence: T.Sequence[geo.PointLike]) -> float:
+    total_distance = 0.0
+    for cur, nxt in geo.pairwise(sequence):
+        total_distance += geo.gps_distance(
+            (cur.lat, cur.lon),
+            (nxt.lat, nxt.lon),
+        )
+
+    if sequence:
+        time_diff = sequence[-1].time - sequence[0].time
     else:
-
+        time_diff = 0.0
 
+    if time_diff == 0.0:
+        return float("inf")
 
-
-
-
-
-
-
-
-
-
-
-
+    return total_distance / time_diff
+
+
+def _is_video_stationary(
+    sequence: T.Sequence[geo.PointLike], max_radius_in_meters: float
+) -> bool:
+    if not sequence:
+        return 0.0 <= max_radius_in_meters
+
+    start = (sequence[0].lat, sequence[0].lon)
+    for p in sequence:
+        distance = geo.gps_distance(start, (p.lat, p.lon))
+        if distance > max_radius_in_meters:
+            return False
+
+    return True
+
+
+def _check_video_limits(
+    video_metadatas: T.Sequence[types.VideoMetadata],
+    max_sequence_filesize_in_bytes: int,
+    max_avg_speed: float,
+    max_radius_for_stationary_check: float,
+) -> T.Tuple[T.List[types.VideoMetadata], T.List[types.ErrorMetadata]]:
+    output_video_metadatas: T.List[types.VideoMetadata] = []
+    error_metadatas: T.List[types.ErrorMetadata] = []
+
+    for video_metadata in video_metadatas:
+        try:
+            is_stationary = _is_video_stationary(
+                video_metadata.points,
+                max_radius_in_meters=max_radius_for_stationary_check,
+            )
+            if is_stationary:
+                raise exceptions.MapillaryStationaryVideoError("Stationary video")
+
+            video_filesize = (
+                utils.get_file_size(video_metadata.filename)
+                if video_metadata.filesize is None
+                else video_metadata.filesize
+            )
+            if video_filesize > max_sequence_filesize_in_bytes:
+                raise exceptions.MapillaryFileTooLargeError(
+                    f"Video file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
+                )
+
+            contains_null_island = any(
+                p.lat == 0 and p.lon == 0 for p in video_metadata.points
+            )
+            if contains_null_island:
+                raise exceptions.MapillaryNullIslandError(
+                    "Found GPS coordinates in Null Island (0, 0)",
+                )
+
+            too_fast = (
+                len(video_metadata.points) >= 2
+                and _avg_speed(video_metadata.points) > max_avg_speed
+            )
+            if too_fast:
+                raise exceptions.MapillaryCaptureSpeedTooFastError(
+                    f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
+                )
+        except exceptions.MapillaryDescriptionError as ex:
+            error_metadatas.append(
+                types.describe_error_metadata(
+                    exc=ex,
+                    filename=video_metadata.filename,
+                    filetype=video_metadata.filetype,
+                )
+            )
+        else:
+            output_video_metadatas.append(video_metadata)
+
+    LOG.info(
+        "Found %s videos and %s errors after video limit checks",
+        len(output_video_metadatas),
+        len(error_metadatas),
+    )
+
+    return output_video_metadatas, error_metadatas
+
+
+def _check_sequences_by_limits(
+    input_sequences: T.Sequence[PointSequence],
+    max_sequence_filesize_in_bytes: int,
+    max_avg_speed: float,
+) -> T.Tuple[T.List[PointSequence], T.List[types.ErrorMetadata]]:
+    output_sequences: T.List[PointSequence] = []
+    output_errors: T.List[types.ErrorMetadata] = []
+
+    for sequence in input_sequences:
+        sequence_filesize = sum(
+            utils.get_file_size(image.filename)
+            if image.filesize is None
+            else image.filesize
+            for image in sequence
         )
-
-
-
+
+        try:
+            if sequence_filesize > max_sequence_filesize_in_bytes:
+                raise exceptions.MapillaryFileTooLargeError(
+                    f"Sequence file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
+                )
+
+            contains_null_island = any(
+                image.lat == 0 and image.lon == 0 for image in sequence
+            )
+            if contains_null_island:
+                raise exceptions.MapillaryNullIslandError(
+                    "Found GPS coordinates in Null Island (0, 0)",
+                )
+
+            too_fast = len(sequence) >= 2 and _avg_speed(sequence) > max_avg_speed
+            if too_fast:
+                raise exceptions.MapillaryCaptureSpeedTooFastError(
+                    f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
+                )
+        except exceptions.MapillaryDescriptionError as ex:
+            for image in sequence:
+                output_errors.append(
+                    types.describe_error_metadata(
+                        exc=ex,
+                        filename=image.filename,
+                        filetype=types.FileType.IMAGE,
+                    )
+                )
+
+        else:
+            output_sequences.append(sequence)
+
+    assert sum(len(s) for s in output_sequences) + len(output_errors) == sum(
+        len(s) for s in input_sequences
+    )
+
+    LOG.info(
+        "Found %s sequences and %s errors after sequence limit checks",
+        len(output_sequences),
+        len(output_errors),
+    )
+
+    return output_sequences, output_errors
+
+
+def _group_by_folder_and_camera(
+    image_metadatas: T.List[types.ImageMetadata],
+) -> T.List[T.List[types.ImageMetadata]]:
+    grouped = _group_by(
+        image_metadatas,
+        lambda metadata: (
+            str(metadata.filename.parent),
+            metadata.MAPDeviceMake,
+            metadata.MAPDeviceModel,
+            metadata.width,
+            metadata.height,
+        ),
+    )
+    for key in grouped:
+        LOG.debug("Group sequences by %s: %s images", key, len(grouped[key]))
+    output_sequences = list(grouped.values())
+
+    LOG.info(
+        "Found %s sequences from different folders and cameras",
+        len(output_sequences),
+    )
+
+    return output_sequences
+
+
+def _split_sequences_by_cutoff_time(
+    input_sequences: T.List[PointSequence], cutoff_time: float
+) -> T.List[PointSequence]:
+    def _should_split_by_cutoff_time(
+        prev: types.ImageMetadata, cur: types.ImageMetadata
+    ) -> bool:
+        time_diff = cur.time - prev.time
+        assert 0 <= time_diff, "sequence must be sorted by capture times"
+        should = cutoff_time < time_diff
+        if should:
+            LOG.debug(
+                "Split because the capture time gap %s seconds exceeds cutoff_time (%s seconds): %s: %s -> %s",
+                round(time_diff, 2),
+                round(cutoff_time, 2),
+                prev.filename.parent,
+                prev.filename.name,
+                cur.filename.name,
+            )
+        return should
+
+    output_sequences = []
+    for sequence in input_sequences:
+        output_sequences.extend(
+            split_sequence_by(sequence, should_split=_should_split_by_cutoff_time)
         )
 
-
-
-
-
-
+    assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
+
+    LOG.info(
+        "Found %s sequences after split by cutoff_time %d seconds",
+        len(output_sequences),
+        cutoff_time,
+    )
+
+    return output_sequences
+
+
+def _split_sequences_by_cutoff_distance(
+    input_sequences: T.List[PointSequence], cutoff_distance: float
+) -> T.List[PointSequence]:
+    def _should_split_by_cutoff_distance(
+        prev: types.ImageMetadata, cur: types.ImageMetadata
+    ) -> bool:
+        distance = geo.gps_distance(
+            (prev.lat, prev.lon),
+            (cur.lat, cur.lon),
+        )
+        should = cutoff_distance < distance
+        if should:
+            LOG.debug(
+                "Split because the distance gap %s meters exceeds cutoff_distance (%s meters): %s: %s -> %s",
+                round(distance, 2),
+                round(cutoff_distance, 2),
+                prev.filename.parent,
+                prev.filename.name,
+                cur.filename.name,
+            )
+        return should
+
+    output_sequences = []
+    for sequence in input_sequences:
+        output_sequences.extend(
+            split_sequence_by(sequence, _should_split_by_cutoff_distance)
         )
 
+    assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
+
+    LOG.info(
+        "Found %s sequences after split by cutoff_distance %d meters",
+        len(output_sequences),
+        cutoff_distance,
+    )
+
+    return output_sequences
+
+
+def _check_sequences_duplication(
+    input_sequences: T.List[PointSequence],
+    duplicate_distance: float,
+    duplicate_angle: float,
+) -> T.Tuple[T.List[PointSequence], T.List[types.ErrorMetadata]]:
+    output_sequences: T.List[PointSequence] = []
+    output_errors: T.List[types.ErrorMetadata] = []
+
+    for sequence in input_sequences:
+        output_sequence, errors = duplication_check(
+            sequence,
+            max_duplicate_distance=duplicate_distance,
+            max_duplicate_angle=duplicate_angle,
+        )
+        assert len(sequence) == len(output_sequence) + len(errors)
+        output_sequences.append(output_sequence)
+        output_errors.extend(errors)
+
+    assert sum(len(s) for s in output_sequences) + len(output_errors) == sum(
+        len(s) for s in input_sequences
+    )
+
+    LOG.info(
+        "Found %s sequences and %s errors after duplication check",
+        len(output_sequences),
+        len(output_errors),
+    )
+
+    return output_sequences, output_errors
+
+
+def _split_sequences_by_limits(
+    input_sequences: T.List[PointSequence],
+    max_sequence_filesize_in_bytes: float,
+    max_sequence_pixels: float,
+) -> T.List[PointSequence]:
+    max_sequence_images = constants.MAX_SEQUENCE_LENGTH
+    max_sequence_filesize = max_sequence_filesize_in_bytes
+
+    def _should_split(image: types.ImageMetadata, sequence_state: T.Dict) -> bool:
+        last_sequence_images = sequence_state.get("last_sequence_images", 0)
+        last_sequence_file_size = sequence_state.get("last_sequence_file_size", 0)
+        last_sequence_pixels = sequence_state.get("last_sequence_pixels", 0)
+
+        # decent default values if width/height not available
+        width = 1024 if image.width is None else image.width
+        height = 1024 if image.height is None else image.height
+        pixels = width * height
+
+        if image.filesize is None:
+            filesize = os.path.getsize(image.filename)
+        else:
+            filesize = image.filesize
+
+        new_sequence_images = last_sequence_images + 1
+        new_sequence_file_size = last_sequence_file_size + filesize
+        new_sequence_pixels = last_sequence_pixels + pixels
+
+        if max_sequence_images < new_sequence_images:
+            LOG.debug(
+                "Split because the current sequence (%s) reaches the max number of images (%s)",
+                new_sequence_images,
+                max_sequence_images,
+            )
+            start_new_sequence = True
+        elif max_sequence_filesize < new_sequence_file_size:
+            LOG.debug(
+                "Split because the current sequence (%s) reaches the max filesize (%s)",
+                new_sequence_file_size,
+                max_sequence_filesize,
+            )
+            start_new_sequence = True
+        elif max_sequence_pixels < new_sequence_pixels:
+            LOG.debug(
+                "Split because the current sequence (%s) reaches the max pixels (%s)",
+                new_sequence_pixels,
+                max_sequence_pixels,
+            )
+            start_new_sequence = True
+        else:
+            start_new_sequence = False
+
+        if not start_new_sequence:
+            sequence_state["last_sequence_images"] = new_sequence_images
+            sequence_state["last_sequence_file_size"] = new_sequence_file_size
+            sequence_state["last_sequence_pixels"] = new_sequence_pixels
+        else:
+            sequence_state["last_sequence_images"] = 1
+            sequence_state["last_sequence_file_size"] = filesize
+            sequence_state["last_sequence_pixels"] = pixels
+
+        return start_new_sequence
+
+    output_sequences = []
+    for sequence in input_sequences:
+        output_sequences.extend(
+            split_sequence_by_agg(
+                sequence, should_split_with_sequence_state=_should_split
+            )
+        )
+
+    assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
+
+    LOG.info("Found %s sequences after split by sequence limits", len(output_sequences))
+
+    return output_sequences
+
+
+def process_sequence_properties(
+    metadatas: T.Sequence[types.MetadataOrError],
+    cutoff_distance: float = constants.CUTOFF_DISTANCE,
+    cutoff_time: float = constants.CUTOFF_TIME,
+    interpolate_directions: bool = False,
+    duplicate_distance: float = constants.DUPLICATE_DISTANCE,
+    duplicate_angle: float = constants.DUPLICATE_ANGLE,
+    max_avg_speed: float = constants.MAX_AVG_SPEED,
+) -> T.List[types.MetadataOrError]:
+    max_sequence_filesize_in_bytes = _parse_filesize_in_bytes(
+        constants.MAX_SEQUENCE_FILESIZE
+    )
+    max_sequence_pixels = _parse_pixels(constants.MAX_SEQUENCE_PIXELS)
+
     error_metadatas: T.List[types.ErrorMetadata] = []
     image_metadatas: T.List[types.ImageMetadata] = []
     video_metadatas: T.List[types.VideoMetadata] = []
@@ -280,66 +606,92 @@ def process_sequence_properties(
         else:
             raise RuntimeError(f"invalid metadata type: {metadata}")
 
-
-
-
-
-
+    if video_metadatas:
+        # Check limits for videos
+        video_metadatas, video_error_metadatas = _check_video_limits(
+            video_metadatas,
+            max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
+            max_avg_speed=max_avg_speed,
+            max_radius_for_stationary_check=10.0,
+        )
+        error_metadatas.extend(video_error_metadatas)
 
-
-
+    if image_metadatas:
+        sequences: T.List[PointSequence]
 
-
-
-    for sequence in sequences_by_folder:
-        cut = cut_sequence_by_time_distance(sequence, cutoff_distance, cutoff_time)
-        sequences_after_cut.extend(cut)
-    assert len(image_metadatas) == sum(len(s) for s in sequences_after_cut)
+        # Group by folder and camera
+        sequences = _group_by_folder_and_camera(image_metadatas)
 
-
-
+        # Make sure each sequence is sorted (in-place update)
+        for sequence in sequences:
+            sequence.sort(
+                key=lambda metadata: metadata.sort_key(),
+            )
 
-
+        # Interpolate subseconds for same timestamps (in-place update)
+        for sequence in sequences:
+            _interpolate_subsecs_for_sorting(sequence)
 
-
-    #
-
-
+        # Split sequences by cutoff time
+        # NOTE: Do not split by distance here because it affects the speed limit check
+        sequences = _split_sequences_by_cutoff_time(sequences, cutoff_time=cutoff_time)
+
+        # Duplication check
+        sequences, errors = _check_sequences_duplication(
+            sequences,
             duplicate_distance=duplicate_distance,
            duplicate_angle=duplicate_angle,
         )
-
-
-
-
-
-
-
-
-
-    #
-
-
-
-
-
+        error_metadatas.extend(errors)
+
+        # Interpolate angles (in-place update)
+        for sequence in sequences:
+            if interpolate_directions:
+                for image in sequence:
+                    image.angle = None
+            geo.interpolate_directions_if_none(sequence)
+
+        # Split sequences by max number of images, max filesize, and max pixels
+        sequences = _split_sequences_by_limits(
+            sequences,
+            max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
+            max_sequence_pixels=max_sequence_pixels,
+        )
+
+        # Check limits for sequences
+        sequences, errors = _check_sequences_by_limits(
+            sequences,
+            max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
+            max_avg_speed=max_avg_speed,
+        )
+        error_metadatas.extend(errors)
+
+        # Split sequences by cutoff distance
+        # NOTE: The speed limit check probably rejects most of anomalies
+        sequences = _split_sequences_by_cutoff_distance(
+            sequences, cutoff_distance=cutoff_distance
        )
 
-    #
-
-
+        # Assign sequence UUIDs (in-place update)
+        sequence_idx = 0
+        for sequence in sequences:
+            for image in sequence:
                 # using incremental id as shorter "uuid", so we can save some space for the desc file
-
-            image_metadatas.append(p)
+                image.MAPSequenceUUID = str(sequence_idx)
             sequence_idx += 1
 
+        image_metadatas = []
+        for sequence in sequences:
+            image_metadatas.extend(sequence)
+
+        assert sequence_idx == len(
+            set(metadata.MAPSequenceUUID for metadata in image_metadatas)
+        )
+
     results = error_metadatas + image_metadatas + video_metadatas
 
     assert len(metadatas) == len(results), (
        f"expected {len(metadatas)} results but got {len(results)}"
     )
-    assert sequence_idx == len(
-        set(metadata.MAPSequenceUUID for metadata in image_metadatas)
-    )
 
     return results
```