mapillary-tools 0.13.3a1__py3-none-any.whl → 0.14.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mapillary_tools/__init__.py +1 -1
- mapillary_tools/api_v4.py +237 -16
- mapillary_tools/authenticate.py +325 -64
- mapillary_tools/{geotag/blackvue_parser.py → blackvue_parser.py} +74 -54
- mapillary_tools/camm/camm_builder.py +55 -97
- mapillary_tools/camm/camm_parser.py +429 -181
- mapillary_tools/commands/__main__.py +12 -6
- mapillary_tools/commands/authenticate.py +8 -1
- mapillary_tools/commands/process.py +27 -51
- mapillary_tools/commands/process_and_upload.py +19 -5
- mapillary_tools/commands/sample_video.py +2 -3
- mapillary_tools/commands/upload.py +18 -9
- mapillary_tools/commands/video_process_and_upload.py +19 -5
- mapillary_tools/config.py +31 -13
- mapillary_tools/constants.py +47 -6
- mapillary_tools/exceptions.py +34 -35
- mapillary_tools/exif_read.py +221 -116
- mapillary_tools/exif_write.py +7 -7
- mapillary_tools/exiftool_read.py +33 -42
- mapillary_tools/exiftool_read_video.py +46 -33
- mapillary_tools/exiftool_runner.py +77 -0
- mapillary_tools/ffmpeg.py +24 -23
- mapillary_tools/geo.py +144 -120
- mapillary_tools/geotag/base.py +147 -0
- mapillary_tools/geotag/factory.py +291 -0
- mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
- mapillary_tools/geotag/geotag_images_from_exiftool.py +126 -82
- mapillary_tools/geotag/geotag_images_from_gpx.py +53 -118
- mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
- mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
- mapillary_tools/geotag/geotag_images_from_video.py +53 -51
- mapillary_tools/geotag/geotag_videos_from_exiftool.py +97 -0
- mapillary_tools/geotag/geotag_videos_from_gpx.py +39 -0
- mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
- mapillary_tools/geotag/image_extractors/base.py +18 -0
- mapillary_tools/geotag/image_extractors/exif.py +60 -0
- mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
- mapillary_tools/geotag/options.py +160 -0
- mapillary_tools/geotag/utils.py +52 -16
- mapillary_tools/geotag/video_extractors/base.py +18 -0
- mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
- mapillary_tools/{video_data_extraction/extractors/gpx_parser.py → geotag/video_extractors/gpx.py} +57 -39
- mapillary_tools/geotag/video_extractors/native.py +157 -0
- mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
- mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
- mapillary_tools/history.py +7 -13
- mapillary_tools/mp4/construct_mp4_parser.py +9 -8
- mapillary_tools/mp4/io_utils.py +0 -1
- mapillary_tools/mp4/mp4_sample_parser.py +36 -28
- mapillary_tools/mp4/simple_mp4_builder.py +10 -9
- mapillary_tools/mp4/simple_mp4_parser.py +13 -22
- mapillary_tools/process_geotag_properties.py +155 -392
- mapillary_tools/process_sequence_properties.py +562 -208
- mapillary_tools/sample_video.py +13 -20
- mapillary_tools/telemetry.py +26 -13
- mapillary_tools/types.py +111 -58
- mapillary_tools/upload.py +316 -298
- mapillary_tools/upload_api_v4.py +55 -122
- mapillary_tools/uploader.py +396 -254
- mapillary_tools/utils.py +42 -18
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/METADATA +3 -2
- mapillary_tools-0.14.0a2.dist-info/RECORD +72 -0
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/WHEEL +1 -1
- mapillary_tools/geotag/__init__.py +0 -1
- mapillary_tools/geotag/geotag_from_generic.py +0 -22
- mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
- mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
- mapillary_tools/video_data_extraction/cli_options.py +0 -22
- mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
- mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
- mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
- mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
- mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
- mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
- mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
- mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
- mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
- mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
- mapillary_tools-0.13.3a1.dist-info/RECORD +0 -75
- /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/entry_points.txt +0 -0
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info/licenses}/LICENSE +0 -0
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/top_level.txt +0 -0
|
@@ -1,64 +1,96 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import itertools
|
|
2
4
|
import logging
|
|
3
5
|
import math
|
|
4
6
|
import os
|
|
5
7
|
import typing as T
|
|
6
8
|
|
|
7
|
-
from . import constants, geo, types
|
|
8
|
-
from .exceptions import MapillaryBadParameterError, MapillaryDuplicationError
|
|
9
|
+
from . import constants, exceptions, geo, types, utils
|
|
9
10
|
|
|
10
11
|
LOG = logging.getLogger(__name__)
|
|
11
12
|
|
|
12
13
|
|
|
13
|
-
|
|
14
|
-
PointSequence = T.List[
|
|
14
|
+
SeqItem = T.TypeVar("SeqItem")
|
|
15
|
+
PointSequence = T.List[geo.PointLike]
|
|
15
16
|
|
|
16
17
|
|
|
17
|
-
def
|
|
18
|
-
sequence:
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
sequences
|
|
18
|
+
def split_sequence_by(
    sequence: T.Sequence[SeqItem],
    should_split: T.Callable[[SeqItem, SeqItem], bool],
) -> list[list[SeqItem]]:
    """
    Split a sequence into multiple sequences by should_split(prev, cur) => True

    Items keep their input order. A new output sequence is started at every
    item for which should_split(prev, cur) returns True, where prev is the
    item immediately before cur in the input.

    Returns an empty list for an empty input. Items that are None are treated
    like any other item.
    """
    output_sequences: list[list[SeqItem]] = []

    seq = iter(sequence)

    # Detect an empty input via StopIteration instead of a None default, so a
    # leading None item is not mistaken for "no items" and silently dropped
    try:
        prev = next(seq)
    except StopIteration:
        return output_sequences

    output_sequences.append([prev])

    for cur in seq:
        # invariant: prev is processed
        if should_split(prev, cur):
            output_sequences.append([cur])
        else:
            output_sequences[-1].append(cur)
        prev = cur
        # invariant: cur is processed

    # Every input item must land in exactly one output sequence
    assert sum(len(s) for s in output_sequences) == len(sequence)

    return output_sequences
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def split_sequence_by_agg(
    sequence: T.Sequence[SeqItem],
    should_split_with_sequence_state: T.Callable[[SeqItem, dict], bool],
) -> list[list[SeqItem]]:
    """
    Split a sequence by should_split_with_sequence_state(cur, sequence_state) => True

    The predicate is called once per item, in order, with a dict that is
    shared across all calls of one invocation. The dict starts empty and is
    never reset here, so the predicate owns whatever it stores in it
    (including resetting its own counters when it asks for a split).
    """
    shared_state: dict = {}
    groups: list[list[SeqItem]] = []

    for item in sequence:
        wants_split = should_split_with_sequence_state(item, shared_state)

        # Open a group for the very first item; afterwards open one only when
        # a split is requested and the current group already holds items
        if not groups or (wants_split and groups[-1]):
            groups.append([])

        groups[-1].append(item)

    # Every input item must land in exactly one output sequence
    assert sum(len(group) for group in groups) == len(sequence)

    return groups
|
|
45
76
|
|
|
46
77
|
|
|
47
78
|
def duplication_check(
|
|
48
79
|
sequence: PointSequence,
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
) ->
|
|
80
|
+
max_duplicate_distance: float,
|
|
81
|
+
max_duplicate_angle: float,
|
|
82
|
+
) -> tuple[PointSequence, list[types.ErrorMetadata]]:
|
|
52
83
|
dedups: PointSequence = []
|
|
53
|
-
dups:
|
|
84
|
+
dups: list[types.ErrorMetadata] = []
|
|
54
85
|
|
|
55
|
-
|
|
56
|
-
prev = next(
|
|
86
|
+
it = iter(sequence)
|
|
87
|
+
prev = next(it)
|
|
57
88
|
if prev is None:
|
|
58
89
|
return dedups, dups
|
|
90
|
+
|
|
59
91
|
dedups.append(prev)
|
|
60
92
|
|
|
61
|
-
for cur in
|
|
93
|
+
for cur in it:
|
|
62
94
|
# invariant: prev is processed
|
|
63
95
|
distance = geo.gps_distance(
|
|
64
96
|
(prev.lat, prev.lon),
|
|
@@ -70,21 +102,21 @@ def duplication_check(
|
|
|
70
102
|
else:
|
|
71
103
|
angle_diff = None
|
|
72
104
|
|
|
73
|
-
if distance <=
|
|
74
|
-
angle_diff is
|
|
105
|
+
if distance <= max_duplicate_distance and (
|
|
106
|
+
angle_diff is None or angle_diff <= max_duplicate_angle
|
|
75
107
|
):
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
),
|
|
84
|
-
cur.filename,
|
|
85
|
-
filetype=types.FileType.IMAGE,
|
|
108
|
+
msg = f"Duplicate of its previous image in terms of distance <= {max_duplicate_distance} and angle <= {max_duplicate_angle}"
|
|
109
|
+
dup = types.describe_error_metadata(
|
|
110
|
+
exceptions.MapillaryDuplicationError(
|
|
111
|
+
msg,
|
|
112
|
+
types.as_desc(cur),
|
|
113
|
+
distance=distance,
|
|
114
|
+
angle_diff=angle_diff,
|
|
86
115
|
),
|
|
116
|
+
cur.filename,
|
|
117
|
+
filetype=types.FileType.IMAGE,
|
|
87
118
|
)
|
|
119
|
+
dups.append(dup)
|
|
88
120
|
# prev does not change
|
|
89
121
|
else:
|
|
90
122
|
dedups.append(cur)
|
|
@@ -94,86 +126,14 @@ def duplication_check(
|
|
|
94
126
|
return dedups, dups
|
|
95
127
|
|
|
96
128
|
|
|
97
|
-
def
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
"""
|
|
106
|
-
sequences: T.List[T.List[types.ImageMetadata]] = []
|
|
107
|
-
last_sequence_file_size = 0
|
|
108
|
-
last_sequence_pixels = 0
|
|
109
|
-
|
|
110
|
-
for image in sequence:
|
|
111
|
-
# decent default values if width/height not available
|
|
112
|
-
width = 1024 if image.width is None else image.width
|
|
113
|
-
height = 1024 if image.height is None else image.height
|
|
114
|
-
|
|
115
|
-
filesize = os.path.getsize(image.filename)
|
|
116
|
-
|
|
117
|
-
if len(sequences) == 0:
|
|
118
|
-
start_new_sequence = True
|
|
119
|
-
else:
|
|
120
|
-
if sequences[-1]:
|
|
121
|
-
if max_images < len(sequences[-1]):
|
|
122
|
-
LOG.debug(
|
|
123
|
-
"Cut the sequence because the current sequence (%s) reaches the max number of images (%s)",
|
|
124
|
-
len(sequences[-1]),
|
|
125
|
-
max_images,
|
|
126
|
-
)
|
|
127
|
-
start_new_sequence = True
|
|
128
|
-
elif max_sequence_filesize < last_sequence_file_size + filesize:
|
|
129
|
-
LOG.debug(
|
|
130
|
-
"Cut the sequence because the current sequence (%s) reaches the max filesize (%s)",
|
|
131
|
-
last_sequence_file_size + filesize,
|
|
132
|
-
max_sequence_filesize,
|
|
133
|
-
)
|
|
134
|
-
start_new_sequence = True
|
|
135
|
-
elif max_sequence_pixels < last_sequence_pixels + width * height:
|
|
136
|
-
LOG.debug(
|
|
137
|
-
"Cut the sequence because the current sequence (%s) reaches the max pixels (%s)",
|
|
138
|
-
last_sequence_pixels + width * height,
|
|
139
|
-
max_sequence_pixels,
|
|
140
|
-
)
|
|
141
|
-
start_new_sequence = True
|
|
142
|
-
else:
|
|
143
|
-
start_new_sequence = False
|
|
144
|
-
else:
|
|
145
|
-
start_new_sequence = False
|
|
146
|
-
|
|
147
|
-
if start_new_sequence:
|
|
148
|
-
sequences.append([])
|
|
149
|
-
last_sequence_file_size = 0
|
|
150
|
-
last_sequence_pixels = 0
|
|
151
|
-
|
|
152
|
-
sequences[-1].append(image)
|
|
153
|
-
last_sequence_file_size += filesize
|
|
154
|
-
last_sequence_pixels += width * height
|
|
155
|
-
|
|
156
|
-
assert sum(len(s) for s in sequences) == len(sequence)
|
|
157
|
-
|
|
158
|
-
return sequences
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
def _group_sort_images_by_folder(
|
|
162
|
-
image_metadatas: T.List[types.ImageMetadata],
|
|
163
|
-
) -> T.List[T.List[types.ImageMetadata]]:
|
|
164
|
-
# group images by parent directory
|
|
165
|
-
sequences_by_parent: T.Dict[str, T.List[types.ImageMetadata]] = {}
|
|
166
|
-
for image_metadata in image_metadatas:
|
|
167
|
-
filename = image_metadata.filename.resolve()
|
|
168
|
-
sequences_by_parent.setdefault(str(filename.parent), []).append(image_metadata)
|
|
169
|
-
|
|
170
|
-
sequences = list(sequences_by_parent.values())
|
|
171
|
-
for sequence in sequences:
|
|
172
|
-
sequence.sort(
|
|
173
|
-
key=lambda metadata: metadata.sort_key(),
|
|
174
|
-
)
|
|
175
|
-
|
|
176
|
-
return sequences
|
|
129
|
+
def _group_by(
    image_metadatas: T.Iterable[types.ImageMetadata],
    group_key_func: T.Callable[[types.ImageMetadata], T.Hashable],
) -> dict[T.Hashable, list[types.ImageMetadata]]:
    """
    Group metadatas by group_key_func(metadata)

    Groups appear in the order their key is first seen, and items keep their
    input order within each group.
    """
    # NOTE: group_key_func is annotated with ":" (it was previously written
    # with "=", which made the typing alias a default value instead of a type)
    grouped: dict[T.Hashable, list[types.ImageMetadata]] = {}
    for metadata in image_metadatas:
        grouped.setdefault(group_key_func(metadata), []).append(metadata)
    return grouped
|
|
177
137
|
|
|
178
138
|
|
|
179
139
|
def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
|
|
@@ -217,58 +177,426 @@ def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
|
|
|
217
177
|
def _parse_filesize_in_bytes(filesize_str: str) -> int:
    """
    Parse a file size such as "512", "512B", "2K", "3M" or "1G" into bytes

    Surrounding whitespace is ignored and the suffix is case-insensitive.
    K, M and G are powers of 1024; B or no suffix means plain bytes.

    Raises exceptions.MapillaryBadParameterError (chained from the underlying
    ValueError) when the numeric part is not a valid integer.
    """
    filesize_str = filesize_str.strip().upper()

    multipliers = {
        "B": 1,
        "K": 1024,
        "M": 1024 * 1024,
        "G": 1024 * 1024 * 1024,
    }

    try:
        # [-1:] (not [-1]) so that an empty string falls through to int("")
        multiplier = multipliers.get(filesize_str[-1:])
        if multiplier is None:
            return int(filesize_str)
        return int(filesize_str[:-1]) * multiplier
    except ValueError as ex:
        raise exceptions.MapillaryBadParameterError(
            f"Expect valid file size that ends with B, K, M, or G, but got {filesize_str}"
        ) from ex
|
|
230
195
|
|
|
231
196
|
|
|
232
197
|
def _parse_pixels(pixels_str: str) -> int:
    """
    Parse a pixel count such as "1000", "5K", "12M" or "6G" into an integer

    Surrounding whitespace is ignored and the suffix is case-insensitive.
    K, M and G are powers of 1000; no suffix means a plain count.

    Raises exceptions.MapillaryBadParameterError (chained from the underlying
    ValueError) when the numeric part is not a valid integer.
    """
    pixels_str = pixels_str.strip().upper()

    multipliers = {
        "K": 1000,
        "M": 1000 * 1000,
        "G": 1000 * 1000 * 1000,
    }

    try:
        # [-1:] (not [-1]) so that an empty string falls through to int("")
        multiplier = multipliers.get(pixels_str[-1:])
        if multiplier is None:
            return int(pixels_str)
        return int(pixels_str[:-1]) * multiplier
    except ValueError as ex:
        raise exceptions.MapillaryBadParameterError(
            f"Expect valid number of pixels that ends with K, M, or G, but got {pixels_str}"
        ) from ex
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def _avg_speed(sequence: T.Sequence[geo.PointLike]) -> float:
    """
    Return the path length of the sequence divided by its elapsed time

    The path length is the sum of geo.gps_distance over consecutive points;
    the elapsed time is last.time - first.time. Returns infinity when the
    elapsed time is zero, which includes empty and single-point sequences.
    """
    path_length = 0.0
    for start, end in geo.pairwise(sequence):
        path_length += geo.gps_distance(
            (start.lat, start.lon),
            (end.lat, end.lon),
        )

    elapsed = sequence[-1].time - sequence[0].time if sequence else 0.0

    # Avoid dividing by zero: callers compare the result against a maximum
    return float("inf") if elapsed == 0.0 else path_length / elapsed
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _is_video_stationary(
    sequence: T.Sequence[geo.PointLike], max_radius_in_meters: float
) -> bool:
    """
    Tell whether no point lies farther than max_radius_in_meters from the first point

    An empty sequence counts as stationary for any non-negative radius.
    """
    if not sequence:
        return 0.0 <= max_radius_in_meters

    origin = (sequence[0].lat, sequence[0].lon)

    # Written as "not any(... > radius)" rather than "all(... <= radius)" so a
    # NaN distance is still not treated as movement
    return not any(
        geo.gps_distance(origin, (point.lat, point.lon)) > max_radius_in_meters
        for point in sequence
    )
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _check_video_limits(
    video_metadatas: T.Iterable[types.VideoMetadata],
    max_sequence_filesize_in_bytes: int,
    max_avg_speed: float,
    max_radius_for_stationary_check: float,
) -> tuple[list[types.VideoMetadata], list[types.ErrorMetadata]]:
    """
    Partition videos into those passing all limit checks and error records for the rest

    Checks run in this order and the first failure wins: stationary video,
    file size, Null Island (0, 0) coordinates, average capture speed.
    Only exceptions.MapillaryDescriptionError (and subclasses) are turned
    into error records; anything else propagates to the caller.
    """

    def _validate(video: types.VideoMetadata) -> None:
        # Raise on the first violated limit; return normally when all pass
        if _is_video_stationary(
            video.points,
            max_radius_in_meters=max_radius_for_stationary_check,
        ):
            raise exceptions.MapillaryStationaryVideoError("Stationary video")

        # Ask utils.get_file_size only when the metadata carries no size
        if video.filesize is None:
            size_in_bytes = utils.get_file_size(video.filename)
        else:
            size_in_bytes = video.filesize
        if size_in_bytes > max_sequence_filesize_in_bytes:
            raise exceptions.MapillaryFileTooLargeError(
                f"Video file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
            )

        if any(point.lat == 0 and point.lon == 0 for point in video.points):
            raise exceptions.MapillaryNullIslandError(
                "Found GPS coordinates in Null Island (0, 0)",
            )

        # A speed needs at least two points to be meaningful
        if len(video.points) >= 2 and _avg_speed(video.points) > max_avg_speed:
            raise exceptions.MapillaryCaptureSpeedTooFastError(
                f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
            )

    accepted: list[types.VideoMetadata] = []
    rejected: list[types.ErrorMetadata] = []

    for video in video_metadatas:
        try:
            _validate(video)
        except exceptions.MapillaryDescriptionError as ex:
            rejected.append(
                types.describe_error_metadata(
                    exc=ex,
                    filename=video.filename,
                    filetype=video.filetype,
                )
            )
        else:
            accepted.append(video)

    LOG.info(
        "Found %s videos and %s errors after video limit checks",
        len(accepted),
        len(rejected),
    )

    return accepted, rejected
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _check_sequences_by_limits(
    input_sequences: T.Sequence[PointSequence],
    max_sequence_filesize_in_bytes: int,
    max_avg_speed: float,
) -> tuple[list[PointSequence], list[types.ErrorMetadata]]:
    """
    Keep the sequences that pass all limit checks; report every image of the others as an error

    Checks run in this order and the first failure wins: total file size,
    Null Island (0, 0) coordinates, average capture speed. A failing sequence
    is rejected as a whole, producing one error record per image.
    """

    def _size_of(image) -> int:
        # Ask utils.get_file_size only when the metadata carries no size
        return (
            utils.get_file_size(image.filename)
            if image.filesize is None
            else image.filesize
        )

    accepted: list[PointSequence] = []
    rejected: list[types.ErrorMetadata] = []

    for sequence in input_sequences:
        # Computed outside the try block: a failure to get a size is not a
        # limit violation and must propagate
        total_bytes = sum(map(_size_of, sequence))

        try:
            if total_bytes > max_sequence_filesize_in_bytes:
                raise exceptions.MapillaryFileTooLargeError(
                    f"Sequence file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
                )

            if any(image.lat == 0 and image.lon == 0 for image in sequence):
                raise exceptions.MapillaryNullIslandError(
                    "Found GPS coordinates in Null Island (0, 0)",
                )

            # A speed needs at least two images to be meaningful
            if len(sequence) >= 2 and _avg_speed(sequence) > max_avg_speed:
                raise exceptions.MapillaryCaptureSpeedTooFastError(
                    f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
                )
        except exceptions.MapillaryDescriptionError as ex:
            rejected.extend(
                types.describe_error_metadata(
                    exc=ex,
                    filename=image.filename,
                    filetype=types.FileType.IMAGE,
                )
                for image in sequence
            )
        else:
            accepted.append(sequence)

    # Every input image is either kept or reported, never both or neither
    assert sum(len(s) for s in accepted) + len(rejected) == sum(
        len(s) for s in input_sequences
    )

    LOG.info(
        "Found %s sequences and %s errors after sequence limit checks",
        len(accepted),
        len(rejected),
    )

    return accepted, rejected
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def _group_by_folder_and_camera(
    image_metadatas: list[types.ImageMetadata],
) -> list[list[types.ImageMetadata]]:
    """
    Group images that share parent folder, device make, device model, width and height

    Returns one list per distinct key, in the order keys are first seen.
    """

    def _folder_and_camera(metadata: types.ImageMetadata) -> tuple:
        return (
            str(metadata.filename.parent),
            metadata.MAPDeviceMake,
            metadata.MAPDeviceModel,
            metadata.width,
            metadata.height,
        )

    grouped = _group_by(image_metadatas, _folder_and_camera)

    for key, members in grouped.items():
        LOG.debug("Group sequences by %s: %s images", key, len(members))

    output_sequences = list(grouped.values())

    LOG.info(
        "Found %s sequences from different folders and cameras",
        len(output_sequences),
    )

    return output_sequences
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def _split_sequences_by_cutoff_time(
    input_sequences: T.Sequence[PointSequence], cutoff_time: float
) -> list[PointSequence]:
    """
    Split each input sequence wherever the capture time gap between neighbours exceeds cutoff_time

    Each input sequence must already be sorted by capture time; a negative
    gap trips an assertion. No image is dropped or reordered.
    """

    def _should_split_by_cutoff_time(
        prev: types.ImageMetadata, cur: types.ImageMetadata
    ) -> bool:
        time_diff = cur.time - prev.time
        assert 0 <= time_diff, "sequence must be sorted by capture times"
        should = cutoff_time < time_diff
        if should:
            LOG.debug(
                "Split because the capture time gap %s seconds exceeds cutoff_time (%s seconds): %s: %s -> %s",
                round(time_diff, 2),
                round(cutoff_time, 2),
                prev.filename.parent,
                prev.filename.name,
                cur.filename.name,
            )
        return should

    output_sequences: list[PointSequence] = []
    for sequence in input_sequences:
        output_sequences.extend(
            split_sequence_by(sequence, should_split=_should_split_by_cutoff_time)
        )

    # Splitting must neither lose nor duplicate images
    assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)

    # %s rather than %d: cutoff_time is a float and %d would log 0.5 as 0
    LOG.info(
        "Found %s sequences after split by cutoff_time %s seconds",
        len(output_sequences),
        cutoff_time,
    )

    return output_sequences
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def _split_sequences_by_cutoff_distance(
    input_sequences: T.Sequence[PointSequence], cutoff_distance: float
) -> list[PointSequence]:
    """
    Split each input sequence wherever the distance between neighbours exceeds cutoff_distance

    The distance is geo.gps_distance between the (lat, lon) of consecutive
    images. No image is dropped or reordered.
    """

    def _should_split_by_cutoff_distance(
        prev: types.ImageMetadata, cur: types.ImageMetadata
    ) -> bool:
        distance = geo.gps_distance(
            (prev.lat, prev.lon),
            (cur.lat, cur.lon),
        )
        should = cutoff_distance < distance
        if should:
            LOG.debug(
                "Split because the distance gap %s meters exceeds cutoff_distance (%s meters): %s: %s -> %s",
                round(distance, 2),
                round(cutoff_distance, 2),
                prev.filename.parent,
                prev.filename.name,
                cur.filename.name,
            )
        return should

    output_sequences: list[PointSequence] = []
    for sequence in input_sequences:
        output_sequences.extend(
            split_sequence_by(sequence, _should_split_by_cutoff_distance)
        )

    # Splitting must neither lose nor duplicate images
    assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)

    # %s rather than %d: cutoff_distance is a float and %d would truncate it
    LOG.info(
        "Found %s sequences after split by cutoff_distance %s meters",
        len(output_sequences),
        cutoff_distance,
    )

    return output_sequences
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def _check_sequences_duplication(
    input_sequences: T.Sequence[PointSequence],
    duplicate_distance: float,
    duplicate_angle: float,
) -> tuple[list[PointSequence], list[types.ErrorMetadata]]:
    """
    Run duplication_check on every sequence and collect the results

    Returns the deduplicated sequences (one per input sequence, in the same
    order) and a flat list of the error records for the removed duplicates.
    """
    deduplicated: list[PointSequence] = []
    duplicate_errors: list[types.ErrorMetadata] = []

    for sequence in input_sequences:
        kept, dropped = duplication_check(
            sequence,
            max_duplicate_distance=duplicate_distance,
            max_duplicate_angle=duplicate_angle,
        )
        # Each image is either kept or reported as a duplicate
        assert len(sequence) == len(kept) + len(dropped)
        deduplicated.append(kept)
        duplicate_errors += dropped

    assert sum(len(s) for s in deduplicated) + len(duplicate_errors) == sum(
        len(s) for s in input_sequences
    )

    LOG.info(
        "Found %s sequences and %s errors after duplication check",
        len(deduplicated),
        len(duplicate_errors),
    )

    return deduplicated, duplicate_errors
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def _split_sequences_by_limits(
    input_sequences: T.Sequence[PointSequence],
    max_sequence_filesize_in_bytes: float,
    max_sequence_pixels: float,
) -> list[PointSequence]:
    """
    Split each input sequence so that no piece exceeds the image count, file size or pixel limits

    The image count limit is constants.MAX_SEQUENCE_LENGTH. Running totals
    are tracked per input sequence; an image that would push any total over
    its limit starts a new piece and the totals restart from that image.
    """
    max_sequence_images = constants.MAX_SEQUENCE_LENGTH
    max_sequence_filesize = max_sequence_filesize_in_bytes

    def _should_split(image: types.ImageMetadata, sequence_state: dict) -> bool:
        # decent default values if width/height not available
        image_width = image.width if image.width is not None else 1024
        image_height = image.height if image.height is not None else 1024
        image_pixels = image_width * image_height

        image_bytes = (
            os.path.getsize(image.filename)
            if image.filesize is None
            else image.filesize
        )

        # Totals the current piece would reach if this image joined it
        total_images = sequence_state.get("last_sequence_images", 0) + 1
        total_bytes = sequence_state.get("last_sequence_file_size", 0) + image_bytes
        total_pixels = sequence_state.get("last_sequence_pixels", 0) + image_pixels

        # (limit, would-be total, log message); the first exceeded limit wins
        limit_checks = (
            (
                max_sequence_images,
                total_images,
                "Split because the current sequence (%s) reaches the max number of images (%s)",
            ),
            (
                max_sequence_filesize,
                total_bytes,
                "Split because the current sequence (%s) reaches the max filesize (%s)",
            ),
            (
                max_sequence_pixels,
                total_pixels,
                "Split because the current sequence (%s) reaches the max pixels (%s)",
            ),
        )

        start_new_sequence = False
        for limit, total, message in limit_checks:
            if limit < total:
                LOG.debug(message, total, limit)
                start_new_sequence = True
                break

        if start_new_sequence:
            # The new piece starts with this image alone
            sequence_state.update(
                last_sequence_images=1,
                last_sequence_file_size=image_bytes,
                last_sequence_pixels=image_pixels,
            )
        else:
            sequence_state.update(
                last_sequence_images=total_images,
                last_sequence_file_size=total_bytes,
                last_sequence_pixels=total_pixels,
            )

        return start_new_sequence

    output_sequences: list[PointSequence] = []
    for sequence in input_sequences:
        output_sequences.extend(
            split_sequence_by_agg(
                sequence, should_split_with_sequence_state=_should_split
            )
        )

    # Splitting must neither lose nor duplicate images
    assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)

    LOG.info("Found %s sequences after split by sequence limits", len(output_sequences))

    return output_sequences
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
def process_sequence_properties(
|
|
584
|
+
metadatas: T.Sequence[types.MetadataOrError],
|
|
585
|
+
cutoff_distance: float = constants.CUTOFF_DISTANCE,
|
|
586
|
+
cutoff_time: float = constants.CUTOFF_TIME,
|
|
587
|
+
interpolate_directions: bool = False,
|
|
588
|
+
duplicate_distance: float = constants.DUPLICATE_DISTANCE,
|
|
589
|
+
duplicate_angle: float = constants.DUPLICATE_ANGLE,
|
|
590
|
+
max_avg_speed: float = constants.MAX_AVG_SPEED,
|
|
591
|
+
) -> list[types.MetadataOrError]:
|
|
592
|
+
max_sequence_filesize_in_bytes = _parse_filesize_in_bytes(
|
|
593
|
+
constants.MAX_SEQUENCE_FILESIZE
|
|
594
|
+
)
|
|
595
|
+
max_sequence_pixels = _parse_pixels(constants.MAX_SEQUENCE_PIXELS)
|
|
596
|
+
|
|
597
|
+
error_metadatas: list[types.ErrorMetadata] = []
|
|
598
|
+
image_metadatas: list[types.ImageMetadata] = []
|
|
599
|
+
video_metadatas: list[types.VideoMetadata] = []
|
|
272
600
|
|
|
273
601
|
for metadata in metadatas:
|
|
274
602
|
if isinstance(metadata, types.ErrorMetadata):
|
|
@@ -280,66 +608,92 @@ def process_sequence_properties(
|
|
|
280
608
|
else:
|
|
281
609
|
raise RuntimeError(f"invalid metadata type: {metadata}")
|
|
282
610
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
611
|
+
if video_metadatas:
|
|
612
|
+
# Check limits for videos
|
|
613
|
+
video_metadatas, video_error_metadatas = _check_video_limits(
|
|
614
|
+
video_metadatas,
|
|
615
|
+
max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
|
|
616
|
+
max_avg_speed=max_avg_speed,
|
|
617
|
+
max_radius_for_stationary_check=10.0,
|
|
618
|
+
)
|
|
619
|
+
error_metadatas.extend(video_error_metadatas)
|
|
288
620
|
|
|
289
|
-
|
|
290
|
-
|
|
621
|
+
if image_metadatas:
|
|
622
|
+
sequences: list[PointSequence]
|
|
291
623
|
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
for sequence in sequences_by_folder:
|
|
295
|
-
cut = cut_sequence_by_time_distance(sequence, cutoff_distance, cutoff_time)
|
|
296
|
-
sequences_after_cut.extend(cut)
|
|
297
|
-
assert len(image_metadatas) == sum(len(s) for s in sequences_after_cut)
|
|
624
|
+
# Group by folder and camera
|
|
625
|
+
sequences = _group_by_folder_and_camera(image_metadatas)
|
|
298
626
|
|
|
299
|
-
|
|
300
|
-
|
|
627
|
+
# Make sure each sequence is sorted (in-place update)
|
|
628
|
+
for sequence in sequences:
|
|
629
|
+
sequence.sort(
|
|
630
|
+
key=lambda metadata: metadata.sort_key(),
|
|
631
|
+
)
|
|
301
632
|
|
|
302
|
-
|
|
633
|
+
# Interpolate subseconds for same timestamps (in-place update)
|
|
634
|
+
for sequence in sequences:
|
|
635
|
+
_interpolate_subsecs_for_sorting(sequence)
|
|
303
636
|
|
|
304
|
-
|
|
305
|
-
#
|
|
306
|
-
|
|
307
|
-
|
|
637
|
+
# Split sequences by cutoff time
|
|
638
|
+
# NOTE: Do not split by distance here because it affects the speed limit check
|
|
639
|
+
sequences = _split_sequences_by_cutoff_time(sequences, cutoff_time=cutoff_time)
|
|
640
|
+
|
|
641
|
+
# Duplication check
|
|
642
|
+
sequences, errors = _check_sequences_duplication(
|
|
643
|
+
sequences,
|
|
308
644
|
duplicate_distance=duplicate_distance,
|
|
309
645
|
duplicate_angle=duplicate_angle,
|
|
310
646
|
)
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
#
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
647
|
+
error_metadatas.extend(errors)
|
|
648
|
+
|
|
649
|
+
# Interpolate angles (in-place update)
|
|
650
|
+
for sequence in sequences:
|
|
651
|
+
if interpolate_directions:
|
|
652
|
+
for image in sequence:
|
|
653
|
+
image.angle = None
|
|
654
|
+
geo.interpolate_directions_if_none(sequence)
|
|
655
|
+
|
|
656
|
+
# Split sequences by max number of images, max filesize, and max pixels
|
|
657
|
+
sequences = _split_sequences_by_limits(
|
|
658
|
+
sequences,
|
|
659
|
+
max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
|
|
660
|
+
max_sequence_pixels=max_sequence_pixels,
|
|
661
|
+
)
|
|
662
|
+
|
|
663
|
+
# Check limits for sequences
|
|
664
|
+
sequences, errors = _check_sequences_by_limits(
|
|
665
|
+
sequences,
|
|
666
|
+
max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
|
|
667
|
+
max_avg_speed=max_avg_speed,
|
|
668
|
+
)
|
|
669
|
+
error_metadatas.extend(errors)
|
|
670
|
+
|
|
671
|
+
# Split sequences by cutoff distance
|
|
672
|
+
# NOTE: The speed limit check probably rejects most of anomalies
|
|
673
|
+
sequences = _split_sequences_by_cutoff_distance(
|
|
674
|
+
sequences, cutoff_distance=cutoff_distance
|
|
326
675
|
)
|
|
327
676
|
|
|
328
|
-
#
|
|
329
|
-
|
|
330
|
-
|
|
677
|
+
# Assign sequence UUIDs (in-place update)
|
|
678
|
+
sequence_idx = 0
|
|
679
|
+
for sequence in sequences:
|
|
680
|
+
for image in sequence:
|
|
331
681
|
# using incremental id as shorter "uuid", so we can save some space for the desc file
|
|
332
|
-
|
|
333
|
-
image_metadatas.append(p)
|
|
682
|
+
image.MAPSequenceUUID = str(sequence_idx)
|
|
334
683
|
sequence_idx += 1
|
|
335
684
|
|
|
685
|
+
image_metadatas = []
|
|
686
|
+
for sequence in sequences:
|
|
687
|
+
image_metadatas.extend(sequence)
|
|
688
|
+
|
|
689
|
+
assert sequence_idx == len(
|
|
690
|
+
set(metadata.MAPSequenceUUID for metadata in image_metadatas)
|
|
691
|
+
)
|
|
692
|
+
|
|
336
693
|
results = error_metadatas + image_metadatas + video_metadatas
|
|
337
694
|
|
|
338
695
|
assert len(metadatas) == len(results), (
|
|
339
696
|
f"expected {len(metadatas)} results but got {len(results)}"
|
|
340
697
|
)
|
|
341
|
-
assert sequence_idx == len(
|
|
342
|
-
set(metadata.MAPSequenceUUID for metadata in image_metadatas)
|
|
343
|
-
)
|
|
344
698
|
|
|
345
699
|
return results
|