mapillary-tools 0.13.3__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to their public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
Files changed (87)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +198 -55
  3. mapillary_tools/authenticate.py +326 -64
  4. mapillary_tools/blackvue_parser.py +195 -0
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +429 -181
  7. mapillary_tools/commands/__main__.py +10 -6
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +18 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +44 -13
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +65 -26
  15. mapillary_tools/constants.py +141 -18
  16. mapillary_tools/exceptions.py +37 -34
  17. mapillary_tools/exif_read.py +221 -116
  18. mapillary_tools/exif_write.py +10 -8
  19. mapillary_tools/exiftool_read.py +33 -42
  20. mapillary_tools/exiftool_read_video.py +97 -47
  21. mapillary_tools/exiftool_runner.py +57 -0
  22. mapillary_tools/ffmpeg.py +417 -242
  23. mapillary_tools/geo.py +158 -118
  24. mapillary_tools/geotag/__init__.py +0 -1
  25. mapillary_tools/geotag/base.py +147 -0
  26. mapillary_tools/geotag/factory.py +307 -0
  27. mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
  28. mapillary_tools/geotag/geotag_images_from_exiftool.py +136 -85
  29. mapillary_tools/geotag/geotag_images_from_gpx.py +60 -124
  30. mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
  31. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
  32. mapillary_tools/geotag/geotag_images_from_video.py +88 -51
  33. mapillary_tools/geotag/geotag_videos_from_exiftool.py +123 -0
  34. mapillary_tools/geotag/geotag_videos_from_gpx.py +52 -0
  35. mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
  36. mapillary_tools/geotag/image_extractors/base.py +18 -0
  37. mapillary_tools/geotag/image_extractors/exif.py +60 -0
  38. mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
  39. mapillary_tools/geotag/options.py +182 -0
  40. mapillary_tools/geotag/utils.py +52 -16
  41. mapillary_tools/geotag/video_extractors/base.py +18 -0
  42. mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
  43. mapillary_tools/geotag/video_extractors/gpx.py +116 -0
  44. mapillary_tools/geotag/video_extractors/native.py +160 -0
  45. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
  46. mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
  47. mapillary_tools/history.py +134 -20
  48. mapillary_tools/mp4/construct_mp4_parser.py +17 -10
  49. mapillary_tools/mp4/io_utils.py +0 -1
  50. mapillary_tools/mp4/mp4_sample_parser.py +36 -28
  51. mapillary_tools/mp4/simple_mp4_builder.py +10 -9
  52. mapillary_tools/mp4/simple_mp4_parser.py +13 -22
  53. mapillary_tools/process_geotag_properties.py +184 -414
  54. mapillary_tools/process_sequence_properties.py +594 -225
  55. mapillary_tools/sample_video.py +20 -26
  56. mapillary_tools/serializer/description.py +587 -0
  57. mapillary_tools/serializer/gpx.py +132 -0
  58. mapillary_tools/telemetry.py +26 -13
  59. mapillary_tools/types.py +98 -611
  60. mapillary_tools/upload.py +411 -387
  61. mapillary_tools/upload_api_v4.py +167 -142
  62. mapillary_tools/uploader.py +804 -284
  63. mapillary_tools/utils.py +49 -18
  64. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/METADATA +93 -35
  65. mapillary_tools-0.14.0.dist-info/RECORD +75 -0
  66. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/WHEEL +1 -1
  67. mapillary_tools/geotag/blackvue_parser.py +0 -118
  68. mapillary_tools/geotag/geotag_from_generic.py +0 -22
  69. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
  70. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
  71. mapillary_tools/video_data_extraction/cli_options.py +0 -22
  72. mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
  73. mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
  74. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
  75. mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
  76. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
  77. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
  78. mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
  79. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
  80. mapillary_tools/video_data_extraction/extractors/gpx_parser.py +0 -108
  81. mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
  82. mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
  83. mapillary_tools-0.13.3.dist-info/RECORD +0 -75
  84. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  85. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/entry_points.txt +0 -0
  86. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info/licenses}/LICENSE +0 -0
  87. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/top_level.txt +0 -0
mapillary_tools/upload_api_v4.py

@@ -1,14 +1,25 @@
-import enum
+from __future__ import annotations
+
 import io
 import os
 import random
+import sys
 import typing as T
 import uuid
+from pathlib import Path
+
+if sys.version_info >= (3, 12):
+    from typing import override
+else:
+    from typing_extensions import override
+
+import tempfile
 
 import requests
 
 from .api_v4 import (
-    MAPILLARY_GRAPH_API_ENDPOINT,
+    HTTPContentError,
+    jsonify_response,
     request_get,
     request_post,
     REQUESTS_TIMEOUT,
@@ -17,90 +28,87 @@ from .api_v4 import (
 MAPILLARY_UPLOAD_ENDPOINT = os.getenv(
     "MAPILLARY_UPLOAD_ENDPOINT", "https://rupload.facebook.com/mapillary_public_uploads"
 )
-DEFAULT_CHUNK_SIZE = 1024 * 1024 * 16  # 16MB
-# According to the docs, UPLOAD_REQUESTS_TIMEOUT can be a tuple of
-# (connection_timeout, read_timeout): https://requests.readthedocs.io/en/latest/user/advanced/#timeouts
-# In my test, however, the connection_timeout rules both connection timeout and read timeout.
-# i.e. if your the server does not respond within this timeout, it will throw:
-# ConnectionError: ('Connection aborted.', timeout('The write operation timed out'))
-# So let us make sure the largest possible chunks can be uploaded before this timeout for now,
-UPLOAD_REQUESTS_TIMEOUT = (30 * 60, 30 * 60)  # 30 minutes
-
-
-class ClusterFileType(enum.Enum):
-    ZIP = "zip"
-    BLACKVUE = "mly_blackvue_video"
-    CAMM = "mly_camm_video"
 
 
 class UploadService:
+    """
+    Upload byte streams to the Upload Service.
+    """
+
     user_access_token: str
     session_key: str
-    callbacks: T.List[T.Callable[[bytes, T.Optional[requests.Response]], None]]
-    cluster_filetype: ClusterFileType
-    organization_id: T.Optional[T.Union[str, int]]
-    chunk_size: int
-
-    MIME_BY_CLUSTER_TYPE: T.Dict[ClusterFileType, str] = {
-        ClusterFileType.ZIP: "application/zip",
-        ClusterFileType.BLACKVUE: "video/mp4",
-        ClusterFileType.CAMM: "video/mp4",
-    }
-
-    def __init__(
-        self,
-        user_access_token: str,
-        session_key: str,
-        organization_id: T.Optional[T.Union[str, int]] = None,
-        cluster_filetype: ClusterFileType = ClusterFileType.ZIP,
-        chunk_size: int = DEFAULT_CHUNK_SIZE,
-    ):
-        if chunk_size <= 0:
-            raise ValueError("Expect positive chunk size")
 
+    def __init__(self, user_access_token: str, session_key: str):
         self.user_access_token = user_access_token
         self.session_key = session_key
-        self.organization_id = organization_id
-        # validate the input
-        self.cluster_filetype = ClusterFileType(cluster_filetype)
-        self.callbacks = []
-        self.chunk_size = chunk_size
 
     def fetch_offset(self) -> int:
        headers = {
             "Authorization": f"OAuth {self.user_access_token}",
         }
         url = f"{MAPILLARY_UPLOAD_ENDPOINT}/{self.session_key}"
-        resp = request_get(
-            url,
-            headers=headers,
-            timeout=REQUESTS_TIMEOUT,
-        )
+        resp = request_get(url, headers=headers, timeout=REQUESTS_TIMEOUT)
+
         resp.raise_for_status()
-        data = resp.json()
-        return data["offset"]
 
-    def upload(
-        self,
-        data: T.IO[bytes],
-        offset: T.Optional[int] = None,
-    ) -> str:
-        chunks = self._chunkize_byte_stream(data)
-        return self.upload_chunks(chunks, offset=offset)
+        data = jsonify_response(resp)
+
+        try:
+            return data["offset"]
+        except KeyError:
+            raise HTTPContentError("Offset not found in the response", resp)
 
-    def _chunkize_byte_stream(
-        self, stream: T.IO[bytes]
+    @classmethod
+    def chunkize_byte_stream(
+        cls, stream: T.IO[bytes], chunk_size: int
     ) -> T.Generator[bytes, None, None]:
+        """
+        Chunkize a byte stream into chunks of the specified size.
+
+        >>> list(UploadService.chunkize_byte_stream(io.BytesIO(b"foo"), 1))
+        [b'f', b'o', b'o']
+
+        >>> list(UploadService.chunkize_byte_stream(io.BytesIO(b"foo"), 10))
+        [b'foo']
+        """
+
+        if chunk_size <= 0:
+            raise ValueError("Expect positive chunk size")
+
         while True:
-            data = stream.read(self.chunk_size)
+            data = stream.read(chunk_size)
             if not data:
                 break
             yield data
 
-    def _offset_chunks(
-        self, chunks: T.Iterable[bytes], offset: int
+    @classmethod
+    def shift_chunks(
+        cls, chunks: T.Iterable[bytes], offset: int
     ) -> T.Generator[bytes, None, None]:
-        assert offset >= 0, f"Expect non-negative offset but got {offset}"
+        """
+        Shift the chunks by the offset.
+
+        >>> list(UploadService.shift_chunks([b"foo", b"bar"], 0))
+        [b'foo', b'bar']
+
+        >>> list(UploadService.shift_chunks([b"foo", b"bar"], 1))
+        [b'oo', b'bar']
+
+        >>> list(UploadService.shift_chunks([b"foo", b"bar"], 3))
+        [b'bar']
+
+        >>> list(UploadService.shift_chunks([b"foo", b"bar"], 6))
+        []
+
+        >>> list(UploadService.shift_chunks([b"foo", b"bar"], 7))
+        []
+
+        >>> list(UploadService.shift_chunks([], 0))
+        []
+        """
+
+        if offset < 0:
+            raise ValueError(f"Expect non-negative offset but got {offset}")
 
         for chunk in chunks:
             if offset:
@@ -112,124 +120,141 @@ class UploadService:
             else:
                 yield chunk
 
-    def _attach_callbacks(
-        self, chunks: T.Iterable[bytes]
-    ) -> T.Generator[bytes, None, None]:
-        for chunk in chunks:
-            yield chunk
-            for callback in self.callbacks:
-                callback(chunk, None)
+    def upload_byte_stream(
+        self,
+        stream: T.IO[bytes],
+        offset: int | None = None,
+        chunk_size: int = 2 * 1024 * 1024,  # 2MB
+        read_timeout: float | None = None,
+    ) -> str:
+        if offset is None:
+            offset = self.fetch_offset()
+        return self.upload_chunks(
+            self.chunkize_byte_stream(stream, chunk_size),
+            offset,
+            read_timeout=read_timeout,
+        )
 
     def upload_chunks(
         self,
         chunks: T.Iterable[bytes],
-        offset: T.Optional[int] = None,
+        offset: int | None = None,
+        read_timeout: float | None = None,
     ) -> str:
         if offset is None:
             offset = self.fetch_offset()
+        shifted_chunks = self.shift_chunks(chunks, offset)
+        return self.upload_shifted_chunks(
+            shifted_chunks, offset, read_timeout=read_timeout
+        )
 
-        chunks = self._attach_callbacks(self._offset_chunks(chunks, offset))
+    def upload_shifted_chunks(
+        self,
+        shifted_chunks: T.Iterable[bytes],
+        offset: int,
+        read_timeout: float | None = None,
+    ) -> str:
+        """
+        Upload the chunks that must already be shifted by the offset (e.g. fp.seek(offset, io.SEEK_SET))
+        """
 
         headers = {
             "Authorization": f"OAuth {self.user_access_token}",
             "Offset": f"{offset}",
             "X-Entity-Name": self.session_key,
-            "X-Entity-Type": self.MIME_BY_CLUSTER_TYPE[self.cluster_filetype],
         }
         url = f"{MAPILLARY_UPLOAD_ENDPOINT}/{self.session_key}"
         resp = request_post(
             url,
             headers=headers,
-            data=chunks,
-            timeout=UPLOAD_REQUESTS_TIMEOUT,
-        )
-
-        payload = resp.json()
-        try:
-            return payload["h"]
-        except KeyError:
-            raise RuntimeError(
-                f"Upload server error: File handle not found in the upload response {resp.text}"
-            )
-
-    def finish(self, file_handle: str) -> str:
-        headers = {
-            "Authorization": f"OAuth {self.user_access_token}",
-        }
-        data: T.Dict[str, T.Union[str, int]] = {
-            "file_handle": file_handle,
-            "file_type": self.cluster_filetype.value,
-        }
-        if self.organization_id is not None:
-            data["organization_id"] = self.organization_id
-
-        url = f"{MAPILLARY_GRAPH_API_ENDPOINT}/finish_upload"
-
-        resp = request_post(
-            url,
-            headers=headers,
-            json=data,
-            timeout=REQUESTS_TIMEOUT,
+            data=shifted_chunks,
+            timeout=(REQUESTS_TIMEOUT, read_timeout),
         )
 
         resp.raise_for_status()
 
-        data = resp.json()
+        data = jsonify_response(resp)
 
-        cluster_id = data.get("cluster_id")
-        if cluster_id is None:
-            raise RuntimeError(
-                f"Upload server error: failed to create the cluster {resp.text}"
-            )
-
-        return T.cast(str, cluster_id)
+        try:
+            return data["h"]
+        except KeyError:
+            raise HTTPContentError("File handle not found in the response", resp)
 
 
 # A mock class for testing only
 class FakeUploadService(UploadService):
-    def __init__(self, *args, **kwargs):
+    """
+    A mock upload service that simulates the upload process for testing purposes.
+    It writes the uploaded data to a file in a temporary directory and generates a fake file handle.
+    """
+
+    FILE_HANDLE_DIR: str = "file_handles"
+
+    def __init__(
+        self,
+        *args,
+        upload_path: Path | None = None,
+        transient_error_ratio: float = 0.0,
+        **kwargs,
+    ):
         super().__init__(*args, **kwargs)
-        self._upload_path = os.getenv(
-            "MAPILLARY_UPLOAD_PATH", "mapillary_public_uploads"
-        )
-        self._error_ratio = 0.1
+        if upload_path is None:
+            upload_path = Path(tempfile.gettempdir()).joinpath(
+                "mapillary_public_uploads"
+            )
+        self._upload_path = upload_path
+        self._transient_error_ratio = transient_error_ratio
 
-    def upload_chunks(
+    @override
+    def upload_shifted_chunks(
        self,
-        chunks: T.Iterable[bytes],
-        offset: T.Optional[int] = None,
+        shifted_chunks: T.Iterable[bytes],
+        offset: int,
+        read_timeout: float | None = None,
     ) -> str:
-        if offset is None:
-            offset = self.fetch_offset()
-
-        chunks = self._attach_callbacks(self._offset_chunks(chunks, offset))
+        expected_offset = self.fetch_offset()
+        if offset != expected_offset:
+            raise ValueError(
+                f"Expect offset {expected_offset} but got {offset} for session {self.session_key}"
+            )
 
         os.makedirs(self._upload_path, exist_ok=True)
-        filename = os.path.join(self._upload_path, self.session_key)
-        with open(filename, "ab") as fp:
-            for chunk in chunks:
-                if random.random() <= self._error_ratio:
-                    raise requests.ConnectionError(
-                        f"TEST ONLY: Failed to upload with error ratio {self._error_ratio}"
-                    )
+        filename = self._upload_path.joinpath(self.session_key)
+        with filename.open("ab") as fp:
+            for chunk in shifted_chunks:
+                self._randomly_raise_transient_error()
                 fp.write(chunk)
-                if random.random() <= self._error_ratio:
-                    raise requests.ConnectionError(
-                        f"TEST ONLY: Partially uploaded with error ratio {self._error_ratio}"
-                    )
-        return uuid.uuid4().hex
+                self._randomly_raise_transient_error()
 
-    def finish(self, _: str) -> str:
-        return "0"
+        file_handle_dir = self._upload_path.joinpath(self.FILE_HANDLE_DIR)
+        file_handle_path = file_handle_dir.joinpath(self.session_key)
+        if not file_handle_path.exists():
+            os.makedirs(file_handle_dir, exist_ok=True)
+            random_file_handle = uuid.uuid4().hex
+            file_handle_path.write_text(random_file_handle)
 
+        return file_handle_path.read_text()
+
+    @override
     def fetch_offset(self) -> int:
-        if random.random() <= self._error_ratio:
-            raise requests.ConnectionError(
-                f"TEST ONLY: Partially uploaded with error ratio {self._error_ratio}"
-            )
-        filename = os.path.join(self._upload_path, self.session_key)
-        if not os.path.exists(filename):
+        self._randomly_raise_transient_error()
+        filename = self._upload_path.joinpath(self.session_key)
+        if not filename.exists():
            return 0
         with open(filename, "rb") as fp:
             fp.seek(0, io.SEEK_END)
             return fp.tell()
+
+    @property
+    def upload_path(self) -> Path:
+        return self._upload_path
+
+    def _randomly_raise_transient_error(self):
+        """
+        Randomly raise a transient error based on the configured error ratio.
+        This is for testing purposes only.
+        """
+        if random.random() <= self._transient_error_ratio:
+            raise requests.ConnectionError(
+                f"[TEST ONLY]: Transient error with ratio {self._transient_error_ratio}"
+            )
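
Usage sketch (not from the package): the 0.14.0 refactor splits the old monolithic upload() into composable pieces (chunkize_byte_stream, shift_chunks, upload_chunks, upload_shifted_chunks) and drops finish() and the ClusterFileType/organization_id handling from the service itself. A minimal resumable upload against the new surface could look like the following; the access token and session key are hypothetical placeholders:

    import io

    from mapillary_tools.upload_api_v4 import UploadService

    service = UploadService(
        user_access_token="MLY|...",        # hypothetical token
        session_key="example_session_key",  # hypothetical session key
    )

    with open("sequence.zip", "rb") as fp:
        # Option 1: let the service resume. With offset=None it calls
        # fetch_offset() itself, and shift_chunks() then drops the bytes the
        # server already has, so the stream is passed from position 0.
        file_handle = service.upload_byte_stream(fp)

    with open("sequence.zip", "rb") as fp:
        # Option 2: seek past the already-uploaded bytes and call the
        # lower-level upload_shifted_chunks(), as its docstring suggests.
        offset = service.fetch_offset()
        fp.seek(offset, io.SEEK_SET)
        chunks = UploadService.chunkize_byte_stream(fp, chunk_size=2 * 1024 * 1024)
        file_handle = service.upload_shifted_chunks(chunks, offset)

Either path returns the file handle ("h" in the response); in 0.14.0 the service no longer converts it into a cluster itself, since finish() and the finish_upload call were removed from this module.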
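FakeUploadService now takes an explicit transient_error_ratio (default 0.0, where 0.13.3 hard-coded 0.1), so flaky-network behaviour can be injected on demand. A hedged sketch of a test exercising resumption against the fake; the retry loop is illustrative, not the package's own retry logic:

    import io

    import requests

    from mapillary_tools.upload_api_v4 import FakeUploadService

    service = FakeUploadService(
        user_access_token="",       # unused by the fake
        session_key="test_session",
        transient_error_ratio=0.3,  # ~30% of operations raise ConnectionError
    )

    payload = b"hello" * 1_000_000

    while True:
        try:
            # fetch_offset() itself may raise the injected transient error.
            offset = service.fetch_offset()
            file_handle = service.upload_byte_stream(io.BytesIO(payload), offset=offset)
            break
        except requests.ConnectionError:
            continue  # the next fetch_offset() reflects the bytes already written

    # The fake appends to <tempdir>/mapillary_public_uploads/<session_key> and
    # serves a stable per-session handle from its file_handles directory.
    assert service.upload_path.joinpath("test_session").read_bytes() == payload

Because the fake persists partial writes and validates the caller's offset against the file size, a loop like this converges on exactly one complete copy of the payload.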