mapillary-tools 0.13.3a1__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (87)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +287 -22
  3. mapillary_tools/authenticate.py +326 -64
  4. mapillary_tools/blackvue_parser.py +195 -0
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +429 -181
  7. mapillary_tools/commands/__main__.py +17 -8
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +19 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +44 -13
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +65 -26
  15. mapillary_tools/constants.py +141 -18
  16. mapillary_tools/exceptions.py +37 -34
  17. mapillary_tools/exif_read.py +221 -116
  18. mapillary_tools/exif_write.py +10 -8
  19. mapillary_tools/exiftool_read.py +33 -42
  20. mapillary_tools/exiftool_read_video.py +97 -47
  21. mapillary_tools/exiftool_runner.py +57 -0
  22. mapillary_tools/ffmpeg.py +417 -242
  23. mapillary_tools/geo.py +158 -118
  24. mapillary_tools/geotag/__init__.py +0 -1
  25. mapillary_tools/geotag/base.py +147 -0
  26. mapillary_tools/geotag/factory.py +307 -0
  27. mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
  28. mapillary_tools/geotag/geotag_images_from_exiftool.py +136 -85
  29. mapillary_tools/geotag/geotag_images_from_gpx.py +60 -124
  30. mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
  31. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
  32. mapillary_tools/geotag/geotag_images_from_video.py +88 -51
  33. mapillary_tools/geotag/geotag_videos_from_exiftool.py +123 -0
  34. mapillary_tools/geotag/geotag_videos_from_gpx.py +52 -0
  35. mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
  36. mapillary_tools/geotag/image_extractors/base.py +18 -0
  37. mapillary_tools/geotag/image_extractors/exif.py +60 -0
  38. mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
  39. mapillary_tools/geotag/options.py +182 -0
  40. mapillary_tools/geotag/utils.py +52 -16
  41. mapillary_tools/geotag/video_extractors/base.py +18 -0
  42. mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
  43. mapillary_tools/geotag/video_extractors/gpx.py +116 -0
  44. mapillary_tools/geotag/video_extractors/native.py +160 -0
  45. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
  46. mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
  47. mapillary_tools/history.py +134 -20
  48. mapillary_tools/mp4/construct_mp4_parser.py +17 -10
  49. mapillary_tools/mp4/io_utils.py +0 -1
  50. mapillary_tools/mp4/mp4_sample_parser.py +36 -28
  51. mapillary_tools/mp4/simple_mp4_builder.py +10 -9
  52. mapillary_tools/mp4/simple_mp4_parser.py +13 -22
  53. mapillary_tools/process_geotag_properties.py +184 -414
  54. mapillary_tools/process_sequence_properties.py +594 -225
  55. mapillary_tools/sample_video.py +20 -26
  56. mapillary_tools/serializer/description.py +587 -0
  57. mapillary_tools/serializer/gpx.py +132 -0
  58. mapillary_tools/telemetry.py +26 -13
  59. mapillary_tools/types.py +98 -611
  60. mapillary_tools/upload.py +408 -416
  61. mapillary_tools/upload_api_v4.py +172 -174
  62. mapillary_tools/uploader.py +804 -284
  63. mapillary_tools/utils.py +49 -18
  64. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/METADATA +93 -35
  65. mapillary_tools-0.14.0.dist-info/RECORD +75 -0
  66. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/WHEEL +1 -1
  67. mapillary_tools/geotag/blackvue_parser.py +0 -118
  68. mapillary_tools/geotag/geotag_from_generic.py +0 -22
  69. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
  70. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
  71. mapillary_tools/video_data_extraction/cli_options.py +0 -22
  72. mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
  73. mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
  74. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
  75. mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
  76. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
  77. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
  78. mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
  79. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
  80. mapillary_tools/video_data_extraction/extractors/gpx_parser.py +0 -108
  81. mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
  82. mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
  83. mapillary_tools-0.13.3a1.dist-info/RECORD +0 -75
  84. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  85. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/entry_points.txt +0 -0
  86. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info/licenses}/LICENSE +0 -0
  87. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/top_level.txt +0 -0
@@ -1,129 +1,114 @@
- import enum
+ from __future__ import annotations
+
  import io
- import json
- import logging
  import os
  import random
+ import sys
  import typing as T
  import uuid
+ from pathlib import Path
+
+ if sys.version_info >= (3, 12):
+     from typing import override
+ else:
+     from typing_extensions import override
+
+ import tempfile

  import requests

- from .api_v4 import MAPILLARY_GRAPH_API_ENDPOINT, request_get, request_post
+ from .api_v4 import (
+     HTTPContentError,
+     jsonify_response,
+     request_get,
+     request_post,
+     REQUESTS_TIMEOUT,
+ )

- LOG = logging.getLogger(__name__)
  MAPILLARY_UPLOAD_ENDPOINT = os.getenv(
      "MAPILLARY_UPLOAD_ENDPOINT", "https://rupload.facebook.com/mapillary_public_uploads"
  )
- DEFAULT_CHUNK_SIZE = 1024 * 1024 * 16  # 16MB
- # According to the docs, UPLOAD_REQUESTS_TIMEOUT can be a tuple of
- # (connection_timeout, read_timeout): https://requests.readthedocs.io/en/latest/user/advanced/#timeouts
- # In my test, however, the connection_timeout rules both connection timeout and read timeout.
- # i.e. if your the server does not respond within this timeout, it will throw:
- # ConnectionError: ('Connection aborted.', timeout('The write operation timed out'))
- # So let us make sure the largest possible chunks can be uploaded before this timeout for now,
- REQUESTS_TIMEOUT = (20, 20)  # 20 seconds
- UPLOAD_REQUESTS_TIMEOUT = (30 * 60, 30 * 60)  # 30 minutes
-
-
- class ClusterFileType(enum.Enum):
-     ZIP = "zip"
-     BLACKVUE = "mly_blackvue_video"
-     CAMM = "mly_camm_video"
-
-
- def _sanitize_headers(headers: T.Dict):
-     return {
-         k: v
-         for k, v in headers.items()
-         if k.lower() not in ["authorization", "cookie", "x-fb-access-token"]
-     }
-
-
- _S = T.TypeVar("_S", str, bytes)
-
-
- def _truncate_end(s: _S) -> _S:
-     MAX_LENGTH = 512
-     if MAX_LENGTH < len(s):
-         if isinstance(s, bytes):
-             return s[:MAX_LENGTH] + b"..."
-         else:
-             return str(s[:MAX_LENGTH]) + "..."
-     else:
-         return s


  class UploadService:
+     """
+     Upload byte streams to the Upload Service.
+     """
+
      user_access_token: str
      session_key: str
-     callbacks: T.List[T.Callable[[bytes, T.Optional[requests.Response]], None]]
-     cluster_filetype: ClusterFileType
-     organization_id: T.Optional[T.Union[str, int]]
-     chunk_size: int
-
-     MIME_BY_CLUSTER_TYPE: T.Dict[ClusterFileType, str] = {
-         ClusterFileType.ZIP: "application/zip",
-         ClusterFileType.BLACKVUE: "video/mp4",
-         ClusterFileType.CAMM: "video/mp4",
-     }
-
-     def __init__(
-         self,
-         user_access_token: str,
-         session_key: str,
-         organization_id: T.Optional[T.Union[str, int]] = None,
-         cluster_filetype: ClusterFileType = ClusterFileType.ZIP,
-         chunk_size: int = DEFAULT_CHUNK_SIZE,
-     ):
-         if chunk_size <= 0:
-             raise ValueError("Expect positive chunk size")

+     def __init__(self, user_access_token: str, session_key: str):
          self.user_access_token = user_access_token
          self.session_key = session_key
-         self.organization_id = organization_id
-         # validate the input
-         self.cluster_filetype = ClusterFileType(cluster_filetype)
-         self.callbacks = []
-         self.chunk_size = chunk_size

      def fetch_offset(self) -> int:
          headers = {
              "Authorization": f"OAuth {self.user_access_token}",
          }
          url = f"{MAPILLARY_UPLOAD_ENDPOINT}/{self.session_key}"
-         LOG.debug("GET %s", url)
-         resp = request_get(
-             url,
-             headers=headers,
-             timeout=REQUESTS_TIMEOUT,
-         )
-         LOG.debug("HTTP response %s: %s", resp.status_code, resp.content)
+         resp = request_get(url, headers=headers, timeout=REQUESTS_TIMEOUT)
+
          resp.raise_for_status()
-         data = resp.json()
-         return data["offset"]

-     def upload(
-         self,
-         data: T.IO[bytes],
-         offset: T.Optional[int] = None,
-     ) -> str:
-         chunks = self._chunkize_byte_stream(data)
-         return self.upload_chunks(chunks, offset=offset)
+         data = jsonify_response(resp)
+
+         try:
+             return data["offset"]
+         except KeyError:
+             raise HTTPContentError("Offset not found in the response", resp)

-     def _chunkize_byte_stream(
-         self, stream: T.IO[bytes]
+     @classmethod
+     def chunkize_byte_stream(
+         cls, stream: T.IO[bytes], chunk_size: int
      ) -> T.Generator[bytes, None, None]:
+         """
+         Chunkize a byte stream into chunks of the specified size.
+
+         >>> list(UploadService.chunkize_byte_stream(io.BytesIO(b"foo"), 1))
+         [b'f', b'o', b'o']
+
+         >>> list(UploadService.chunkize_byte_stream(io.BytesIO(b"foo"), 10))
+         [b'foo']
+         """
+
+         if chunk_size <= 0:
+             raise ValueError("Expect positive chunk size")
+
          while True:
-             data = stream.read(self.chunk_size)
+             data = stream.read(chunk_size)
              if not data:
                  break
              yield data

-     def _offset_chunks(
-         self, chunks: T.Iterable[bytes], offset: int
+     @classmethod
+     def shift_chunks(
+         cls, chunks: T.Iterable[bytes], offset: int
      ) -> T.Generator[bytes, None, None]:
-         assert offset >= 0, f"Expect non-negative offset but got {offset}"
+         """
+         Shift the chunks by the offset.
+
+         >>> list(UploadService.shift_chunks([b"foo", b"bar"], 0))
+         [b'foo', b'bar']
+
+         >>> list(UploadService.shift_chunks([b"foo", b"bar"], 1))
+         [b'oo', b'bar']
+
+         >>> list(UploadService.shift_chunks([b"foo", b"bar"], 3))
+         [b'bar']
+
+         >>> list(UploadService.shift_chunks([b"foo", b"bar"], 6))
+         []
+
+         >>> list(UploadService.shift_chunks([b"foo", b"bar"], 7))
+         []
+
+         >>> list(UploadService.shift_chunks([], 0))
+         []
+         """
+
+         if offset < 0:
+             raise ValueError(f"Expect non-negative offset but got {offset}")

          for chunk in chunks:
              if offset:
@@ -135,128 +120,141 @@ class UploadService:
              else:
                  yield chunk

-     def _attach_callbacks(
-         self, chunks: T.Iterable[bytes]
-     ) -> T.Generator[bytes, None, None]:
-         for chunk in chunks:
-             yield chunk
-             for callback in self.callbacks:
-                 callback(chunk, None)
+     def upload_byte_stream(
+         self,
+         stream: T.IO[bytes],
+         offset: int | None = None,
+         chunk_size: int = 2 * 1024 * 1024,  # 2MB
+         read_timeout: float | None = None,
+     ) -> str:
+         if offset is None:
+             offset = self.fetch_offset()
+         return self.upload_chunks(
+             self.chunkize_byte_stream(stream, chunk_size),
+             offset,
+             read_timeout=read_timeout,
+         )

      def upload_chunks(
          self,
          chunks: T.Iterable[bytes],
-         offset: T.Optional[int] = None,
+         offset: int | None = None,
+         read_timeout: float | None = None,
      ) -> str:
          if offset is None:
              offset = self.fetch_offset()
+         shifted_chunks = self.shift_chunks(chunks, offset)
+         return self.upload_shifted_chunks(
+             shifted_chunks, offset, read_timeout=read_timeout
+         )

-         chunks = self._attach_callbacks(self._offset_chunks(chunks, offset))
+     def upload_shifted_chunks(
+         self,
+         shifted_chunks: T.Iterable[bytes],
+         offset: int,
+         read_timeout: float | None = None,
+     ) -> str:
+         """
+         Upload the chunks that must already be shifted by the offset (e.g. fp.seek(offset, io.SEEK_SET))
+         """

          headers = {
              "Authorization": f"OAuth {self.user_access_token}",
              "Offset": f"{offset}",
              "X-Entity-Name": self.session_key,
-             "X-Entity-Type": self.MIME_BY_CLUSTER_TYPE[self.cluster_filetype],
          }
          url = f"{MAPILLARY_UPLOAD_ENDPOINT}/{self.session_key}"
-         LOG.debug("POST %s HEADERS %s", url, json.dumps(_sanitize_headers(headers)))
          resp = request_post(
              url,
              headers=headers,
-             data=chunks,
-             timeout=UPLOAD_REQUESTS_TIMEOUT,
+             data=shifted_chunks,
+             timeout=(REQUESTS_TIMEOUT, read_timeout),
          )
-         LOG.debug("HTTP response %s: %s", resp.status_code, _truncate_end(resp.content))
-
-         payload = resp.json()
-         try:
-             return payload["h"]
-         except KeyError:
-             raise RuntimeError(
-                 f"Upload server error: File handle not found in the upload response {resp.text}"
-             )
-
-     def finish(self, file_handle: str) -> str:
-         headers = {
-             "Authorization": f"OAuth {self.user_access_token}",
-         }
-         data: T.Dict[str, T.Union[str, int]] = {
-             "file_handle": file_handle,
-             "file_type": self.cluster_filetype.value,
-         }
-         if self.organization_id is not None:
-             data["organization_id"] = self.organization_id
-
-         url = f"{MAPILLARY_GRAPH_API_ENDPOINT}/finish_upload"
-
-         LOG.debug("POST %s HEADERS %s", url, json.dumps(_sanitize_headers(headers)))
-         resp = request_post(
-             url,
-             headers=headers,
-             json=data,
-             timeout=REQUESTS_TIMEOUT,
-         )
-         LOG.debug("HTTP response %s: %s", resp.status_code, _truncate_end(resp.content))

          resp.raise_for_status()

-         data = resp.json()
-
-         cluster_id = data.get("cluster_id")
-         if cluster_id is None:
-             raise RuntimeError(
-                 f"Upload server error: failed to create the cluster {resp.text}"
-             )
+         data = jsonify_response(resp)

-         return T.cast(str, cluster_id)
+         try:
+             return data["h"]
+         except KeyError:
+             raise HTTPContentError("File handle not found in the response", resp)


  # A mock class for testing only
  class FakeUploadService(UploadService):
-     def __init__(self, *args, **kwargs):
+     """
+     A mock upload service that simulates the upload process for testing purposes.
+     It writes the uploaded data to a file in a temporary directory and generates a fake file handle.
+     """
+
+     FILE_HANDLE_DIR: str = "file_handles"
+
+     def __init__(
+         self,
+         *args,
+         upload_path: Path | None = None,
+         transient_error_ratio: float = 0.0,
+         **kwargs,
+     ):
          super().__init__(*args, **kwargs)
-         self._upload_path = os.getenv(
-             "MAPILLARY_UPLOAD_PATH", "mapillary_public_uploads"
-         )
-         self._error_ratio = 0.1
+         if upload_path is None:
+             upload_path = Path(tempfile.gettempdir()).joinpath(
+                 "mapillary_public_uploads"
+             )
+         self._upload_path = upload_path
+         self._transient_error_ratio = transient_error_ratio

-     def upload_chunks(
+     @override
+     def upload_shifted_chunks(
          self,
-         chunks: T.Iterable[bytes],
-         offset: T.Optional[int] = None,
+         shifted_chunks: T.Iterable[bytes],
+         offset: int,
+         read_timeout: float | None = None,
      ) -> str:
-         if offset is None:
-             offset = self.fetch_offset()
-
-         chunks = self._attach_callbacks(self._offset_chunks(chunks, offset))
+         expected_offset = self.fetch_offset()
+         if offset != expected_offset:
+             raise ValueError(
+                 f"Expect offset {expected_offset} but got {offset} for session {self.session_key}"
+             )

          os.makedirs(self._upload_path, exist_ok=True)
-         filename = os.path.join(self._upload_path, self.session_key)
-         with open(filename, "ab") as fp:
-             for chunk in chunks:
-                 if random.random() <= self._error_ratio:
-                     raise requests.ConnectionError(
-                         f"TEST ONLY: Failed to upload with error ratio {self._error_ratio}"
-                     )
+         filename = self._upload_path.joinpath(self.session_key)
+         with filename.open("ab") as fp:
+             for chunk in shifted_chunks:
+                 self._randomly_raise_transient_error()
                  fp.write(chunk)
-                 if random.random() <= self._error_ratio:
-                     raise requests.ConnectionError(
-                         f"TEST ONLY: Partially uploaded with error ratio {self._error_ratio}"
-                     )
-         return uuid.uuid4().hex
+                 self._randomly_raise_transient_error()
+
+         file_handle_dir = self._upload_path.joinpath(self.FILE_HANDLE_DIR)
+         file_handle_path = file_handle_dir.joinpath(self.session_key)
+         if not file_handle_path.exists():
+             os.makedirs(file_handle_dir, exist_ok=True)
+             random_file_handle = uuid.uuid4().hex
+             file_handle_path.write_text(random_file_handle)

-     def finish(self, _: str) -> str:
-         return "0"
+         return file_handle_path.read_text()

+     @override
      def fetch_offset(self) -> int:
-         if random.random() <= self._error_ratio:
-             raise requests.ConnectionError(
-                 f"TEST ONLY: Partially uploaded with error ratio {self._error_ratio}"
-             )
-         filename = os.path.join(self._upload_path, self.session_key)
-         if not os.path.exists(filename):
+         self._randomly_raise_transient_error()
+         filename = self._upload_path.joinpath(self.session_key)
+         if not filename.exists():
              return 0
          with open(filename, "rb") as fp:
              fp.seek(0, io.SEEK_END)
             return fp.tell()
+
+     @property
+     def upload_path(self) -> Path:
+         return self._upload_path
+
+     def _randomly_raise_transient_error(self):
+         """
+         Randomly raise a transient error based on the configured error ratio.
+         This is for testing purposes only.
+         """
+         if random.random() <= self._transient_error_ratio:
+             raise requests.ConnectionError(
+                 f"[TEST ONLY]: Transient error with ratio {self._transient_error_ratio}"
+             )
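
The reworked API above makes resumable uploads a short loop: fetch_offset() reports how many bytes the server already holds, and upload_byte_stream() skips past those bytes (via shift_chunks) before POSTing the remainder. Below is a minimal sketch of that loop, run against FakeUploadService so it needs no network; the constructor parameters and method signatures come from the diff above, while the token and session key are placeholder values.

    import io

    import requests

    from mapillary_tools.upload_api_v4 import FakeUploadService

    # Placeholder credentials: FakeUploadService appends to a file under the
    # temp directory instead of contacting the upload endpoint.
    service = FakeUploadService(
        "fake-user-access-token",
        "test_session_key",
        transient_error_ratio=0.3,  # make roughly 30% of operations fail
    )

    payload = b"0123456789" * 100_000  # ~1MB

    # Retry until the whole payload is persisted. Each attempt asks
    # fetch_offset() where the previous one stopped and resumes from there,
    # so no byte is uploaded twice.
    while True:
        try:
            file_handle = service.upload_byte_stream(io.BytesIO(payload))
            break
        except requests.ConnectionError:
            continue  # transient error; resume on the next attempt

    assert service.fetch_offset() == len(payload)
    print("uploaded; file handle:", file_handle)

The same loop should work against the real service unchanged, since UploadService.upload_byte_stream() has an identical signature; only construction differs (the real class takes just the user access token and session key).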