mapillary-downloader 0.8.0__tar.gz → 0.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/PKG-INFO +1 -1
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/pyproject.toml +1 -1
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/downloader.py +77 -110
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/logging_config.py +5 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/webp_converter.py +0 -4
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/LICENSE.md +0 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/README.md +0 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/__init__.py +0 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/__main__.py +0 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/client.py +0 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/exif_writer.py +0 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/ia_check.py +0 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/ia_meta.py +0 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/ia_stats.py +0 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/metadata_reader.py +0 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/tar_sequences.py +0 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/utils.py +0 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/worker.py +0 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/worker_pool.py +0 -0
- {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/xmp_writer.py +0 -0
{mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/downloader.py
RENAMED
|
@@ -5,6 +5,7 @@ import json
|
|
|
5
5
|
import logging
|
|
6
6
|
import os
|
|
7
7
|
import shutil
|
|
8
|
+
import threading
|
|
8
9
|
import time
|
|
9
10
|
from pathlib import Path
|
|
10
11
|
from mapillary_downloader.utils import format_size, format_time, safe_json_save
|
|
@@ -146,6 +147,65 @@ class MapillaryDownloader:
|
|
|
146
147
|
# Write atomically using utility function
|
|
147
148
|
safe_json_save(self.progress_file, progress)
|
|
148
149
|
|
|
150
|
+
def _submit_metadata_batch(self, file_handle, quality_field, pool, convert_webp, process_results, base_submitted):
|
|
151
|
+
"""Read metadata lines from current position, submit to workers.
|
|
152
|
+
|
|
153
|
+
Args:
|
|
154
|
+
file_handle: Open file positioned at read point
|
|
155
|
+
quality_field: Field name for quality URL (e.g., "thumb_1024_url")
|
|
156
|
+
pool: Worker pool to submit to
|
|
157
|
+
convert_webp: Whether to convert to webp
|
|
158
|
+
process_results: Callback to drain result queue
|
|
159
|
+
base_submitted: Running total for cumulative logging
|
|
160
|
+
|
|
161
|
+
Returns:
|
|
162
|
+
tuple: (submitted_count, skipped_count) for this batch
|
|
163
|
+
"""
|
|
164
|
+
submitted = 0
|
|
165
|
+
skipped = 0
|
|
166
|
+
|
|
167
|
+
for line in file_handle:
|
|
168
|
+
line = line.strip()
|
|
169
|
+
if not line:
|
|
170
|
+
continue
|
|
171
|
+
|
|
172
|
+
try:
|
|
173
|
+
image = json.loads(line)
|
|
174
|
+
except json.JSONDecodeError:
|
|
175
|
+
continue
|
|
176
|
+
|
|
177
|
+
if image.get("__complete__"):
|
|
178
|
+
continue
|
|
179
|
+
|
|
180
|
+
image_id = image.get("id")
|
|
181
|
+
if not image_id:
|
|
182
|
+
continue
|
|
183
|
+
|
|
184
|
+
if image_id in self.downloaded:
|
|
185
|
+
skipped += 1
|
|
186
|
+
continue
|
|
187
|
+
|
|
188
|
+
if not image.get(quality_field):
|
|
189
|
+
continue
|
|
190
|
+
|
|
191
|
+
work_item = (
|
|
192
|
+
image,
|
|
193
|
+
str(self.output_dir),
|
|
194
|
+
self.quality,
|
|
195
|
+
convert_webp,
|
|
196
|
+
self.client.access_token,
|
|
197
|
+
)
|
|
198
|
+
pool.submit(work_item)
|
|
199
|
+
submitted += 1
|
|
200
|
+
|
|
201
|
+
total = base_submitted + submitted
|
|
202
|
+
if total % 1000 == 0:
|
|
203
|
+
logger.info(f"Queue: submitted {total:,} images")
|
|
204
|
+
|
|
205
|
+
process_results()
|
|
206
|
+
|
|
207
|
+
return submitted, skipped
|
|
208
|
+
|
|
149
209
|
def download_user_data(self, bbox=None, convert_webp=False):
|
|
150
210
|
"""Download all images for a user using streaming queue-based architecture.
|
|
151
211
|
|
|
@@ -187,13 +247,13 @@ class MapillaryDownloader:
|
|
|
187
247
|
total_bytes = 0
|
|
188
248
|
failed_count = 0
|
|
189
249
|
submitted = 0
|
|
250
|
+
skipped_count = 0
|
|
190
251
|
|
|
191
252
|
try:
|
|
192
253
|
# Step 3a: Fetch metadata from API in parallel (write-only, don't block on queue)
|
|
193
|
-
|
|
194
|
-
import threading
|
|
254
|
+
api_fetch_complete = threading.Event()
|
|
195
255
|
|
|
196
|
-
|
|
256
|
+
if not api_complete:
|
|
197
257
|
new_images_count = [0] # Mutable so thread can update it
|
|
198
258
|
|
|
199
259
|
def fetch_api_metadata():
|
|
@@ -221,7 +281,7 @@ class MapillaryDownloader:
|
|
|
221
281
|
api_thread = threading.Thread(target=fetch_api_metadata, daemon=True)
|
|
222
282
|
api_thread.start()
|
|
223
283
|
else:
|
|
224
|
-
api_fetch_complete = None
|
|
284
|
+
api_fetch_complete.set()
|
|
225
285
|
|
|
226
286
|
# Step 3b: Tail metadata file and submit to workers
|
|
227
287
|
logger.debug("Starting metadata tail and download queue feeder")
|
|
@@ -244,9 +304,10 @@ class MapillaryDownloader:
|
|
|
244
304
|
total_bytes += bytes_dl
|
|
245
305
|
|
|
246
306
|
# Log every download for first 10, then every 100
|
|
307
|
+
total_downloaded = len(self.downloaded)
|
|
247
308
|
should_log = downloaded_count <= 10 or downloaded_count % 100 == 0
|
|
248
309
|
if should_log:
|
|
249
|
-
logger.info(f"Downloaded: {
|
|
310
|
+
logger.info(f"Downloaded: {total_downloaded:,} ({format_size(total_bytes)} this session)")
|
|
250
311
|
|
|
251
312
|
if downloaded_count % 100 == 0:
|
|
252
313
|
pool.check_throughput(downloaded_count)
|
|
@@ -260,117 +321,20 @@ class MapillaryDownloader:
|
|
|
260
321
|
|
|
261
322
|
# Tail the metadata file and submit to workers
|
|
262
323
|
while True:
|
|
263
|
-
# Check if API fetch is done and we've processed everything
|
|
264
|
-
if api_fetch_complete and api_fetch_complete.is_set():
|
|
265
|
-
# Read any remaining lines
|
|
266
|
-
if self.metadata_file.exists():
|
|
267
|
-
with open(self.metadata_file) as f:
|
|
268
|
-
f.seek(last_position)
|
|
269
|
-
for line in f:
|
|
270
|
-
line = line.strip()
|
|
271
|
-
if not line:
|
|
272
|
-
continue
|
|
273
|
-
|
|
274
|
-
try:
|
|
275
|
-
image = json.loads(line)
|
|
276
|
-
except json.JSONDecodeError:
|
|
277
|
-
# Incomplete line, will retry
|
|
278
|
-
continue
|
|
279
|
-
|
|
280
|
-
# Skip completion marker
|
|
281
|
-
if image.get("__complete__"):
|
|
282
|
-
continue
|
|
283
|
-
|
|
284
|
-
image_id = image.get("id")
|
|
285
|
-
if not image_id:
|
|
286
|
-
continue
|
|
287
|
-
|
|
288
|
-
# Skip if already downloaded or no quality URL
|
|
289
|
-
if image_id in self.downloaded:
|
|
290
|
-
downloaded_count += 1
|
|
291
|
-
continue
|
|
292
|
-
if not image.get(quality_field):
|
|
293
|
-
continue
|
|
294
|
-
|
|
295
|
-
# Submit to workers
|
|
296
|
-
work_item = (
|
|
297
|
-
image,
|
|
298
|
-
str(self.output_dir),
|
|
299
|
-
self.quality,
|
|
300
|
-
convert_webp,
|
|
301
|
-
self.client.access_token,
|
|
302
|
-
)
|
|
303
|
-
pool.submit(work_item)
|
|
304
|
-
submitted += 1
|
|
305
|
-
|
|
306
|
-
if submitted % 1000 == 0:
|
|
307
|
-
logger.info(f"Queue: submitted {submitted:,} images")
|
|
308
|
-
|
|
309
|
-
# Process results while submitting
|
|
310
|
-
process_results()
|
|
311
|
-
|
|
312
|
-
last_position = f.tell()
|
|
313
|
-
|
|
314
|
-
# API done and all lines processed, break
|
|
315
|
-
break
|
|
316
|
-
|
|
317
|
-
# API still running or API was already complete, tail the file
|
|
318
324
|
if self.metadata_file.exists():
|
|
319
325
|
with open(self.metadata_file) as f:
|
|
320
326
|
f.seek(last_position)
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
try:
|
|
327
|
-
image = json.loads(line)
|
|
328
|
-
except json.JSONDecodeError:
|
|
329
|
-
# Incomplete line, will retry next iteration
|
|
330
|
-
continue
|
|
331
|
-
|
|
332
|
-
# Skip completion marker
|
|
333
|
-
if image.get("__complete__"):
|
|
334
|
-
continue
|
|
335
|
-
|
|
336
|
-
image_id = image.get("id")
|
|
337
|
-
if not image_id:
|
|
338
|
-
continue
|
|
339
|
-
|
|
340
|
-
# Skip if already downloaded or no quality URL
|
|
341
|
-
if image_id in self.downloaded:
|
|
342
|
-
downloaded_count += 1
|
|
343
|
-
continue
|
|
344
|
-
if not image.get(quality_field):
|
|
345
|
-
continue
|
|
346
|
-
|
|
347
|
-
# Submit to workers
|
|
348
|
-
work_item = (
|
|
349
|
-
image,
|
|
350
|
-
str(self.output_dir),
|
|
351
|
-
self.quality,
|
|
352
|
-
convert_webp,
|
|
353
|
-
self.client.access_token,
|
|
354
|
-
)
|
|
355
|
-
pool.submit(work_item)
|
|
356
|
-
submitted += 1
|
|
357
|
-
|
|
358
|
-
if submitted % 1000 == 0:
|
|
359
|
-
logger.info(f"Queue: submitted {submitted:,} images")
|
|
360
|
-
|
|
361
|
-
# Process results while submitting
|
|
362
|
-
process_results()
|
|
363
|
-
|
|
327
|
+
batch_submitted, batch_skipped = self._submit_metadata_batch(
|
|
328
|
+
f, quality_field, pool, convert_webp, process_results, submitted
|
|
329
|
+
)
|
|
330
|
+
submitted += batch_submitted
|
|
331
|
+
skipped_count += batch_skipped
|
|
364
332
|
last_position = f.tell()
|
|
365
333
|
|
|
366
|
-
|
|
367
|
-
if api_fetch_complete is None:
|
|
334
|
+
if api_fetch_complete.is_set():
|
|
368
335
|
break
|
|
369
336
|
|
|
370
|
-
# Sleep briefly before next tail iteration
|
|
371
337
|
time.sleep(0.1)
|
|
372
|
-
|
|
373
|
-
# Process any results that came in
|
|
374
338
|
process_results()
|
|
375
339
|
|
|
376
340
|
# Send shutdown signals
|
|
@@ -397,7 +361,7 @@ class MapillaryDownloader:
|
|
|
397
361
|
total_bytes += bytes_dl
|
|
398
362
|
|
|
399
363
|
if downloaded_count % 100 == 0:
|
|
400
|
-
logger.info(f"Downloaded: {
|
|
364
|
+
logger.info(f"Downloaded: {len(self.downloaded):,} ({format_size(total_bytes)} this session)")
|
|
401
365
|
pool.check_throughput(downloaded_count)
|
|
402
366
|
# Save progress every 5 minutes
|
|
403
367
|
if time.time() - self._last_save_time >= 300:
|
|
@@ -414,7 +378,10 @@ class MapillaryDownloader:
|
|
|
414
378
|
self._save_progress()
|
|
415
379
|
elapsed = time.time() - start_time
|
|
416
380
|
|
|
417
|
-
logger.info(
|
|
381
|
+
logger.info(
|
|
382
|
+
f"Complete! Downloaded {downloaded_count:,} this session ({format_size(total_bytes)}), "
|
|
383
|
+
f"{len(self.downloaded):,} total, skipped {skipped_count:,}, failed {failed_count:,}"
|
|
384
|
+
)
|
|
418
385
|
logger.info(f"Total time: {format_time(elapsed)}")
|
|
419
386
|
|
|
420
387
|
# Tar sequence directories for efficient IA uploads
|
{mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/logging_config.py
RENAMED
|
@@ -15,6 +15,7 @@ class ColoredFormatter(logging.Formatter):
|
|
|
15
15
|
"DEBUG": "\033[94m", # Blue
|
|
16
16
|
"RESET": "\033[0m",
|
|
17
17
|
}
|
|
18
|
+
CYAN = "\033[96m"
|
|
18
19
|
|
|
19
20
|
def __init__(self, fmt=None, datefmt=None, use_color=True):
|
|
20
21
|
"""Initialize the formatter.
|
|
@@ -41,6 +42,10 @@ class ColoredFormatter(logging.Formatter):
|
|
|
41
42
|
if levelname in self.COLORS:
|
|
42
43
|
record.levelname = f"{self.COLORS[levelname]}{levelname}{self.COLORS['RESET']}"
|
|
43
44
|
|
|
45
|
+
# Color API messages differently so they stand out
|
|
46
|
+
if record.msg.startswith("API"):
|
|
47
|
+
record.msg = f"{self.CYAN}{record.msg}{self.COLORS['RESET']}"
|
|
48
|
+
|
|
44
49
|
return super().format(record)
|
|
45
50
|
|
|
46
51
|
|
{mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/webp_converter.py
RENAMED
|
@@ -43,7 +43,6 @@ def convert_to_webp(jpg_path, output_path=None, delete_original=True):
|
|
|
43
43
|
["cwebp", "-metadata", "all", str(jpg_path), "-o", str(webp_path)],
|
|
44
44
|
capture_output=True,
|
|
45
45
|
text=True,
|
|
46
|
-
timeout=60,
|
|
47
46
|
)
|
|
48
47
|
|
|
49
48
|
if result.returncode != 0:
|
|
@@ -55,9 +54,6 @@ def convert_to_webp(jpg_path, output_path=None, delete_original=True):
|
|
|
55
54
|
jpg_path.unlink()
|
|
56
55
|
return webp_path
|
|
57
56
|
|
|
58
|
-
except subprocess.TimeoutExpired:
|
|
59
|
-
logger.error(f"cwebp conversion timed out for {jpg_path}")
|
|
60
|
-
return None
|
|
61
57
|
except Exception as e:
|
|
62
58
|
logger.error(f"Error converting {jpg_path} to WebP: {e}")
|
|
63
59
|
return None
|
|
File without changes
|
|
File without changes
|
{mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/__init__.py
RENAMED
|
File without changes
|
{mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/__main__.py
RENAMED
|
File without changes
|
{mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/client.py
RENAMED
|
File without changes
|
{mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/exif_writer.py
RENAMED
|
File without changes
|
{mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/ia_check.py
RENAMED
|
File without changes
|
{mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/ia_meta.py
RENAMED
|
File without changes
|
{mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/ia_stats.py
RENAMED
|
File without changes
|
|
File without changes
|
{mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/tar_sequences.py
RENAMED
|
File without changes
|
|
File without changes
|
{mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/worker.py
RENAMED
|
File without changes
|
{mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/worker_pool.py
RENAMED
|
File without changes
|
{mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/xmp_writer.py
RENAMED
|
File without changes
|