mapillary-downloader 0.7.4__tar.gz → 0.7.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/PKG-INFO +1 -1
  2. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/pyproject.toml +1 -1
  3. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/tar_sequences.py +11 -0
  4. mapillary_downloader-0.7.4/src/mapillary_downloader/graphql_web.py +0 -193
  5. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/LICENSE.md +0 -0
  6. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/README.md +0 -0
  7. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/__init__.py +0 -0
  8. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/__main__.py +0 -0
  9. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/client.py +0 -0
  10. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/downloader.py +0 -0
  11. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/exif_writer.py +0 -0
  12. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/ia_check.py +0 -0
  13. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/ia_meta.py +0 -0
  14. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/ia_stats.py +0 -0
  15. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/logging_config.py +0 -0
  16. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/metadata_reader.py +0 -0
  17. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/utils.py +0 -0
  18. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/webp_converter.py +0 -0
  19. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/worker.py +0 -0
  20. {mapillary_downloader-0.7.4 → mapillary_downloader-0.7.5}/src/mapillary_downloader/worker_pool.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mapillary_downloader
3
- Version: 0.7.4
3
+ Version: 0.7.5
4
4
  Summary: Archive user data from Mapillary
5
5
  Author-email: Gareth Davidson <gaz@bitplane.net>
6
6
  Requires-Python: >=3.10
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "mapillary_downloader"
3
3
  description = "Archive user data from Mapillary"
4
- version = "0.7.4"
4
+ version = "0.7.5"
5
5
  authors = [
6
6
  { name = "Gareth Davidson", email = "gaz@bitplane.net" }
7
7
  ]
@@ -52,7 +52,18 @@ def tar_sequence_directories(collection_dir):
52
52
 
53
53
  for date_dir in date_dirs:
54
54
  date_name = date_dir.name
55
+
56
+ # Find next available tar filename (don't overwrite existing tars)
55
57
  tar_path = collection_dir / f"{date_name}.tar"
58
+ if tar_path.exists():
59
+ # Find next available addendum number
60
+ addendum = 1
61
+ while True:
62
+ tar_path = collection_dir / f"{date_name}.{addendum}.tar"
63
+ if not tar_path.exists():
64
+ break
65
+ addendum += 1
66
+ logger.info(f"Existing tar for {date_name}, creating addendum: {tar_path.name}")
56
67
 
57
68
  # Count files in date directory
58
69
  files_to_tar = sorted([f for f in date_dir.rglob("*") if f.is_file()], key=lambda x: str(x))
@@ -1,193 +0,0 @@
1
- """GraphQL web API utilities (unofficial, experimental).
2
-
3
- This module provides access to Mapillary's GraphQL endpoint used by the web interface.
4
- Unlike the official v4 REST API, this requires a public web token extracted from the
5
- JavaScript bundle.
6
-
7
- Use cases:
8
- - Get user image counts without pagination
9
- - Access leaderboard data
10
- - Check for updates to existing downloads
11
-
12
- WARNING: This is not officially documented and may break at any time.
13
- """
14
-
15
- import json
16
- import logging
17
- import re
18
- from datetime import datetime
19
- from urllib.parse import urlencode, quote
20
- import requests
21
-
22
- logger = logging.getLogger("mapillary_downloader")
23
-
24
- # Fallback token (extracted from main JS bundle as of 2025-01-09)
25
- FALLBACK_TOKEN = "MLY|4223665974375089|d62822dd792b6a823d0794ef26450398"
26
-
27
-
28
- def extract_token_from_js():
29
- """Extract public web token from Mapillary's JavaScript bundle.
30
-
31
- This fetches the main page, finds the main JS bundle, and extracts
32
- the hardcoded MLY token used for GraphQL queries.
33
-
34
- Returns:
35
- Token string (e.g., "MLY|123|abc...") or None if extraction failed
36
- """
37
- try:
38
- # Fetch main page to find JS bundle URL
39
- # Need consent cookie to get actual page (not GDPR banner)
40
- logger.debug("Fetching Mapillary main page...")
41
- # Generate today's date in the format YYYY_MM_DD for cookie
42
- today = datetime.now().strftime("%Y_%m_%d")
43
- cookies = {
44
- "mly_cb": f'{{"version":"1","date":"{today}","third_party_consent":"withdrawn","categories":{{"content_and_media":"withdrawn"}},"integration_controls":{{"YOUTUBE":"withdrawn"}}}}'
45
- }
46
- headers = {
47
- "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:144.0) Gecko/20100101 Firefox/144.0",
48
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
49
- "Accept-Language": "en-GB,en;q=0.5",
50
- "Sec-GPC": "1",
51
- "Upgrade-Insecure-Requests": "1",
52
- "Sec-Fetch-Dest": "document",
53
- "Sec-Fetch-Mode": "navigate",
54
- "Sec-Fetch-Site": "none",
55
- "Sec-Fetch-User": "?1",
56
- }
57
- response = requests.get("https://www.mapillary.com/app/", cookies=cookies, headers=headers, timeout=30)
58
- response.raise_for_status()
59
-
60
- # Find main JS file URL
61
- # Pattern: <script src="main.{hash}.js" type="module"></script>
62
- js_match = re.search(r'src="(main\.[a-f0-9]+\.js)"', response.text)
63
- if not js_match:
64
- logger.warning("Could not find main JS bundle URL in page")
65
- return None
66
-
67
- # URL is relative to /app/ base path
68
- js_url = f"https://www.mapillary.com/app/{js_match.group(1)}"
69
- logger.debug(f"Found JS bundle: {js_url}")
70
-
71
- # Fetch JS bundle
72
- logger.debug("Fetching JS bundle...")
73
- js_response = requests.get(js_url, timeout=30)
74
- js_response.raise_for_status()
75
-
76
- # Extract token
77
- # Pattern: "MLY|{client_id}|{secret}"
78
- token_match = re.search(r'"(MLY\|[^"]+)"', js_response.text)
79
- if not token_match:
80
- logger.warning("Could not find MLY token in JS bundle")
81
- return None
82
-
83
- token = token_match.group(1)
84
- logger.info(f"Extracted web token: {token[:20]}...")
85
- return token
86
-
87
- except requests.RequestException as e:
88
- logger.error(f"Failed to extract web token: {e}")
89
- return None
90
- except Exception as e:
91
- logger.error(f"Unexpected error extracting web token: {e}")
92
- return None
93
-
94
-
95
- def get_leaderboard(key="global", token=None):
96
- """Get leaderboard data from Mapillary GraphQL API.
97
-
98
- Args:
99
- key: Leaderboard key (e.g., "global", country name, etc.)
100
- token: MLY token (if None, will extract from JS bundle or use fallback)
101
-
102
- Returns:
103
- Dict with leaderboard data, or None on error
104
- """
105
- if token is None:
106
- token = extract_token_from_js()
107
- if token is None:
108
- logger.warning("Failed to extract token, using fallback")
109
- token = FALLBACK_TOKEN
110
-
111
- # GraphQL query for leaderboard (lifetime stats only)
112
- query = """query getUserLeaderboard($key: String!) {
113
- user_leaderboards(key: $key) {
114
- lifetime {
115
- count
116
- user {
117
- id
118
- username
119
- profile_photo_url
120
- __typename
121
- }
122
- __typename
123
- }
124
- __typename
125
- }
126
- }"""
127
-
128
- try:
129
- headers = {
130
- "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:144.0) Gecko/20100101 Firefox/144.0",
131
- "Accept": "*/*",
132
- "Accept-Language": "en-GB,en;q=0.5",
133
- "Referer": "https://www.mapillary.com/",
134
- "content-type": "application/json",
135
- "authorization": f"OAuth {token}",
136
- "Origin": "https://www.mapillary.com",
137
- "Sec-Fetch-Dest": "empty",
138
- "Sec-Fetch-Mode": "cors",
139
- "Sec-Fetch-Site": "same-site",
140
- }
141
-
142
- # Build query params - use quote_via=quote to get %20 instead of +
143
- # Note: both 'doc' and 'query' params seem to be required (from observed curl)
144
- params = {
145
- "doc": query,
146
- "query": query,
147
- "operationName": "getUserLeaderboard",
148
- "variables": json.dumps({"key": key}, separators=(',', ':')),
149
- }
150
-
151
- # Build URL with proper percent encoding (not + for spaces)
152
- # Don't encode parentheses to match curl behavior
153
- query_string = urlencode(params, quote_via=lambda s, safe='', encoding=None, errors=None: quote(s, safe='()!'))
154
- url = f"https://graph.mapillary.com/graphql?{query_string}"
155
-
156
- logger.debug(f"Querying leaderboard for key: {key}")
157
-
158
- response = requests.get(
159
- url,
160
- headers=headers,
161
- timeout=30
162
- )
163
- response.raise_for_status()
164
-
165
- return response.json()
166
-
167
- except requests.RequestException as e:
168
- logger.error(f"Failed to query leaderboard: {e}")
169
- return None
170
- except Exception as e:
171
- logger.error(f"Unexpected error querying leaderboard: {e}")
172
- return None
173
-
174
-
175
- if __name__ == "__main__":
176
- # Test the extraction and leaderboard query
177
- logging.basicConfig(level=logging.DEBUG)
178
-
179
- print("=== Extracting token ===")
180
- token = extract_token_from_js()
181
- if token:
182
- print(f"Success! Token: {token}")
183
- else:
184
- print("Failed to extract token")
185
- print(f"Fallback: {FALLBACK_TOKEN}")
186
- token = FALLBACK_TOKEN
187
-
188
- print("\n=== Querying global leaderboard ===")
189
- data = get_leaderboard("global", token=token)
190
- if data:
191
- print(json.dumps(data, indent=2))
192
- else:
193
- print("Failed to get leaderboard data")