@music-league-eras/local-runner 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -9,7 +9,11 @@ from urllib.parse import urljoin, urlparse
|
|
|
9
9
|
|
|
10
10
|
from playwright.async_api import async_playwright
|
|
11
11
|
|
|
12
|
-
from ..services.viewer import
|
|
12
|
+
from ..services.viewer import (
|
|
13
|
+
extract_viewer_avatar_url,
|
|
14
|
+
extract_viewer_user_id,
|
|
15
|
+
extract_viewer_user_id_from_storage_state,
|
|
16
|
+
)
|
|
13
17
|
|
|
14
18
|
|
|
15
19
|
@dataclass(frozen=True)
|
|
@@ -235,6 +239,8 @@ async def capture_storage_state(
|
|
|
235
239
|
try:
|
|
236
240
|
html = await page.content()
|
|
237
241
|
viewer_user_id = extract_viewer_user_id(html)
|
|
242
|
+
if not viewer_user_id:
|
|
243
|
+
viewer_user_id = extract_viewer_user_id_from_storage_state(storage_state)
|
|
238
244
|
viewer_avatar_url = extract_viewer_avatar_url(html)
|
|
239
245
|
except Exception:
|
|
240
246
|
viewer_user_id = None
|
|
@@ -14,7 +14,7 @@ from urllib.parse import urljoin, urlparse
|
|
|
14
14
|
from bs4 import BeautifulSoup
|
|
15
15
|
from playwright.async_api import async_playwright
|
|
16
16
|
|
|
17
|
-
from .viewer import extract_viewer_user_id
|
|
17
|
+
from .viewer import extract_viewer_user_id, extract_viewer_user_id_from_storage_state
|
|
18
18
|
|
|
19
19
|
COMPLETED_LEAGUES_XHR = "/completed/-/completedLeagues"
|
|
20
20
|
DEFAULT_USER_AGENT = (
|
|
@@ -586,6 +586,15 @@ async def build_manifest(
|
|
|
586
586
|
completed_final_url = page.url
|
|
587
587
|
completed_page_html = await page.content()
|
|
588
588
|
_write_text(out_dir / "completed.page.html", completed_page_html)
|
|
589
|
+
|
|
590
|
+
# The completed leagues list is loaded via htmx; give it a moment to hydrate so our
|
|
591
|
+
# fallback parser can still find league links even if the XHR helpers fail.
|
|
592
|
+
try:
|
|
593
|
+
await page.wait_for_selector(".league-tile, a[href^=\"/l/\"]", timeout=6_000)
|
|
594
|
+
completed_page_html = await page.content()
|
|
595
|
+
_write_text(out_dir / "completed.page.after.html", completed_page_html)
|
|
596
|
+
except Exception:
|
|
597
|
+
pass
|
|
589
598
|
manifest["debug"] = {
|
|
590
599
|
"completed_url": completed_page_url,
|
|
591
600
|
"completed_final_url": completed_final_url,
|
|
@@ -595,6 +604,8 @@ async def build_manifest(
|
|
|
595
604
|
cookie_header = await _cookie_header_from_context(context, base_url) or initial_cookie_header
|
|
596
605
|
|
|
597
606
|
viewer_id = extract_viewer_user_id(completed_page_html)
|
|
607
|
+
if not viewer_id:
|
|
608
|
+
viewer_id = extract_viewer_user_id_from_storage_state(storage_state_payload)
|
|
598
609
|
manifest["viewer"] = {"user_id": viewer_id} if viewer_id else None
|
|
599
610
|
|
|
600
611
|
completed_url = _abs_url(base_url, COMPLETED_LEAGUES_XHR)
|
|
@@ -1,17 +1,28 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import re
|
|
4
|
+
from typing import Any
|
|
4
5
|
from typing import Optional
|
|
5
6
|
|
|
6
7
|
from bs4 import BeautifulSoup
|
|
7
8
|
|
|
8
9
|
USER_HREF_RE = re.compile(r"^/user/([0-9a-f]{32})/?$", re.I)
|
|
9
10
|
VIEW_PROFILE_LABEL = "view profile"
|
|
11
|
+
HEX_32_RE = re.compile(r"\b([0-9a-f]{32})\b", re.I)
|
|
12
|
+
BEACON_USER_ID_RE = re.compile(
|
|
13
|
+
r"Beacon\(\s*['\"]session-data['\"]\s*,\s*\{[^}]*['\"]User ID['\"]\s*:\s*['\"]([0-9a-f]{32})['\"]",
|
|
14
|
+
re.I,
|
|
15
|
+
)
|
|
10
16
|
|
|
11
17
|
|
|
12
18
|
def extract_viewer_user_id(html: str) -> Optional[str]:
|
|
13
19
|
soup = BeautifulSoup(html or "", "html.parser")
|
|
14
20
|
|
|
21
|
+
# Some templates include the user id for support tooling (HelpScout Beacon).
|
|
22
|
+
beacon_match = BEACON_USER_ID_RE.search(html or "")
|
|
23
|
+
if beacon_match:
|
|
24
|
+
return beacon_match.group(1)
|
|
25
|
+
|
|
15
26
|
a_tag = soup.find(
|
|
16
27
|
"a",
|
|
17
28
|
href=lambda h: bool(h and USER_HREF_RE.match(h)),
|
|
@@ -56,3 +67,92 @@ def extract_viewer_avatar_url(html: str) -> Optional[str]:
|
|
|
56
67
|
return src
|
|
57
68
|
|
|
58
69
|
return None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def extract_viewer_user_id_from_storage_state(storage_state: dict[str, Any] | None) -> Optional[str]:
    """Best-effort lookup of a 32-hex viewer id inside a storage-state mapping.

    The mapping is expected to carry optional ``"origins"`` (each with a
    ``"localStorage"`` list of ``{"name", "value"}`` dicts) and ``"cookies"``
    (a list of ``{"name", "value"}`` dicts).  Anything with an unexpected type
    is skipped rather than raising.

    Heuristics, tried in order (first hit wins):

    1. Walk the localStorage entries in document order.  For each entry, if
       its name mentions user/viewer/account/profile, take the first 32-hex
       token in the value; failing that, if the value looks like a JSON
       object/array, defer to ``_extract_hex_from_json_like``.
    2. Cookies whose name mentions user/uid: first 32-hex token in the value.
    3. If all localStorage values together contain exactly one distinct
       32-hex token, return it.

    Returns ``None`` when ``storage_state`` is falsy or nothing matches.
    """
    if not storage_state:
        return None

    origins = storage_state.get("origins") or []

    def first_hex(text: str | None) -> Optional[str]:
        # First 32-hex token in ``text``; None for empty text or no match.
        found = HEX_32_RE.search(text) if text else None
        return found.group(1) if found else None

    def local_storage_items():
        # Lazily yield (name, value-as-str) for every well-formed localStorage
        # entry whose value is not None, in document order.
        if not isinstance(origins, list):
            return
        for origin in origins:
            if not isinstance(origin, dict):
                continue
            bucket = origin.get("localStorage") or []
            if not isinstance(bucket, list):
                continue
            for item in bucket:
                if not isinstance(item, dict):
                    continue
                key = str(item.get("name") or "")
                raw = item.get("value")
                if raw is None:
                    continue
                text = raw if isinstance(raw, str) else str(raw)
                yield key, text

    # Pass 1: explicitly user-related keys, then JSON-looking blobs, per entry.
    for key, text in local_storage_items():
        named_hit = (
            first_hex(text)
            if re.search(r"(user|viewer|account|profile)", key, re.I)
            else None
        )
        if named_hit:
            return named_hit

        # Values stored as serialized JSON start with "{" or "[" once
        # leading whitespace is ignored.
        if text.lstrip()[:1] in ("{", "["):
            blob_hit = _extract_hex_from_json_like(text)
            if blob_hit:
                return blob_hit

    # Pass 2: cookies with user-ish names that directly store a 32-hex id.
    jar = storage_state.get("cookies") or []
    if isinstance(jar, list):
        for cookie in jar:
            if not isinstance(cookie, dict):
                continue
            cookie_name = str(cookie.get("name") or "")
            cookie_value = str(cookie.get("value") or "")
            if not re.search(r"(user|uid)", cookie_name, re.I):
                continue
            cookie_hit = first_hex(cookie_value)
            if cookie_hit:
                return cookie_hit

    # Pass 3: accept a 32-hex token only if it is the single distinct one
    # found anywhere in the localStorage values.
    distinct = {
        found.group(1)
        for _, text in local_storage_items()
        for found in HEX_32_RE.finditer(text)
    }
    if len(distinct) == 1:
        return distinct.pop()

    return None
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _extract_hex_from_json_like(value: str) -> Optional[str]:
    """Pull a 32-hex user id out of a JSON-ish string without parsing it.

    This is a regex heuristic, not a JSON parser: it looks for a quoted key
    (single or double quotes, case-insensitive) followed by a quoted 32-hex
    value.  Keys are tried in priority order -- ``user_id``, then ``userId``,
    then ``viewer_user_id`` -- and each is searched across the whole string
    before moving on to the next, so an earlier key wins even when a later
    key appears first in the text.

    Returns the captured id exactly as written, or ``None`` if no key matches.
    """
    key_patterns = (
        r"['\"]user_id['\"]\s*:\s*['\"]([0-9a-f]{32})['\"]",
        r"['\"]userId['\"]\s*:\s*['\"]([0-9a-f]{32})['\"]",
        r"['\"]viewer_user_id['\"]\s*:\s*['\"]([0-9a-f]{32})['\"]",
    )
    # Lazy generator: later patterns are only searched if earlier ones miss.
    attempts = (re.search(pattern, value, re.I) for pattern in key_patterns)
    return next((found.group(1) for found in attempts if found), None)
|