chzzk-archiver 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chzzk_archiver-0.2.0/PKG-INFO +112 -0
- chzzk_archiver-0.2.0/README.md +93 -0
- chzzk_archiver-0.2.0/pyproject.toml +28 -0
- chzzk_archiver-0.2.0/src/chzzk_archiver/__init__.py +7 -0
- chzzk_archiver-0.2.0/src/chzzk_archiver/__main__.py +4 -0
- chzzk_archiver-0.2.0/src/chzzk_archiver/chat.py +276 -0
- chzzk_archiver-0.2.0/src/chzzk_archiver/chzzk.py +272 -0
- chzzk_archiver-0.2.0/src/chzzk_archiver/cli.py +290 -0
- chzzk_archiver-0.2.0/src/chzzk_archiver/finalize.py +46 -0
- chzzk_archiver-0.2.0/src/chzzk_archiver/hls.py +209 -0
- chzzk_archiver-0.2.0/src/chzzk_archiver/http_client.py +132 -0
- chzzk_archiver-0.2.0/src/chzzk_archiver/recorder.py +744 -0
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: chzzk-archiver
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
License-Expression: Unlicense
|
|
5
|
+
Classifier: Development Status :: 3 - Alpha
|
|
6
|
+
Classifier: Environment :: Console
|
|
7
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Classifier: Programming Language :: Python
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
13
|
+
Classifier: Topic :: Utilities
|
|
14
|
+
Requires-Dist: websockets>=12.0
|
|
15
|
+
Maintainer: Mina Her
|
|
16
|
+
Maintainer-email: Mina Her <minacle@live.com>
|
|
17
|
+
Requires-Python: >=3.14
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# CHZZK Archiver
|
|
21
|
+
|
|
22
|
+
CLI for archiving CHZZK live streams and replay videos with chat.
|
|
23
|
+
|
|
24
|
+
It accepts live URLs, replay URLs, channel IDs, and video numbers, saves the original HLS capture locally, records chat as JSONL, and can remux the capture to MP4 with `ffmpeg`.
|
|
25
|
+
|
|
26
|
+
## Requirements
|
|
27
|
+
|
|
28
|
+
- Python 3.14+
|
|
29
|
+
- [uv](https://docs.astral.sh/uv/)
|
|
30
|
+
- `ffmpeg` on `PATH` if you want `archive.mp4`
|
|
31
|
+
|
|
32
|
+
## Install
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
uv sync
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Quick start
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
# Inspect a live stream or replay and list available qualities
|
|
42
|
+
uv run chzzk-archiver probe https://chzzk.naver.com/live/<channel-id>
|
|
43
|
+
uv run chzzk-archiver probe https://chzzk.naver.com/video/<video-no>
|
|
44
|
+
|
|
45
|
+
# Record a live stream
|
|
46
|
+
uv run chzzk-archiver record https://chzzk.naver.com/live/<channel-id> --quality best
|
|
47
|
+
|
|
48
|
+
# Start recording at a local time (live only)
|
|
49
|
+
uv run chzzk-archiver record https://chzzk.naver.com/live/<channel-id> --start-at 22:00
|
|
50
|
+
|
|
51
|
+
# Record a replay
|
|
52
|
+
uv run chzzk-archiver record https://chzzk.naver.com/video/<video-no> --quality 1080p
|
|
53
|
+
|
|
54
|
+
# Remux an existing capture later
|
|
55
|
+
uv run chzzk-archiver remux archives/<session-dir> --overwrite
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
You can also pass identifiers directly:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
uv run chzzk-archiver record <channel-id>
|
|
62
|
+
uv run chzzk-archiver record <video-no>
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Commands
|
|
66
|
+
|
|
67
|
+
| Command | Purpose |
|
|
68
|
+
| --- | --- |
|
|
69
|
+
| `probe <target>` | Show metadata and available qualities before recording. |
|
|
70
|
+
| `record <target>` | Archive video and chat into a new session directory. |
|
|
71
|
+
| `remux <archive-dir>` | Create `archive.mp4` from an existing `capture.m3u8`. |
|
|
72
|
+
|
|
73
|
+
## Useful options
|
|
74
|
+
|
|
75
|
+
- `--quality best|worst|1080p|720p|...`
|
|
76
|
+
- `--cookies <cookie-header-or-file>` for streams that require login
|
|
77
|
+
- `--no-chat` to skip chat recording
|
|
78
|
+
- `--no-remux` to skip MP4 creation
|
|
79
|
+
- `--start-at HH:MM` to wait for a local time before live recording
|
|
80
|
+
- `--mode hls|llhls|auto` and `--include-parts` for LL-HLS captures
|
|
81
|
+
- `--max-seconds <n>` to stop after a fixed duration
|
|
82
|
+
- `--log-level DEBUG` for detailed download logs
|
|
83
|
+
|
|
84
|
+
Example with cookies:
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
uv run chzzk-archiver --cookies cookies.txt record https://chzzk.naver.com/live/<channel-id>
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Output
|
|
91
|
+
|
|
92
|
+
Each `record` run creates a directory under `archives/` containing:
|
|
93
|
+
|
|
94
|
+
- `raw/`: downloaded init files and media segments
|
|
95
|
+
- `capture.m3u8`: local playlist pointing to `raw/`
|
|
96
|
+
- `archive.mp4`: remuxed MP4 when `ffmpeg` is available and remuxing is enabled
|
|
97
|
+
- `chat.jsonl`: recorded live or replay chat when available
|
|
98
|
+
- `metadata.json`: stream metadata and selected options
|
|
99
|
+
|
|
100
|
+
During `record`, progress logs go to stderr and the final result is printed as JSON to stdout.
|
|
101
|
+
|
|
102
|
+
## Behavior notes
|
|
103
|
+
|
|
104
|
+
- Live recording retries automatically until playable media is available.
|
|
105
|
+
- Press `Ctrl+C` during `record` to stop gracefully; remuxing is still attempted unless `--no-remux` is set.
|
|
106
|
+
- `--start-at` only works for live targets, not replay videos.
|
|
107
|
+
|
|
108
|
+
## Limitations and safety
|
|
109
|
+
|
|
110
|
+
- This project uses unofficial CHZZK endpoints, so API behavior may change.
|
|
111
|
+
- Use it only for streams or videos you are allowed to access.
|
|
112
|
+
- It does not bypass DRM, paywalls, region restrictions, or other access controls.
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# CHZZK Archiver
|
|
2
|
+
|
|
3
|
+
CLI for archiving CHZZK live streams and replay videos with chat.
|
|
4
|
+
|
|
5
|
+
It accepts live URLs, replay URLs, channel IDs, and video numbers, saves the original HLS capture locally, records chat as JSONL, and can remux the capture to MP4 with `ffmpeg`.
|
|
6
|
+
|
|
7
|
+
## Requirements
|
|
8
|
+
|
|
9
|
+
- Python 3.14+
|
|
10
|
+
- [uv](https://docs.astral.sh/uv/)
|
|
11
|
+
- `ffmpeg` on `PATH` if you want `archive.mp4`
|
|
12
|
+
|
|
13
|
+
## Install
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
uv sync
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Quick start
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
# Inspect a live stream or replay and list available qualities
|
|
23
|
+
uv run chzzk-archiver probe https://chzzk.naver.com/live/<channel-id>
|
|
24
|
+
uv run chzzk-archiver probe https://chzzk.naver.com/video/<video-no>
|
|
25
|
+
|
|
26
|
+
# Record a live stream
|
|
27
|
+
uv run chzzk-archiver record https://chzzk.naver.com/live/<channel-id> --quality best
|
|
28
|
+
|
|
29
|
+
# Start recording at a local time (live only)
|
|
30
|
+
uv run chzzk-archiver record https://chzzk.naver.com/live/<channel-id> --start-at 22:00
|
|
31
|
+
|
|
32
|
+
# Record a replay
|
|
33
|
+
uv run chzzk-archiver record https://chzzk.naver.com/video/<video-no> --quality 1080p
|
|
34
|
+
|
|
35
|
+
# Remux an existing capture later
|
|
36
|
+
uv run chzzk-archiver remux archives/<session-dir> --overwrite
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
You can also pass identifiers directly:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
uv run chzzk-archiver record <channel-id>
|
|
43
|
+
uv run chzzk-archiver record <video-no>
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Commands
|
|
47
|
+
|
|
48
|
+
| Command | Purpose |
|
|
49
|
+
| --- | --- |
|
|
50
|
+
| `probe <target>` | Show metadata and available qualities before recording. |
|
|
51
|
+
| `record <target>` | Archive video and chat into a new session directory. |
|
|
52
|
+
| `remux <archive-dir>` | Create `archive.mp4` from an existing `capture.m3u8`. |
|
|
53
|
+
|
|
54
|
+
## Useful options
|
|
55
|
+
|
|
56
|
+
- `--quality best|worst|1080p|720p|...`
|
|
57
|
+
- `--cookies <cookie-header-or-file>` for streams that require login
|
|
58
|
+
- `--no-chat` to skip chat recording
|
|
59
|
+
- `--no-remux` to skip MP4 creation
|
|
60
|
+
- `--start-at HH:MM` to wait for a local time before live recording
|
|
61
|
+
- `--mode hls|llhls|auto` and `--include-parts` for LL-HLS captures
|
|
62
|
+
- `--max-seconds <n>` to stop after a fixed duration
|
|
63
|
+
- `--log-level DEBUG` for detailed download logs
|
|
64
|
+
|
|
65
|
+
Example with cookies:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
uv run chzzk-archiver --cookies cookies.txt record https://chzzk.naver.com/live/<channel-id>
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Output
|
|
72
|
+
|
|
73
|
+
Each `record` run creates a directory under `archives/` containing:
|
|
74
|
+
|
|
75
|
+
- `raw/`: downloaded init files and media segments
|
|
76
|
+
- `capture.m3u8`: local playlist pointing to `raw/`
|
|
77
|
+
- `archive.mp4`: remuxed MP4 when `ffmpeg` is available and remuxing is enabled
|
|
78
|
+
- `chat.jsonl`: recorded live or replay chat when available
|
|
79
|
+
- `metadata.json`: stream metadata and selected options
|
|
80
|
+
|
|
81
|
+
During `record`, progress logs go to stderr and the final result is printed as JSON to stdout.
|
|
82
|
+
|
|
83
|
+
## Behavior notes
|
|
84
|
+
|
|
85
|
+
- Live recording retries automatically until playable media is available.
|
|
86
|
+
- Press `Ctrl+C` during `record` to stop gracefully; remuxing is still attempted unless `--no-remux` is set.
|
|
87
|
+
- `--start-at` only works for live targets, not replay videos.
|
|
88
|
+
|
|
89
|
+
## Limitations and safety
|
|
90
|
+
|
|
91
|
+
- This project uses unofficial CHZZK endpoints, so API behavior may change.
|
|
92
|
+
- Use it only for streams or videos you are allowed to access.
|
|
93
|
+
- It does not bypass DRM, paywalls, region restrictions, or other access controls.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "chzzk-archiver"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
readme = "README.md"
|
|
5
|
+
requires-python = ">=3.14"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"websockets>=12.0",
|
|
8
|
+
]
|
|
9
|
+
maintainers = [{ name = "Mina Her", email = "minacle@live.com" }]
|
|
10
|
+
license = "Unlicense"
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Development Status :: 3 - Alpha",
|
|
13
|
+
"Environment :: Console",
|
|
14
|
+
"Intended Audience :: End Users/Desktop",
|
|
15
|
+
"Operating System :: OS Independent",
|
|
16
|
+
"Programming Language :: Python",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
19
|
+
"Programming Language :: Python :: 3.14",
|
|
20
|
+
"Topic :: Utilities",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.scripts]
|
|
24
|
+
chzzk-archiver = "chzzk_archiver:main"
|
|
25
|
+
|
|
26
|
+
[build-system]
|
|
27
|
+
requires = ["uv_build>=0.11.8,<0.12.0"]
|
|
28
|
+
build-backend = "uv_build"
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import json
|
|
5
|
+
import time
|
|
6
|
+
import urllib.parse
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from threading import Event
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
from .http_client import HttpClient
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# HTTP endpoints. VIDEO_CHATS_URL is a template that is filled in with the
# numeric ``video_no`` before use.
VIDEO_CHATS_URL = "https://api.chzzk.naver.com/service/v1/videos/{video_no}/chats"
CHAT_ACCESS_TOKEN_URL = "https://comm-api.game.naver.com/nng_main/v1/chats/access-token"

# ``cmd`` codes this module sends over the chat WebSocket.
CMD_CONNECT = 100
CMD_REQUEST_RECENT_CHAT = 5101
CMD_PONG = 10000  # reply to an incoming CMD_PING frame

# ``cmd`` codes of control frames this module reacts to.
CMD_PING = 0
CMD_CONNECTED = 10100  # its body supplies the ``sid`` used for follow-up requests

# ``cmd`` codes of frames whose bodies are normalized into chat records.
CMD_RECENT_CHAT = 15101
CMD_CHAT = 93101
CMD_DONATION = 93102
CMD_BLIND = 94006
CMD_NOTICE = 94010
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ChatError(RuntimeError):
    """Raised when chat data, a chat API response, or chat setup is unusable."""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True)
|
|
35
|
+
class ChatAccess:
|
|
36
|
+
chat_channel_id: str
|
|
37
|
+
access_token: str
|
|
38
|
+
extra_token: str | None = None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def chat_socket_url(chat_channel_id: str) -> str:
    """Return the chat WebSocket URL to use for *chat_channel_id*.

    The host is one of ``kr-ss1`` through ``kr-ss9``; the number is the sum of
    the id's character code points modulo 9, plus one.
    """
    code_point_total = sum(map(ord, chat_channel_id))
    host_number = code_point_total % 9 + 1
    return f"wss://kr-ss{host_number}.chat.naver.com/chat"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _parse_json_field(value: Any) -> Any:
|
|
47
|
+
if not isinstance(value, str):
|
|
48
|
+
return value
|
|
49
|
+
try:
|
|
50
|
+
return json.loads(value)
|
|
51
|
+
except json.JSONDecodeError:
|
|
52
|
+
return value
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def normalize_chat_frame(raw: str) -> list[dict[str, Any]]:
|
|
56
|
+
try:
|
|
57
|
+
frame = json.loads(raw)
|
|
58
|
+
except json.JSONDecodeError as exc:
|
|
59
|
+
raise ChatError("Chat frame is not valid JSON") from exc
|
|
60
|
+
|
|
61
|
+
cmd = frame.get("cmd")
|
|
62
|
+
body = frame.get("bdy")
|
|
63
|
+
if cmd not in {CMD_CHAT, CMD_RECENT_CHAT, CMD_DONATION, CMD_NOTICE, CMD_BLIND}:
|
|
64
|
+
return []
|
|
65
|
+
|
|
66
|
+
if isinstance(body, dict) and isinstance(body.get("messageList"), list):
|
|
67
|
+
messages = body["messageList"]
|
|
68
|
+
elif isinstance(body, list):
|
|
69
|
+
messages = body
|
|
70
|
+
elif body:
|
|
71
|
+
messages = [body]
|
|
72
|
+
else:
|
|
73
|
+
messages = []
|
|
74
|
+
|
|
75
|
+
normalized: list[dict[str, Any]] = []
|
|
76
|
+
for message in messages:
|
|
77
|
+
if not isinstance(message, dict):
|
|
78
|
+
continue
|
|
79
|
+
profile = _parse_json_field(message.get("profile")) or {}
|
|
80
|
+
extras = _parse_json_field(message.get("extras")) or {}
|
|
81
|
+
normalized.append(
|
|
82
|
+
{
|
|
83
|
+
"cmd": cmd,
|
|
84
|
+
"message": message.get("msg") or message.get("message") or message.get("content") or "",
|
|
85
|
+
"message_type": message.get("msgTypeCode") or message.get("messageTypeCode"),
|
|
86
|
+
"message_time": message.get("msgTime") or message.get("messageTime"),
|
|
87
|
+
"member_count": message.get("mbrCnt") or message.get("memberCount"),
|
|
88
|
+
"profile": profile,
|
|
89
|
+
"extras": extras,
|
|
90
|
+
"hidden": (message.get("msgStatusType") or message.get("messageStatusType")) == "HIDDEN",
|
|
91
|
+
"received_at": int(time.time() * 1000),
|
|
92
|
+
}
|
|
93
|
+
)
|
|
94
|
+
return normalized
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def normalize_replay_chat_message(message: dict[str, Any]) -> dict[str, Any]:
|
|
98
|
+
profile = _parse_json_field(message.get("profile")) or {}
|
|
99
|
+
extras = _parse_json_field(message.get("extras")) or {}
|
|
100
|
+
return {
|
|
101
|
+
"message": message.get("msg") or message.get("message") or message.get("content") or "",
|
|
102
|
+
"message_type": message.get("msgTypeCode") or message.get("messageTypeCode"),
|
|
103
|
+
"message_time": message.get("msgTime") or message.get("messageTime"),
|
|
104
|
+
"player_message_time": message.get("playerMessageTime"),
|
|
105
|
+
"member_count": message.get("mbrCnt") or message.get("memberCount"),
|
|
106
|
+
"profile": profile,
|
|
107
|
+
"extras": extras,
|
|
108
|
+
"hidden": (message.get("msgStatusType") or message.get("messageStatusType")) == "HIDDEN",
|
|
109
|
+
"received_at": int(time.time() * 1000),
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class ChatClient:
    """HTTP-based chat helper: access-token lookup and replay chat download."""

    def __init__(self, http: HttpClient) -> None:
        # Only ``http.get_json(url, headers=...)`` is used by this class.
        self.http = http

    def fetch_access_token(self, chat_channel_id: str, chat_type: str = "STREAMING") -> ChatAccess:
        """Request a chat access token for *chat_channel_id*.

        Raises:
            ChatError: if the response has no dict ``content`` or no non-empty
                string ``accessToken``.
        """
        params = {"channelId": chat_channel_id, "chatType": chat_type}
        request_headers = {
            "Accept": "application/json, text/plain, */*",
            "Origin": "https://chzzk.naver.com",
            "Referer": "https://chzzk.naver.com/",
        }
        response = self.http.get_json(
            f"{CHAT_ACCESS_TOKEN_URL}?{urllib.parse.urlencode(params)}",
            headers=request_headers,
        )
        content = response.get("content") if isinstance(response, dict) else None
        if not isinstance(content, dict):
            raise ChatError("Chat access-token response is not valid")

        token = content.get("accessToken")
        if not (isinstance(token, str) and token):
            raise ChatError("Chat access-token response does not contain an accessToken")

        # The extra token is optional; anything that is not a string is dropped.
        extra = content.get("extraToken")
        if not isinstance(extra, str):
            extra = None
        return ChatAccess(chat_channel_id=chat_channel_id, access_token=token, extra_token=extra)

    def fetch_video_chats(
        self,
        video_no: int,
        *,
        player_message_time: int = 1000,
        previous_video_chat_size: int = 50,
    ) -> dict[str, Any]:
        """Fetch one page of replay chat for *video_no* and return its ``content`` dict.

        Raises:
            ChatError: if the response is not a dict with a dict ``content``.
        """
        params = {
            "playerMessageTime": player_message_time,
            "previousVideoChatSize": previous_video_chat_size,
        }
        request_headers = {
            "Accept": "application/json, text/plain, */*",
            "Origin": "https://chzzk.naver.com",
            "Referer": f"https://chzzk.naver.com/video/{video_no}",
        }
        endpoint = VIDEO_CHATS_URL.format(video_no=video_no)
        response = self.http.get_json(
            f"{endpoint}?{urllib.parse.urlencode(params)}",
            headers=request_headers,
        )
        content = response.get("content") if isinstance(response, dict) else None
        if not isinstance(content, dict):
            raise ChatError("Video chats response is not valid")
        return content

    def write_video_chats(
        self,
        video_no: int,
        output_path: Path,
        *,
        page_size: int = 50,
        initial_player_message_time: int = 1000,
    ) -> int:
        """Download the replay chat of *video_no* and append it to *output_path* as JSONL.

        Pages are requested until a page has no rows or its
        ``nextPlayerMessageTime`` is missing, not an int, or does not advance.
        Rows already written during this call are skipped.

        Returns:
            The number of lines written.
        """
        output_path.parent.mkdir(parents=True, exist_ok=True)
        cursor = initial_player_message_time
        total_written = 0
        emitted: set[tuple[Any, Any, Any, Any]] = set()

        with output_path.open("a", encoding="utf-8") as sink:
            while True:
                page = self.fetch_video_chats(
                    video_no,
                    player_message_time=cursor,
                    previous_video_chat_size=page_size,
                )

                # Gather the dict rows of both sections, earlier chats first.
                batch: list[dict[str, Any]] = []
                for section in ("previousVideoChats", "videoChats"):
                    chunk = page.get(section)
                    if isinstance(chunk, list):
                        batch += [entry for entry in chunk if isinstance(entry, dict)]

                for entry in batch:
                    record = normalize_replay_chat_message(entry)
                    fingerprint = (
                        record.get("message_time"),
                        record.get("player_message_time"),
                        entry.get("userIdHash"),
                        record.get("message"),
                    )
                    if fingerprint not in emitted:
                        emitted.add(fingerprint)
                        sink.write(json.dumps(record, ensure_ascii=False, sort_keys=True) + "\n")
                        total_written += 1

                upcoming = page.get("nextPlayerMessageTime")
                advances = isinstance(upcoming, int) and upcoming > cursor
                if not (batch and advances):
                    break
                cursor = upcoming

        return total_written
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
class ChatWebSocketRecorder:
    """Record live chat from the chat WebSocket into a JSONL file."""

    def __init__(self, access: ChatAccess, output_path: Path, *, recent_count: int = 50) -> None:
        # Channel id and token used for the connect handshake.
        self.access = access
        # JSONL file that records are appended to.
        self.output_path = output_path
        # Number of recent messages requested once the connection is confirmed.
        self.recent_count = recent_count

    async def record_until_stopped(self, stop_event: Event) -> None:
        """Connect to the chat socket and append chat records until *stop_event* is set.

        The stop event is checked at least once per second, because each
        receive waits for at most one second. Non-text frames are ignored.
        Frames that are not valid JSON objects are logged and skipped so that
        one bad frame does not end the recording.

        Raises:
            ChatError: if the ``websockets`` package is not installed.
        """
        try:
            import websockets
        except ImportError as exc:
            raise ChatError("The websockets package is required for chat recording") from exc
        import logging

        logger = logging.getLogger(__name__)

        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        # ping_interval=None: ping/pong is handled at the application level
        # through CMD_PING / CMD_PONG frames below.
        async with websockets.connect(chat_socket_url(self.access.chat_channel_id), ping_interval=None) as socket:
            await socket.send(
                json.dumps(
                    {
                        "ver": "2",
                        "cmd": CMD_CONNECT,
                        "svcid": "game",
                        "cid": self.access.chat_channel_id,
                        "tid": 1,
                        "bdy": {
                            "uid": None,
                            "devType": 2001,
                            "accTkn": self.access.access_token,
                            "accessToken": self.access.access_token,
                            "auth": "READ",
                        },
                    },
                    ensure_ascii=False,
                )
            )

            sid: str | None = None
            with self.output_path.open("a", encoding="utf-8") as output:
                while not stop_event.is_set():
                    try:
                        raw = await asyncio.wait_for(socket.recv(), timeout=1.0)
                    except asyncio.TimeoutError:
                        # Nothing arrived; loop around to re-check the stop event.
                        continue
                    if not isinstance(raw, str):
                        continue

                    try:
                        frame = json.loads(raw)
                    except json.JSONDecodeError:
                        logger.warning("Skipping chat frame that is not valid JSON")
                        continue
                    if not isinstance(frame, dict):
                        logger.warning("Skipping chat frame that is not a JSON object")
                        continue

                    cmd = frame.get("cmd")
                    if cmd == CMD_PING:
                        await socket.send(json.dumps({"ver": "2", "cmd": CMD_PONG}))
                        continue
                    if cmd == CMD_CONNECTED:
                        body = frame.get("bdy")
                        sid = body.get("sid") if isinstance(body, dict) else None
                        if sid:
                            # Ask for the most recent messages right after connecting.
                            await socket.send(
                                json.dumps(
                                    {
                                        "ver": "2",
                                        "cmd": CMD_REQUEST_RECENT_CHAT,
                                        "svcid": "game",
                                        "cid": self.access.chat_channel_id,
                                        "sid": sid,
                                        "tid": 2,
                                        "bdy": {"recentMessageCount": self.recent_count},
                                    },
                                    ensure_ascii=False,
                                )
                            )
                        continue

                    for message in normalize_chat_frame(raw):
                        output.write(json.dumps(message, ensure_ascii=False, sort_keys=True) + "\n")
                    # Flush per frame so the file stays current while recording.
                    output.flush()
|