getnotes-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ """getnotes-cli — 得到笔记 CLI 下载工具"""
2
+
3
# Version string of the getnotes-cli package
__version__ = "0.1.0"
getnotes_cli/auth.py ADDED
@@ -0,0 +1,120 @@
1
+ """Auth token 管理 — 缓存与刷新 Bearer token"""
2
+
3
+ import json
4
+ import time
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+
8
+ from getnotes_cli.config import AUTH_CACHE_FILE, CONFIG_DIR, DEFAULT_HEADERS
9
+
10
+
11
@dataclass
class AuthToken:
    """A Bearer token plus the request headers that travel with it."""

    authorization: str  # full header value, e.g. "Bearer xxx"
    csrf_token: str = ""  # sent as the Xi-Csrf-Token header when non-empty
    extra_headers: dict[str, str] = field(default_factory=dict)
    extracted_at: float = 0.0  # time.time() value recorded when the token was obtained

    def to_dict(self) -> dict:
        """Return the four fields as a plain dict (used for the JSON cache)."""
        field_names = ("authorization", "csrf_token", "extra_headers", "extracted_at")
        return {name: getattr(self, name) for name in field_names}

    @classmethod
    def from_dict(cls, data: dict) -> "AuthToken":
        """Build a token from a dict shaped like the output of ``to_dict``.

        Only ``"authorization"`` is required; the other keys fall back to
        their defaults.

        Raises:
            KeyError: if ``"authorization"`` is missing.
        """
        # Read the mandatory key first so a malformed payload fails on it.
        authorization = data["authorization"]
        csrf_token = data.get("csrf_token", "")
        extra_headers = data.get("extra_headers", {})
        extracted_at = data.get("extracted_at", 0)
        return cls(authorization, csrf_token, extra_headers, extracted_at)

    def is_expired(self, max_age_minutes: float = 25) -> bool:
        """Return True once the token is older than ``max_age_minutes``.

        The default of 25 minutes leaves a margin below the roughly
        30-minute token lifetime noted by the original author.
        """
        limit_seconds = max_age_minutes * 60
        elapsed_seconds = time.time() - self.extracted_at
        return elapsed_seconds > limit_seconds

    def get_headers(self) -> dict[str, str]:
        """Return the complete set of request headers for this token.

        Starts from ``DEFAULT_HEADERS``, adds ``Authorization`` and (when set)
        ``Xi-Csrf-Token``, then applies ``extra_headers`` last.
        """
        merged = {**DEFAULT_HEADERS, "Authorization": self.authorization}
        if self.csrf_token:
            merged["Xi-Csrf-Token"] = self.csrf_token
        merged.update(self.extra_headers)
        return merged
49
+
50
+
51
def load_cached_token() -> AuthToken | None:
    """Load the cached token from ``AUTH_CACHE_FILE``.

    Returns:
        The cached ``AuthToken``, or ``None`` when the cache file is missing,
        cannot be read, is not valid UTF-8 JSON, or lacks the required
        ``"authorization"`` key.
    """
    try:
        # Read directly instead of checking exists() first: that avoids a race
        # and lets every filesystem failure take the same "no token" path.
        raw = AUTH_CACHE_FILE.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return None

    try:
        return AuthToken.from_dict(json.loads(raw))
    except (json.JSONDecodeError, KeyError, TypeError):
        # Corrupt JSON, missing key, or a root that is not a JSON object.
        return None
60
+
61
+
62
def save_token(token: AuthToken) -> None:
    """Write ``token`` as JSON to ``AUTH_CACHE_FILE``.

    ``CONFIG_DIR`` is created if needed. Because the file holds a live
    credential, it is created with (or tightened to) owner-only permissions
    before the token is written.

    Raises:
        OSError: if the directory or the file cannot be written.
    """
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(token.to_dict(), ensure_ascii=False, indent=2)

    try:
        # Create the file owner-only, then chmod in case it already existed
        # with wider permissions from an earlier run.
        AUTH_CACHE_FILE.touch(mode=0o600, exist_ok=True)
        AUTH_CACHE_FILE.chmod(0o600)
    except OSError:
        # Best effort: some platforms/filesystems do not support POSIX modes.
        # A real write problem still surfaces from write_text below.
        pass

    AUTH_CACHE_FILE.write_text(payload, encoding="utf-8")
69
+
70
+
71
def get_or_refresh_token(force_login: bool = False) -> AuthToken:
    """Return a usable token, logging in again when the cached one is stale.

    Args:
        force_login: Skip the cache and always perform a fresh login.

    Returns:
        A cached, unexpired token, or a newly captured one (which is also
        saved to the cache).

    Raises:
        RuntimeError: if the login flow yields no ``Authorization`` header.
    """
    cached = None if force_login else load_cached_token()
    if cached:
        if not cached.is_expired():
            return cached
        print("⚠️ Token 已过期,需要重新登录...")

    # Obtain a fresh token through CDP; imported here rather than at module top.
    from getnotes_cli.cdp import extract_auth_via_cdp

    captured = extract_auth_via_cdp()
    if not captured or "Authorization" not in captured:
        raise RuntimeError("❌ 登录失败,未能获取 Authorization token")

    # Headers that have their own AuthToken field are kept out of the extras.
    dedicated = ("Authorization", "Xi-Csrf-Token")
    extras = {}
    for name, value in captured.items():
        if name not in dedicated:
            extras[name] = value

    fresh = AuthToken(
        authorization=captured["Authorization"],
        csrf_token=captured.get("Xi-Csrf-Token", ""),
        extra_headers=extras,
        extracted_at=time.time(),
    )
    save_token(fresh)
    return fresh
108
+
109
+
110
def login_with_token(bearer_token: str) -> AuthToken:
    """Log in with a manually supplied Bearer token and cache it.

    The input is normalized before use: surrounding whitespace (for example a
    trailing newline from a paste) is removed, and an existing ``Bearer``
    prefix is recognized regardless of letter case, so the stored value always
    has the form ``"Bearer <credential>"``.

    Args:
        bearer_token: The raw credential, with or without a ``Bearer`` prefix.

    Returns:
        The newly created token, stamped with the current time and saved.
    """
    cleaned = bearer_token.strip()
    scheme, _, credential = cleaned.partition(" ")
    credential = credential.strip()
    if scheme.lower() == "bearer" and credential:
        # Already prefixed (in any case): re-emit with the canonical spelling.
        cleaned = f"Bearer {credential}"
    else:
        cleaned = f"Bearer {cleaned}"

    token = AuthToken(
        authorization=cleaned,
        extracted_at=time.time(),
    )
    save_token(token)
    return token
getnotes_cli/cache.py ADDED
@@ -0,0 +1,133 @@
1
+ """缓存管理 — 跟踪已下载笔记的版本与状态"""
2
+
3
+ import json
4
+ from pathlib import Path
5
+
6
+ from getnotes_cli.config import CACHE_MANIFEST_FILE, CONFIG_DIR
7
+
8
+
9
+ class CacheManager:
10
+ """管理下载缓存清单"""
11
+
12
+ def __init__(self, output_dir: Path):
13
+ self.output_dir = output_dir
14
+ self.cache_path = CONFIG_DIR / CACHE_MANIFEST_FILE
15
+ self._manifest: dict = {}
16
+
17
+ def load(self) -> dict:
18
+ """加载缓存清单"""
19
+ if self.cache_path.exists():
20
+ try:
21
+ self._manifest = json.loads(self.cache_path.read_text(encoding="utf-8"))
22
+ except (json.JSONDecodeError, IOError):
23
+ print("⚠️ 缓存清单损坏,将重新构建。")
24
+ self._manifest = {}
25
+ return self._manifest
26
+
27
+ def save(self) -> None:
28
+ """保存缓存清单"""
29
+ CONFIG_DIR.mkdir(parents=True, exist_ok=True)
30
+ self.cache_path.write_text(
31
+ json.dumps(self._manifest, ensure_ascii=False, indent=2),
32
+ encoding="utf-8",
33
+ )
34
+
35
+ def is_cached(self, note: dict) -> bool:
36
+ """检查笔记是否已缓存且版本未变化"""
37
+ note_id = note.get("note_id", note.get("id", ""))
38
+ if note_id not in self._manifest:
39
+ return False
40
+ cached = self._manifest[note_id]
41
+ return (
42
+ cached.get("version") == note.get("version")
43
+ and cached.get("updated_at") == note.get("updated_at")
44
+ )
45
+
46
+ def update(self, note_id: str, info: dict) -> None:
47
+ """更新缓存条目"""
48
+ self._manifest[note_id] = info
49
+
50
+ def get(self, note_id: str) -> dict | None:
51
+ """获取缓存条目"""
52
+ return self._manifest.get(note_id)
53
+
54
+ @property
55
+ def count(self) -> int:
56
+ return len(self._manifest)
57
+
58
+ @property
59
+ def manifest(self) -> dict:
60
+ return self._manifest
61
+
62
+ def check(self) -> dict:
63
+ """检查缓存状态,返回统计信息"""
64
+ if not self.cache_path.exists():
65
+ return {"exists": False, "count": 0, "path": str(self.cache_path)}
66
+ self.load()
67
+ return {
68
+ "exists": True,
69
+ "count": self.count,
70
+ "path": str(self.cache_path),
71
+ "notes": {
72
+ nid: {
73
+ "title": info.get("title", "(无标题)"),
74
+ "created_at": info.get("created_at", ""),
75
+ "folder": info.get("folder_name", ""),
76
+ }
77
+ for nid, info in self._manifest.items()
78
+ },
79
+ }
80
+
81
+ def rebuild_from_disk(self, notes_dir: Path) -> int:
82
+ """从磁盘已有文件夹重建缓存清单。
83
+
84
+ 扫描 notes_dir 下所有子目录的 note.json,提取 note_id 等信息
85
+ 建立 note_id → folder_name 的映射。
86
+
87
+ Returns:
88
+ 重建的缓存条目数
89
+ """
90
+ if not notes_dir.exists():
91
+ return 0
92
+
93
+ rebuilt = 0
94
+ for folder in notes_dir.iterdir():
95
+ if not folder.is_dir():
96
+ continue
97
+ json_file = folder / "note.json"
98
+ if not json_file.exists():
99
+ continue
100
+ try:
101
+ data = json.loads(json_file.read_text(encoding="utf-8"))
102
+ note_id = data.get("note_id", data.get("id", ""))
103
+ if not note_id:
104
+ continue
105
+ # 避免覆盖已有缓存条目
106
+ if note_id in self._manifest:
107
+ continue
108
+ self._manifest[note_id] = {
109
+ "version": data.get("version"),
110
+ "updated_at": data.get("updated_at", ""),
111
+ "folder_name": folder.name,
112
+ "title": data.get("title", ""),
113
+ "created_at": data.get("created_at", ""),
114
+ }
115
+ rebuilt += 1
116
+ except (json.JSONDecodeError, IOError):
117
+ continue
118
+
119
+ if rebuilt > 0:
120
+ self.save()
121
+ print(f"💾 从磁盘重建缓存: 恢复了 {rebuilt} 条记录")
122
+
123
+ return rebuilt
124
+
125
+ def clear(self) -> int:
126
+ """清除缓存,返回清除的条目数"""
127
+ count = 0
128
+ if self.cache_path.exists():
129
+ self.load()
130
+ count = self.count
131
+ self.cache_path.unlink()
132
+ self._manifest = {}
133
+ return count
getnotes_cli/cdp.py ADDED
@@ -0,0 +1,299 @@
1
+ """Chrome DevTools Protocol (CDP) 工具 — 用于自动获取 Bearer token。
2
+
3
+ 通过 CDP 启动 Chrome,打开得到笔记页面,监听网络请求以捕获 Authorization header。
4
+ """
5
+
6
+ import json
7
+ import platform
8
+ import shutil
9
+ import socket
10
+ import subprocess
11
+ import time
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ import httpx
16
+
17
+ from getnotes_cli.config import CHROME_PROFILE_DIR, LOGIN_URL, API_DOMAINS
18
+
19
# Module-wide HTTP client used for Chrome's local DevTools HTTP endpoints.
# 10 seconds is the default timeout; callers in this module pass their own.
_httpx = httpx.Client(timeout=10)

# Ports scanned by default when looking for a running Chrome debugging instance
CDP_PORT_RANGE = range(9222, 9232)
23
+
24
+
25
+ # ========================================================================
26
+ # Chrome 管理
27
+ # ========================================================================
28
+
29
+
30
+ def get_chrome_path() -> str | None:
31
+ """获取 Chrome 可执行文件路径"""
32
+ system = platform.system()
33
+ if system == "Darwin":
34
+ path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
35
+ return path if Path(path).exists() else None
36
+ elif system == "Linux":
37
+ for candidate in ["google-chrome", "google-chrome-stable", "chromium", "chromium-browser"]:
38
+ if shutil.which(candidate):
39
+ return candidate
40
+ return None
41
+ elif system == "Windows":
42
+ path = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
43
+ return path if Path(path).exists() else None
44
+ return None
45
+
46
+
47
def find_available_port(start: int = 9222, attempts: int = 10) -> int:
    """Return the first port in ``start .. start + attempts - 1`` that can be bound.

    Each candidate is probed by binding a TCP socket on 127.0.0.1; the probe
    socket is closed again before returning.

    Raises:
        RuntimeError: if none of the candidate ports can be bound.
    """
    for candidate in range(start, start + attempts):
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
                probe.bind(("127.0.0.1", candidate))
        except OSError:
            # Port is taken (or otherwise unusable): try the next one.
            continue
        return candidate
    raise RuntimeError(f"在 {start}-{start + attempts - 1} 范围内找不到可用端口")
58
+
59
+
60
def find_existing_chrome(port_range: range = CDP_PORT_RANGE) -> tuple[int | None, str | None]:
    """Scan ``port_range`` for a Chrome instance that is already being debugged.

    A port that can be bound on 127.0.0.1 is free and is skipped. For a port
    that is in use, the debugger endpoint is queried with a 2-second timeout.

    Returns:
        ``(port, websocket_debugger_url)`` for the first port that answers,
        or ``(None, None)`` when no port does.
    """
    for candidate in port_range:
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
                probe.bind(("127.0.0.1", candidate))
        except OSError:
            in_use = True
        else:
            in_use = False

        if not in_use:
            continue  # nothing is listening here

        debugger_url = get_debugger_url(candidate, timeout=2)
        if debugger_url:
            return candidate, debugger_url
    return None, None
73
+
74
+
75
# Handle of the Chrome process started by launch_chrome(); None when not started
_chrome_process: subprocess.Popen | None = None


def launch_chrome(port: int = 9222) -> bool:
    """Start Chrome with remote debugging enabled and open the login page.

    Chrome runs with the dedicated profile directory ``CHROME_PROFILE_DIR``
    (created if missing). The process handle is stored in the module-level
    ``_chrome_process``.

    Args:
        port: Port for ``--remote-debugging-port``.

    Returns:
        True if the process was started, False if no Chrome executable was
        found or the process could not be spawned.
    """
    global _chrome_process
    chrome_path = get_chrome_path()
    if not chrome_path:
        return False

    profile_dir = CHROME_PROFILE_DIR
    profile_dir.mkdir(parents=True, exist_ok=True)

    args = [
        chrome_path,
        f"--remote-debugging-port={port}",
        "--no-first-run",
        "--no-default-browser-check",
        "--disable-extensions",
        f"--user-data-dir={profile_dir}",
        "--remote-allow-origins=*",
        LOGIN_URL,
    ]
    try:
        # Chrome's output is never read, so it must not go to a PIPE: once the
        # pipe buffer filled up the browser would block on write. Discard it.
        _chrome_process = subprocess.Popen(
            args,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        return False
    return True
103
+
104
+
105
def terminate_chrome() -> bool:
    """Stop the Chrome process started by ``launch_chrome``.

    Sends a terminate request and waits up to 5 seconds. If that fails or
    times out, the process is killed and then waited on so it is reaped.
    Errors during shutdown are ignored: this is best-effort cleanup.

    Returns:
        False if no process handle was recorded, True otherwise.
    """
    global _chrome_process
    if _chrome_process is None:
        return False
    try:
        _chrome_process.terminate()
        _chrome_process.wait(timeout=5)
    except (OSError, subprocess.TimeoutExpired):
        try:
            _chrome_process.kill()
            # Collect the exit status so the killed process does not linger
            # as a zombie for the lifetime of this program.
            _chrome_process.wait(timeout=5)
        except (OSError, subprocess.TimeoutExpired):
            pass
    _chrome_process = None
    return True
120
+
121
+
122
+ # ========================================================================
123
+ # CDP 协议
124
+ # ========================================================================
125
+
126
+
127
+ def get_debugger_url(port: int = 9222, tries: int = 1, timeout: int = 5) -> str | None:
128
+ """获取 Chrome 调试 WebSocket URL"""
129
+ for attempt in range(tries):
130
+ try:
131
+ resp = _httpx.get(f"http://localhost:{port}/json/version", timeout=timeout)
132
+ return resp.json().get("webSocketDebuggerUrl")
133
+ except Exception:
134
+ if attempt < tries - 1:
135
+ time.sleep(1)
136
+ return None
137
+
138
+
139
def execute_cdp_command(ws_url: str, method: str, params: dict | None = None) -> dict:
    """Send one CDP command over a fresh WebSocket and return its result.

    Opens a connection to ``ws_url``, sends the command with id 1, discards
    every incoming message until the one whose ``id`` is 1 arrives, and closes
    the connection.

    Returns:
        The ``result`` member of the matching response, or ``{}`` if absent.
    """
    import websocket

    conn = websocket.create_connection(ws_url, timeout=30, suppress_origin=True)
    try:
        conn.send(json.dumps({"id": 1, "method": method, "params": params or {}}))
        reply = json.loads(conn.recv())
        while reply.get("id") != 1:
            # Events and unrelated responses are skipped.
            reply = json.loads(conn.recv())
        return reply.get("result", {})
    finally:
        conn.close()
152
+
153
+
154
def get_current_url(ws_url: str) -> str:
    """Return ``window.location.href`` of the page behind ``ws_url``.

    Returns an empty string when the evaluation yields no value.
    """
    execute_cdp_command(ws_url, "Runtime.enable")
    evaluation = execute_cdp_command(
        ws_url,
        "Runtime.evaluate",
        {"expression": "window.location.href"},
    )
    remote_object = evaluation.get("result", {})
    return remote_object.get("value", "")
159
+
160
+
161
def navigate_to_url(ws_url: str, url: str) -> None:
    """Navigate the page behind ``ws_url`` to ``url``.

    Issues ``Page.enable`` followed by ``Page.navigate``.
    """
    steps = (
        ("Page.enable", None),
        ("Page.navigate", {"url": url}),
    )
    for method, params in steps:
        execute_cdp_command(ws_url, method, params)
165
+
166
+
167
+ # ========================================================================
168
+ # Token 提取(核心)
169
+ # ========================================================================
170
+
171
+
172
def _find_biji_page(port: int) -> dict | None:
    """Find an open biji.com target, or open the login page in a new tab.

    Returns:
        The target description dict reported by Chrome's ``/json`` endpoint,
        or ``None`` if the target list cannot be fetched or no tab could be
        created.
    """
    endpoint = f"http://localhost:{port}/json"
    try:
        targets = _httpx.get(endpoint, timeout=5).json()
    except Exception:
        return None

    # Prefer a target that is already showing biji.com.
    existing = next(
        (target for target in targets if "biji.com" in target.get("url", "")),
        None,
    )
    if existing is not None:
        return existing

    # Otherwise ask Chrome to open the login page in a new tab.
    try:
        from urllib.parse import quote

        encoded_url = quote(LOGIN_URL, safe="")
        created = _httpx.put(f"{endpoint}/new?{encoded_url}", timeout=15)
        if created.status_code == 200 and created.text.strip():
            return created.json()
    except Exception:
        pass
    return None
196
+
197
+
198
def _match_auth_headers(request: dict) -> dict[str, str] | None:
    """Return the auth headers of a target API request, or None if it is not one."""
    url = request.get("url", "")
    if not any(domain in url for domain in API_DOMAINS):
        return None

    # HTTP header names are case-insensitive, so index them by lowercase name.
    by_name = {
        str(name).lower(): value
        for name, value in request.get("headers", {}).items()
    }
    auth_value = by_name.get("authorization")
    if not isinstance(auth_value, str) or not auth_value.startswith("Bearer "):
        return None

    captured = {"Authorization": auth_value}
    # Also keep the companion headers when the request carries them.
    for key in ("Xi-Csrf-Token", "X-Appid", "X-Av"):
        if key.lower() in by_name:
            captured[key] = by_name[key.lower()]
    return captured


def extract_auth_via_cdp(
    auto_launch: bool = True,
    login_timeout: int = 300,
) -> dict[str, str] | None:
    """Capture the Authorization header by watching network requests over CDP.

    Steps:
        1. Connect to a running Chrome debugging instance, or launch one.
        2. Find (or open) the note page.
        3. Enable network events and wait for the user to log in.
        4. Return the headers of the first request to one of ``API_DOMAINS``
           that carries a ``Bearer`` Authorization header.

    A Chrome instance launched by this function is terminated on every exit
    path, including failures; a reused instance is left running.

    Args:
        auto_launch: Launch Chrome when no running debugging instance is found.
        login_timeout: Seconds to wait for a matching request.

    Returns:
        A dict with ``Authorization`` and, when present on the request,
        ``Xi-Csrf-Token``, ``X-Appid`` and ``X-Av``.

    Raises:
        RuntimeError: if Chrome cannot be found, started or reached, the page
            cannot be opened, the connection drops, or the wait times out.
    """
    import websocket

    # 1. Find a running Chrome or start one.
    port, debugger_url = find_existing_chrome()
    reused = bool(port)
    ws = None

    try:
        if not debugger_url and auto_launch:
            chrome_path = get_chrome_path()
            if not chrome_path:
                raise RuntimeError(
                    "❌ 未找到 Chrome 浏览器。\n"
                    "请安装 Google Chrome,或使用 `getnotes login --token` 手动输入 token。"
                )
            port = find_available_port()
            if not launch_chrome(port):
                raise RuntimeError("❌ 启动 Chrome 失败")
            debugger_url = get_debugger_url(port, tries=10)

        if not debugger_url:
            raise RuntimeError(f"❌ 无法连接 Chrome(端口 {port})")

        # 2. Locate the note page.
        page = _find_biji_page(port)
        if not page:
            raise RuntimeError("❌ 无法打开得到笔记页面")

        ws_url = page.get("webSocketDebuggerUrl")
        if not ws_url:
            raise RuntimeError("❌ 无法获取页面 WebSocket URL")

        # 3. Listen to network events on that page.
        ws = websocket.create_connection(ws_url, timeout=30, suppress_origin=True)
        ws.send(json.dumps({"id": 10, "method": "Network.enable", "params": {}}))
        # Drain messages until the response to Network.enable arrives.
        while True:
            resp = json.loads(ws.recv())
            if resp.get("id") == 10:
                break

        print("⏳ 等待登录并捕获 API 请求中...")
        print(f" 请在浏览器中登录 {LOGIN_URL}")
        if reused:
            print(" (已连接到现有 Chrome 实例)")
        print(f" 超时时间: {login_timeout}s\n")

        # Short receive timeout so the overall deadline is checked regularly.
        ws.settimeout(2.0)
        deadline = time.monotonic() + login_timeout

        while time.monotonic() < deadline:
            try:
                event = json.loads(ws.recv())
            except websocket.WebSocketTimeoutException:
                continue
            except (websocket.WebSocketConnectionClosedException, OSError) as exc:
                # The tab or browser went away; retrying would only spin at
                # full speed until the deadline, so fail right away.
                raise RuntimeError("❌ 与 Chrome 的连接已断开,未能捕获 token。请重试。") from exc
            except Exception:
                # Malformed frame: skip it and keep listening.
                continue

            if not isinstance(event, dict):
                continue
            if event.get("method") != "Network.requestWillBeSent":
                continue

            request = event.get("params", {}).get("request", {})
            captured = _match_auth_headers(request)
            if captured:
                print("✅ 成功捕获 Authorization token!")
                return captured

        raise RuntimeError("⏰ 登录超时,未捕获到 API 请求。请重试。")

    finally:
        if ws is not None:
            ws.close()
        if not reused:
            # No-op when nothing was launched.
            terminate_chrome()