PyPI - getnotes-cli - Versions diffs - 0.1.0__py3-none-any.whl - Mend

getnotes-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

getnotes_cli/__init__.py +3 -0
getnotes_cli/auth.py +120 -0
getnotes_cli/cache.py +133 -0
getnotes_cli/cdp.py +299 -0
getnotes_cli/cli.py +723 -0
getnotes_cli/config.py +74 -0
getnotes_cli/downloader.py +334 -0
getnotes_cli/markdown.py +161 -0
getnotes_cli/notebook.py +106 -0
getnotes_cli/notebook_downloader.py +494 -0
getnotes_cli/settings.py +128 -0
getnotes_cli-0.1.0.dist-info/METADATA +242 -0
getnotes_cli-0.1.0.dist-info/RECORD +16 -0
getnotes_cli-0.1.0.dist-info/WHEEL +4 -0
getnotes_cli-0.1.0.dist-info/entry_points.txt +2 -0
getnotes_cli-0.1.0.dist-info/licenses/LICENSE +21 -0

getnotes_cli/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""getnotes-cli — 得到笔记 CLI 下载工具"""
+__version__ = "0.1.0"

getnotes_cli/auth.py ADDED Viewed

@@ -0,0 +1,120 @@
+"""Auth token 管理 — 缓存与刷新 Bearer token"""
+import json
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from getnotes_cli.config import AUTH_CACHE_FILE, CONFIG_DIR, DEFAULT_HEADERS
+@dataclass
+class AuthToken:
+    """存储 Bearer token 及相关 headers"""
+    authorization: str  # "Bearer xxx"
+    csrf_token: str = ""  # Xi-Csrf-Token
+    extra_headers: dict[str, str] = field(default_factory=dict)
+    extracted_at: float = 0.0
+    def to_dict(self) -> dict:
+        return {
+            "authorization": self.authorization,
+            "csrf_token": self.csrf_token,
+            "extra_headers": self.extra_headers,
+            "extracted_at": self.extracted_at,
+        }
+    @classmethod
+    def from_dict(cls, data: dict) -> "AuthToken":
+        return cls(
+            authorization=data["authorization"],
+            csrf_token=data.get("csrf_token", ""),
+            extra_headers=data.get("extra_headers", {}),
+            extracted_at=data.get("extracted_at", 0),
+        )
+    def is_expired(self, max_age_minutes: float = 25) -> bool:
+        """检查 token 是否过期（得到 token 约 30 分钟有效）"""
+        age = time.time() - self.extracted_at
+        return age > (max_age_minutes * 60)
+    def get_headers(self) -> dict[str, str]:
+        """生成完整的请求 headers"""
+        headers = dict(DEFAULT_HEADERS)
+        headers["Authorization"] = self.authorization
+        if self.csrf_token:
+            headers["Xi-Csrf-Token"] = self.csrf_token
+        headers.update(self.extra_headers)
+        return headers
+def load_cached_token() -> AuthToken | None:
+    """从缓存加载 token"""
+    if not AUTH_CACHE_FILE.exists():
+        return None
+    try:
+        data = json.loads(AUTH_CACHE_FILE.read_text(encoding="utf-8"))
+        return AuthToken.from_dict(data)
+    except (json.JSONDecodeError, KeyError, TypeError):
+        return None
+def save_token(token: AuthToken) -> None:
+    """保存 token 到缓存"""
+    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+    AUTH_CACHE_FILE.write_text(
+        json.dumps(token.to_dict(), ensure_ascii=False, indent=2),
+        encoding="utf-8",
+    )
+def get_or_refresh_token(force_login: bool = False) -> AuthToken:
+    """获取有效的 token，过期则自动刷新。
+    Args:
+        force_login: 强制重新登录
+    Returns:
+        有效的 AuthToken
+    Raises:
+        RuntimeError: 无法获取 token
+    """
+    if not force_login:
+        cached = load_cached_token()
+        if cached and not cached.is_expired():
+            return cached
+        if cached and cached.is_expired():
+            print("⚠️  Token 已过期，需要重新登录...")
+    # 通过 CDP 获取新 token
+    from getnotes_cli.cdp import extract_auth_via_cdp
+    headers = extract_auth_via_cdp()
+    if not headers or "Authorization" not in headers:
+        raise RuntimeError("❌ 登录失败，未能获取 Authorization token")
+    token = AuthToken(
+        authorization=headers["Authorization"],
+        csrf_token=headers.get("Xi-Csrf-Token", ""),
+        extra_headers={
+            k: v for k, v in headers.items()
+            if k not in ("Authorization", "Xi-Csrf-Token")
+        },
+        extracted_at=time.time(),
+    )
+    save_token(token)
+    return token
+def login_with_token(bearer_token: str) -> AuthToken:
+    """手动输入 Bearer token 进行登录"""
+    if not bearer_token.startswith("Bearer "):
+        bearer_token = f"Bearer {bearer_token}"
+    token = AuthToken(
+        authorization=bearer_token,
+        extracted_at=time.time(),
+    )
+    save_token(token)
+    return token

getnotes_cli/cache.py ADDED Viewed

@@ -0,0 +1,133 @@
+"""缓存管理 — 跟踪已下载笔记的版本与状态"""
+import json
+from pathlib import Path
+from getnotes_cli.config import CACHE_MANIFEST_FILE, CONFIG_DIR
+class CacheManager:
+    """管理下载缓存清单"""
+    def __init__(self, output_dir: Path):
+        self.output_dir = output_dir
+        self.cache_path = CONFIG_DIR / CACHE_MANIFEST_FILE
+        self._manifest: dict = {}
+    def load(self) -> dict:
+        """加载缓存清单"""
+        if self.cache_path.exists():
+            try:
+                self._manifest = json.loads(self.cache_path.read_text(encoding="utf-8"))
+            except (json.JSONDecodeError, IOError):
+                print("⚠️  缓存清单损坏，将重新构建。")
+                self._manifest = {}
+        return self._manifest
+    def save(self) -> None:
+        """保存缓存清单"""
+        CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+        self.cache_path.write_text(
+            json.dumps(self._manifest, ensure_ascii=False, indent=2),
+            encoding="utf-8",
+        )
+    def is_cached(self, note: dict) -> bool:
+        """检查笔记是否已缓存且版本未变化"""
+        note_id = note.get("note_id", note.get("id", ""))
+        if note_id not in self._manifest:
+            return False
+        cached = self._manifest[note_id]
+        return (
+            cached.get("version") == note.get("version")
+            and cached.get("updated_at") == note.get("updated_at")
+        )
+    def update(self, note_id: str, info: dict) -> None:
+        """更新缓存条目"""
+        self._manifest[note_id] = info
+    def get(self, note_id: str) -> dict | None:
+        """获取缓存条目"""
+        return self._manifest.get(note_id)
+    @property
+    def count(self) -> int:
+        return len(self._manifest)
+    @property
+    def manifest(self) -> dict:
+        return self._manifest
+    def check(self) -> dict:
+        """检查缓存状态，返回统计信息"""
+        if not self.cache_path.exists():
+            return {"exists": False, "count": 0, "path": str(self.cache_path)}
+        self.load()
+        return {
+            "exists": True,
+            "count": self.count,
+            "path": str(self.cache_path),
+            "notes": {
+                nid: {
+                    "title": info.get("title", "(无标题)"),
+                    "created_at": info.get("created_at", ""),
+                    "folder": info.get("folder_name", ""),
+                }
+                for nid, info in self._manifest.items()
+            },
+        }
+    def rebuild_from_disk(self, notes_dir: Path) -> int:
+        """从磁盘已有文件夹重建缓存清单。
+        扫描 notes_dir 下所有子目录的 note.json，提取 note_id 等信息
+        建立 note_id → folder_name 的映射。
+        Returns:
+            重建的缓存条目数
+        """
+        if not notes_dir.exists():
+            return 0
+        rebuilt = 0
+        for folder in notes_dir.iterdir():
+            if not folder.is_dir():
+                continue
+            json_file = folder / "note.json"
+            if not json_file.exists():
+                continue
+            try:
+                data = json.loads(json_file.read_text(encoding="utf-8"))
+                note_id = data.get("note_id", data.get("id", ""))
+                if not note_id:
+                    continue
+                # 避免覆盖已有缓存条目
+                if note_id in self._manifest:
+                    continue
+                self._manifest[note_id] = {
+                    "version": data.get("version"),
+                    "updated_at": data.get("updated_at", ""),
+                    "folder_name": folder.name,
+                    "title": data.get("title", ""),
+                    "created_at": data.get("created_at", ""),
+                }
+                rebuilt += 1
+            except (json.JSONDecodeError, IOError):
+                continue
+        if rebuilt > 0:
+            self.save()
+            print(f"💾 从磁盘重建缓存: 恢复了 {rebuilt} 条记录")
+        return rebuilt
+    def clear(self) -> int:
+        """清除缓存，返回清除的条目数"""
+        count = 0
+        if self.cache_path.exists():
+            self.load()
+            count = self.count
+            self.cache_path.unlink()
+        self._manifest = {}
+        return count

getnotes_cli/cdp.py ADDED Viewed

@@ -0,0 +1,299 @@
+"""Chrome DevTools Protocol (CDP) 工具 — 用于自动获取 Bearer token。
+通过 CDP 启动 Chrome，打开得到笔记页面，监听网络请求以捕获 Authorization header。
+"""
+import json
+import platform
+import shutil
+import socket
+import subprocess
+import time
+from pathlib import Path
+from typing import Any
+import httpx
+from getnotes_cli.config import CHROME_PROFILE_DIR, LOGIN_URL, API_DOMAINS
+# Module-wide HTTP client (10 s default timeout) used for the localhost /json endpoints below
+_httpx = httpx.Client(timeout=10)
+# CDP port range: ports 9222-9231 are scanned for an existing debugging instance
+CDP_PORT_RANGE = range(9222, 9232)
+# ========================================================================
+# Chrome 管理
+# ========================================================================
+def get_chrome_path() -> str | None:
+    """获取 Chrome 可执行文件路径"""
+    system = platform.system()
+    if system == "Darwin":
+        path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
+        return path if Path(path).exists() else None
+    elif system == "Linux":
+        for candidate in ["google-chrome", "google-chrome-stable", "chromium", "chromium-browser"]:
+            if shutil.which(candidate):
+                return candidate
+        return None
+    elif system == "Windows":
+        path = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
+        return path if Path(path).exists() else None
+    return None
+def find_available_port(start: int = 9222, attempts: int = 10) -> int:
+    """查找可用端口"""
+    for offset in range(attempts):
+        port = start + offset
+        try:
+            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+                s.bind(("127.0.0.1", port))
+                return port
+        except OSError:
+            continue
+    raise RuntimeError(f"在 {start}-{start + attempts - 1} 范围内找不到可用端口")
+def find_existing_chrome(port_range: range = CDP_PORT_RANGE) -> tuple[int | None, str | None]:
+    """扫描端口范围，查找已运行的 Chrome 调试实例"""
+    for port in port_range:
+        try:
+            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+                s.bind(("127.0.0.1", port))
+                continue  # 端口空闲，跳过
+        except OSError:
+            pass  # 端口已占用
+        url = get_debugger_url(port, timeout=2)
+        if url:
+            return port, url
+    return None, None
+_chrome_process: subprocess.Popen | None = None
+def launch_chrome(port: int = 9222) -> bool:
+    """启动 Chrome，打开得到笔记页面"""
+    global _chrome_process
+    chrome_path = get_chrome_path()
+    if not chrome_path:
+        return False
+    profile_dir = CHROME_PROFILE_DIR
+    profile_dir.mkdir(parents=True, exist_ok=True)
+    args = [
+        chrome_path,
+        f"--remote-debugging-port={port}",
+        "--no-first-run",
+        "--no-default-browser-check",
+        "--disable-extensions",
+        f"--user-data-dir={profile_dir}",
+        "--remote-allow-origins=*",
+        LOGIN_URL,
+    ]
+    try:
+        _chrome_process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        return True
+    except Exception:
+        return False
+def terminate_chrome() -> bool:
+    """关闭 Chrome"""
+    global _chrome_process
+    if _chrome_process is None:
+        return False
+    try:
+        _chrome_process.terminate()
+        _chrome_process.wait(timeout=5)
+    except Exception:
+        try:
+            _chrome_process.kill()
+        except Exception:
+            pass
+    _chrome_process = None
+    return True
+# ========================================================================
+# CDP 协议
+# ========================================================================
+def get_debugger_url(port: int = 9222, tries: int = 1, timeout: int = 5) -> str | None:
+    """获取 Chrome 调试 WebSocket URL"""
+    for attempt in range(tries):
+        try:
+            resp = _httpx.get(f"http://localhost:{port}/json/version", timeout=timeout)
+            return resp.json().get("webSocketDebuggerUrl")
+        except Exception:
+            if attempt < tries - 1:
+                time.sleep(1)
+    return None
+def execute_cdp_command(ws_url: str, method: str, params: dict | None = None) -> dict:
+    """通过 WebSocket 发送 CDP 命令"""
+    import websocket
+    ws = websocket.create_connection(ws_url, timeout=30, suppress_origin=True)
+    try:
+        command = {"id": 1, "method": method, "params": params or {}}
+        ws.send(json.dumps(command))
+        while True:
+            response = json.loads(ws.recv())
+            if response.get("id") == 1:
+                return response.get("result", {})
+    finally:
+        ws.close()
+def get_current_url(ws_url: str) -> str:
+    """获取当前页面 URL"""
+    execute_cdp_command(ws_url, "Runtime.enable")
+    result = execute_cdp_command(ws_url, "Runtime.evaluate", {"expression": "window.location.href"})
+    return result.get("result", {}).get("value", "")
+def navigate_to_url(ws_url: str, url: str) -> None:
+    """导航到指定 URL"""
+    execute_cdp_command(ws_url, "Page.enable")
+    execute_cdp_command(ws_url, "Page.navigate", {"url": url})
+# ========================================================================
+# Token 提取（核心）
+# ========================================================================
+def _find_biji_page(port: int) -> dict | None:
+    """查找或创建得到笔记页面"""
+    try:
+        resp = _httpx.get(f"http://localhost:{port}/json", timeout=5)
+        pages = resp.json()
+    except Exception:
+        return None
+    # 优先查找已有的 biji.com 页面
+    for page in pages:
+        url = page.get("url", "")
+        if "biji.com" in url:
+            return page
+    # 没有则创建新标签页
+    try:
+        from urllib.parse import quote
+        encoded = quote(LOGIN_URL, safe="")
+        resp = _httpx.put(f"http://localhost:{port}/json/new?{encoded}", timeout=15)
+        if resp.status_code == 200 and resp.text.strip():
+            return resp.json()
+    except Exception:
+        pass
+    return None
+def extract_auth_via_cdp(
+    auto_launch: bool = True,
+    login_timeout: int = 300,
+) -> dict[str, str] | None:
+    """
+    通过 CDP 监听网络请求，提取 Authorization header。
+    流程：
+    1. 启动 Chrome 或连接已有实例
+    2. 打开得到笔记页面
+    3. 等待用户登录
+    4. 监听 API 请求的 Authorization header
+    5. 返回 headers dict
+    Returns:
+        包含 Authorization 和 Xi-Csrf-Token 的 headers dict，失败返回 None
+    """
+    import websocket
+    # 1. 查找或启动 Chrome
+    port, debugger_url = find_existing_chrome()
+    reused = bool(port)
+    if not debugger_url and auto_launch:
+        chrome_path = get_chrome_path()
+        if not chrome_path:
+            raise RuntimeError(
+                "❌ 未找到 Chrome 浏览器。\n"
+                "请安装 Google Chrome，或使用 `getnotes login --token` 手动输入 token。"
+            )
+        port = find_available_port()
+        if not launch_chrome(port):
+            raise RuntimeError("❌ 启动 Chrome 失败")
+        debugger_url = get_debugger_url(port, tries=10)
+    if not debugger_url:
+        raise RuntimeError(f"❌ 无法连接 Chrome（端口 {port}）")
+    # 2. 查找得到笔记页面
+    page = _find_biji_page(port)
+    if not page:
+        raise RuntimeError("❌ 无法打开得到笔记页面")
+    ws_url = page.get("webSocketDebuggerUrl")
+    if not ws_url:
+        raise RuntimeError("❌ 无法获取页面 WebSocket URL")
+    # 3. 通过 CDP 网络监听捕获 Authorization header
+    ws = websocket.create_connection(ws_url, timeout=30, suppress_origin=True)
+    try:
+        # 启用网络监听
+        ws.send(json.dumps({"id": 10, "method": "Network.enable", "params": {}}))
+        # 读取 enable 的响应
+        while True:
+            resp = json.loads(ws.recv())
+            if resp.get("id") == 10:
+                break
+        print("⏳ 等待登录并捕获 API 请求中...")
+        print(f"   请在浏览器中登录 {LOGIN_URL}")
+        if reused:
+            print("   （已连接到现有 Chrome 实例）")
+        print(f"   超时时间: {login_timeout}s\n")
+        start_time = time.time()
+        captured_headers: dict[str, str] = {}
+        while time.time() - start_time < login_timeout:
+            try:
+                ws.settimeout(2.0)
+                raw = ws.recv()
+                event = json.loads(raw)
+            except websocket.WebSocketTimeoutException:
+                continue
+            except Exception:
+                continue
+            # 监听 Network.requestWillBeSent 事件
+            if event.get("method") == "Network.requestWillBeSent":
+                request = event.get("params", {}).get("request", {})
+                url = request.get("url", "")
+                headers = request.get("headers", {})
+                # 检查是否是得到笔记的 API 请求
+                is_target = any(domain in url for domain in API_DOMAINS)
+                if is_target and "Authorization" in headers:
+                    auth_value = headers["Authorization"]
+                    if auth_value.startswith("Bearer "):
+                        captured_headers["Authorization"] = auth_value
+                        # 尝试捕获 CSRF token
+                        for key in ["Xi-Csrf-Token", "X-Appid", "X-Av"]:
+                            if key in headers:
+                                captured_headers[key] = headers[key]
+                        print(f"✅ 成功捕获 Authorization token!")
+                        return captured_headers
+        raise RuntimeError("⏰ 登录超时，未捕获到 API 请求。请重试。")
+    finally:
+        ws.close()
+        if not reused:
+            terminate_chrome()