douyin-cli 5.3.260515__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- douyin_cli-5.3.260515/PKG-INFO +90 -0
- douyin_cli-5.3.260515/README.md +75 -0
- douyin_cli-5.3.260515/backend/__init__.py +1 -0
- douyin_cli-5.3.260515/backend/cli.py +491 -0
- douyin_cli-5.3.260515/backend/constants.py +38 -0
- douyin_cli-5.3.260515/backend/lib/__init__.py +27 -0
- douyin_cli-5.3.260515/backend/lib/cookies.py +205 -0
- douyin_cli-5.3.260515/backend/lib/douyin/__init__.py +55 -0
- douyin_cli-5.3.260515/backend/lib/douyin/client.py +230 -0
- douyin_cli-5.3.260515/backend/lib/douyin/crawler.py +410 -0
- douyin_cli-5.3.260515/backend/lib/douyin/js/douyin.js +434 -0
- douyin_cli-5.3.260515/backend/lib/douyin/openapi.py +117 -0
- douyin_cli-5.3.260515/backend/lib/douyin/parser.py +299 -0
- douyin_cli-5.3.260515/backend/lib/douyin/request.py +294 -0
- douyin_cli-5.3.260515/backend/lib/douyin/target.py +182 -0
- douyin_cli-5.3.260515/backend/lib/douyin/types.py +186 -0
- douyin_cli-5.3.260515/backend/lib/download.py +40 -0
- douyin_cli-5.3.260515/backend/lib/exceptions.py +18 -0
- douyin_cli-5.3.260515/backend/settings.py +189 -0
- douyin_cli-5.3.260515/backend/utils/__init__.py +25 -0
- douyin_cli-5.3.260515/backend/utils/paths.py +39 -0
- douyin_cli-5.3.260515/backend/utils/text.py +231 -0
- douyin_cli-5.3.260515/pyproject.toml +87 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: douyin-cli
|
|
3
|
+
Version: 5.3.260515
|
|
4
|
+
Summary: 抖音的非官方命令行工具,由erma0的douyin-crawler修改而来。
|
|
5
|
+
Author: LIghtJUNction, erma0
|
|
6
|
+
Author-email: LIghtJUNction <lightjuncion.me@gmail.com>, erma0 <me@erma0.cn>
|
|
7
|
+
License: AGPL-3.0-or-later
|
|
8
|
+
Requires-Dist: click>=8.3.1
|
|
9
|
+
Requires-Dist: exejs>=0.0.7
|
|
10
|
+
Requires-Dist: loguru>=0.7.3
|
|
11
|
+
Requires-Dist: niquests>=3.10
|
|
12
|
+
Requires-Dist: ujson>=5.11.0
|
|
13
|
+
Requires-Python: >=3.13
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+

|
|
17
|
+
|
|
18
|
+
# ✨douyin-cli
|
|
19
|
+
|
|
20
|
+
> ❤️[开源不易,欢迎star⭐](#star-history)
|
|
21
|
+
|
|
22
|
+
## 📢声明
|
|
23
|
+
|
|
24
|
+
> 本项目是面向抖音开放平台官方 OpenAPI 的命令行工具,同时保留部分公开网页数据处理能力,仅用于测试和学习研究,禁止用于商业用途或任何非法用途。
|
|
25
|
+
>
|
|
26
|
+
> 任何用户直接或间接使用、传播本仓库内容时责任自负,本仓库的贡献者不对该等行为产生的任何后果负责。
|
|
27
|
+
>
|
|
28
|
+
> **如果相关方认为该项目的代码可能涉嫌侵犯其权利,请及时联系我删除相关代码**。
|
|
29
|
+
>
|
|
30
|
+
> 使用本仓库的内容即表示您同意本免责声明的所有条款和条件。如果你不接受以上的免责声明,请立即停止使用本项目。
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## 🍬功能特性
|
|
35
|
+
|
|
36
|
+
- ✅ **官方 OpenAPI**:支持通过 `douyin api` 调用抖音开放平台官方接口
|
|
37
|
+
- ✅ **Token 获取**:支持 `client_token` 与 OAuth `access_token` 获取流程
|
|
38
|
+
- ✅ **通用请求**:支持任意官方 OpenAPI 路径、查询参数、JSON body、表单和自定义请求头
|
|
39
|
+
- ✅ **环境变量**:支持 `DOUYIN_CLIENT_KEY`、`DOUYIN_CLIENT_SECRET`、`DOUYIN_ACCESS_TOKEN`
|
|
40
|
+
- ✅ **命令行安装**:通过 `uv tool install .` 安装为本机 `douyin` 命令
|
|
41
|
+
- ✅ **兼容能力**:保留旧版网页数据命令,适合已有脚本迁移
|
|
42
|
+
|
|
43
|
+
## 🚀快速开始
|
|
44
|
+
|
|
45
|
+
### 环境要求
|
|
46
|
+
|
|
47
|
+
- Python 3.13+
|
|
48
|
+
- uv 0.9+
|
|
49
|
+
- 抖音开放平台应用的 `client_key` / `client_secret`
|
|
50
|
+
- Node.js(仅旧版网页数据命令需要)
|
|
51
|
+
- aria2c(可选,仅旧版下载命令需要)
|
|
52
|
+
|
|
53
|
+
### 安装命令
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
uv tool install .
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### 官方 OpenAPI
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
# 获取 client_token
|
|
63
|
+
douyin api client-token \
|
|
64
|
+
--client-key "$DOUYIN_CLIENT_KEY" \
|
|
65
|
+
--client-secret "$DOUYIN_CLIENT_SECRET"
|
|
66
|
+
|
|
67
|
+
# 用 OAuth code 换 access_token
|
|
68
|
+
douyin api access-token \
|
|
69
|
+
--client-key "$DOUYIN_CLIENT_KEY" \
|
|
70
|
+
--client-secret "$DOUYIN_CLIENT_SECRET" \
|
|
71
|
+
--code "授权码"
|
|
72
|
+
|
|
73
|
+
# 调用任意官方 OpenAPI 路径
|
|
74
|
+
douyin api request GET /oauth/userinfo/ \
|
|
75
|
+
--token "$DOUYIN_ACCESS_TOKEN" \
|
|
76
|
+
--param open_id=xxx
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
📖 详细使用说明请查看 [USAGE.md](USAGE.md)
|
|
80
|
+
|
|
81
|
+
## 📊 技术栈
|
|
82
|
+
|
|
83
|
+
- **运行时**: Python 3.13
|
|
84
|
+
- **命令行**: Click
|
|
85
|
+
- **官方接口**: 抖音开放平台 OpenAPI
|
|
86
|
+
- **HTTP 客户端**: niquests
|
|
87
|
+
|
|
88
|
+
## Star History
|
|
89
|
+
|
|
90
|
+
[Star History Chart](https://star-history.com/#erma0/douyin&Date)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+

|
|
2
|
+
|
|
3
|
+
# ✨douyin-cli
|
|
4
|
+
|
|
5
|
+
> ❤️[开源不易,欢迎star⭐](#star-history)
|
|
6
|
+
|
|
7
|
+
## 📢声明
|
|
8
|
+
|
|
9
|
+
> 本项目是面向抖音开放平台官方 OpenAPI 的命令行工具,同时保留部分公开网页数据处理能力,仅用于测试和学习研究,禁止用于商业用途或任何非法用途。
|
|
10
|
+
>
|
|
11
|
+
> 任何用户直接或间接使用、传播本仓库内容时责任自负,本仓库的贡献者不对该等行为产生的任何后果负责。
|
|
12
|
+
>
|
|
13
|
+
> **如果相关方认为该项目的代码可能涉嫌侵犯其权利,请及时联系我删除相关代码**。
|
|
14
|
+
>
|
|
15
|
+
> 使用本仓库的内容即表示您同意本免责声明的所有条款和条件。如果你不接受以上的免责声明,请立即停止使用本项目。
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## 🍬功能特性
|
|
20
|
+
|
|
21
|
+
- ✅ **官方 OpenAPI**:支持通过 `douyin api` 调用抖音开放平台官方接口
|
|
22
|
+
- ✅ **Token 获取**:支持 `client_token` 与 OAuth `access_token` 获取流程
|
|
23
|
+
- ✅ **通用请求**:支持任意官方 OpenAPI 路径、查询参数、JSON body、表单和自定义请求头
|
|
24
|
+
- ✅ **环境变量**:支持 `DOUYIN_CLIENT_KEY`、`DOUYIN_CLIENT_SECRET`、`DOUYIN_ACCESS_TOKEN`
|
|
25
|
+
- ✅ **命令行安装**:通过 `uv tool install .` 安装为本机 `douyin` 命令
|
|
26
|
+
- ✅ **兼容能力**:保留旧版网页数据命令,适合已有脚本迁移
|
|
27
|
+
|
|
28
|
+
## 🚀快速开始
|
|
29
|
+
|
|
30
|
+
### 环境要求
|
|
31
|
+
|
|
32
|
+
- Python 3.13+
|
|
33
|
+
- uv 0.9+
|
|
34
|
+
- 抖音开放平台应用的 `client_key` / `client_secret`
|
|
35
|
+
- Node.js(仅旧版网页数据命令需要)
|
|
36
|
+
- aria2c(可选,仅旧版下载命令需要)
|
|
37
|
+
|
|
38
|
+
### 安装命令
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
uv tool install .
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### 官方 OpenAPI
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
# 获取 client_token
|
|
48
|
+
douyin api client-token \
|
|
49
|
+
--client-key "$DOUYIN_CLIENT_KEY" \
|
|
50
|
+
--client-secret "$DOUYIN_CLIENT_SECRET"
|
|
51
|
+
|
|
52
|
+
# 用 OAuth code 换 access_token
|
|
53
|
+
douyin api access-token \
|
|
54
|
+
--client-key "$DOUYIN_CLIENT_KEY" \
|
|
55
|
+
--client-secret "$DOUYIN_CLIENT_SECRET" \
|
|
56
|
+
--code "授权码"
|
|
57
|
+
|
|
58
|
+
# 调用任意官方 OpenAPI 路径
|
|
59
|
+
douyin api request GET /oauth/userinfo/ \
|
|
60
|
+
--token "$DOUYIN_ACCESS_TOKEN" \
|
|
61
|
+
--param open_id=xxx
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
📖 详细使用说明请查看 [USAGE.md](USAGE.md)
|
|
65
|
+
|
|
66
|
+
## 📊 技术栈
|
|
67
|
+
|
|
68
|
+
- **运行时**: Python 3.13
|
|
69
|
+
- **命令行**: Click
|
|
70
|
+
- **官方接口**: 抖音开放平台 OpenAPI
|
|
71
|
+
- **HTTP 客户端**: niquests
|
|
72
|
+
|
|
73
|
+
## Star History
|
|
74
|
+
|
|
75
|
+
[Star History Chart](https://star-history.com/#erma0/douyin&Date)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""DouyinCrawler backend package."""
|
|
@@ -0,0 +1,491 @@
|
|
|
1
|
+
"""Command line interface for DouyinCrawler."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import traceback
|
|
6
|
+
from collections.abc import Iterator
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from json import JSONDecodeError
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
import click
|
|
12
|
+
import ujson as json
|
|
13
|
+
from loguru import logger
|
|
14
|
+
|
|
15
|
+
from backend.constants import APP_VERSION, SETTINGS_FILE
|
|
16
|
+
from backend.lib.cookies import CookieManager
|
|
17
|
+
from backend.lib.douyin import Douyin
|
|
18
|
+
from backend.lib.douyin.openapi import DouyinOpenAPIClient, DouyinOpenAPIError
|
|
19
|
+
from backend.lib.download import download
|
|
20
|
+
from backend.settings import settings
|
|
21
|
+
|
|
22
|
+
ACCOUNT_ONLY_TYPES = {"favorite", "collection", "following", "follower"}
|
|
23
|
+
NO_DOWNLOAD_TYPES = {"following", "follower"}
|
|
24
|
+
|
|
25
|
+
BANNER = rf"""
|
|
26
|
+
____ _ ____ _
|
|
27
|
+
| _ \ ___ _ _ _ _(_)_ __ / ___|_ __ __ ___ _| | ___ _ __
|
|
28
|
+
| | | |/ _ \| | | | | | | | '_ \ | | | '__/ _` \ \ /\ / / |/ _ \ '__|
|
|
29
|
+
| |_| | (_) | |_| | |_| | | | | | | |___| | | (_| |\ V V /| | __/ |
|
|
30
|
+
|____/ \___/ \__,_|\__, |_|_| |_| \____|_| \__,_| \_/\_/ |_|\___|_|
|
|
31
|
+
|___/
|
|
32
|
+
V{APP_VERSION}
|
|
33
|
+
Github: https://github.com/erma0/douyin
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
class CrawlOptions:
    """Normalized options used by one or more crawl tasks.

    The dataclass is frozen, so the same instance can be handed to every
    task without any of them reassigning its attributes.
    """

    # Maximum number of items to collect; 0 means "no limit".
    limit: int
    # When true, only crawl data and skip the download step.
    no_download: bool
    # Crawl type selected with -t/--type (e.g. "post", "search").
    crawl_type: str
    # Download directory selected with -p/--path.
    output_path: str
    # Cookie string of the logged-in account.
    cookie: str
    # Search filter arguments as produced by build_filters().
    filters: dict[str, str]
    # Whether the --download-title flag was given.
    download_title: bool
    # Whether the --download-cover flag was given.
    download_cover: bool
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@click.group(invoke_without_command=True)
@click.pass_context
@click.option(
    "-u",
    "--urls",
    type=click.STRING,
    multiple=True,
    help="作品/账号/话题/音乐等类型的URL链接/ID或搜索关键词,也可输入文件路径(文件内一行一个),可多次输入。",
)
@click.option(
    "-l",
    "--limit",
    type=click.INT,
    default=0,
    help="限制最大采集数量,默认不限制(0表示不限制)",
)
@click.option(
    "--no-download",
    is_flag=True,
    help="不下载文件,仅采集数据",
)
@click.option(
    "-t",
    "--type",
    "crawl_type",
    type=click.Choice(
        [
            "post",
            "favorite",
            "music",
            "hashtag",
            "search",
            "following",
            "follower",
            "collection",
            "mix",
            "aweme",
        ],
        case_sensitive=False,
    ),
    default="post",
    help="采集类型,默认为post(主页作品)。支持:post/favorite/music/hashtag/search/following/follower/collection/mix/aweme",
)
@click.option(
    "-p",
    "--path",
    "output_path",
    type=click.STRING,
    default="下载",
    help="下载文件夹路径,默认为[下载]",
)
@click.option(
    "-c",
    "--cookie",
    type=click.STRING,
    help=f"已登录账号的cookie,可填写在 {SETTINGS_FILE} 中",
)
@click.option(
    "--sort-type",
    type=click.Choice(["0", "1", "2"], case_sensitive=False),
    help="搜索排序(仅search类型):0=综合,1=最多点赞,2=最新",
)
@click.option(
    "--publish-time",
    type=click.Choice(["0", "1", "7", "180"], case_sensitive=False),
    help="发布时间(仅search类型):0=不限,1=一天内,7=一周内,180=半年内",
)
@click.option(
    "--filter-duration",
    type=click.Choice(["", "0-1", "1-5", "5-10000"], case_sensitive=False),
    help="视频时长(仅search类型):空=不限,0-1=1分钟以下,1-5=1-5分钟,5-10000=5分钟以上",
)
@click.option(
    "--download-title",
    is_flag=True,
    help="下载标题文本文件",
)
@click.option(
    "--download-cover",
    is_flag=True,
    help="下载封面图片",
)
def main(
    ctx: click.Context,
    urls: tuple[str, ...],
    limit: int,
    no_download: bool,
    crawl_type: str,
    output_path: str,
    cookie: str | None,
    sort_type: str | None,
    publish_time: str | None,
    filter_duration: str | None,
    download_title: bool,
    download_cover: bool,
) -> None:
    """抖音数据采集命令行工具."""
    # NOTE: the docstring above is the help text Click prints; keep it as is.

    # A subcommand (such as ``api``) was requested: it does its own work, so
    # the crawl flow below must not run.
    if ctx.invoked_subcommand is not None:
        return

    click.echo(BANNER)

    # Step 1: obtain a cookie (CLI option, settings file, or prompt) and
    # verify it. Either failure has already been logged by the helper.
    resolved_cookie = load_cookie(cookie)
    if not resolved_cookie:
        return
    if not validate_cookie(resolved_cookie):
        return

    # Step 2: work out what to crawl; ``None`` means the user gave nothing.
    targets = resolve_targets(urls, crawl_type)
    if targets is None:
        return

    # Step 3: bundle the options once and run every target with them.
    search_filters = build_filters(sort_type, publish_time, filter_duration)
    run_targets(
        targets,
        CrawlOptions(
            limit=limit,
            no_download=no_download,
            crawl_type=crawl_type,
            output_path=output_path,
            cookie=resolved_cookie,
            filters=search_filters,
            download_title=download_title,
            download_cover=download_cover,
        ),
    )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
# Sub-group of ``main`` under which the official OpenAPI commands are
# registered. The docstring is the help text Click shows for the group, so it
# is intentionally left in its original wording.
@main.group()
def api() -> None:
    """调用抖音开放平台官方 OpenAPI."""
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
@api.command("client-token")
@click.option("--client-key", envvar="DOUYIN_CLIENT_KEY", required=True)
@click.option("--client-secret", envvar="DOUYIN_CLIENT_SECRET", required=True)
def api_client_token(client_key: str, client_secret: str) -> None:
    """获取 client_token."""
    # The docstring above is the Click help text and is kept verbatim.
    #
    # Fetch a client_token with the app credentials and print the JSON reply.
    # API failures are turned into a ClickException so the user sees a clean
    # one-line error, the same way the ``request`` command reports them.
    # NOTE(review): assumes client_token() signals failures with
    # DouyinOpenAPIError, as client.request() does - confirm in openapi.py.
    try:
        with DouyinOpenAPIClient() as client:
            echo_json(client.client_token(client_key, client_secret))
    except DouyinOpenAPIError as exc:
        raise click.ClickException(str(exc)) from exc
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
@api.command("access-token")
@click.option("--client-key", envvar="DOUYIN_CLIENT_KEY", required=True)
@click.option("--client-secret", envvar="DOUYIN_CLIENT_SECRET", required=True)
@click.option("--code", required=True, help="OAuth 授权码")
def api_access_token(client_key: str, client_secret: str, code: str) -> None:
    """用 OAuth code 换取 access_token."""
    # The docstring above is the Click help text and is kept verbatim.
    #
    # Exchange an OAuth authorization code for an access_token and print the
    # JSON reply. API failures are turned into a ClickException so the user
    # sees a clean one-line error, the same way the ``request`` command
    # reports them.
    # NOTE(review): assumes access_token() signals failures with
    # DouyinOpenAPIError, as client.request() does - confirm in openapi.py.
    try:
        with DouyinOpenAPIClient() as client:
            echo_json(client.access_token(client_key, client_secret, code))
    except DouyinOpenAPIError as exc:
        raise click.ClickException(str(exc)) from exc
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
@api.command("request")
@click.argument("method")
@click.argument("path")
@click.option("--token", envvar="DOUYIN_ACCESS_TOKEN", help="access_token/client_token")
@click.option("--param", "params", multiple=True, help="查询参数,格式 key=value")
@click.option("--json", "json_text", help="JSON 请求体")
@click.option("--form", "forms", multiple=True, help="表单参数,格式 key=value")
@click.option("--header", "headers", multiple=True, help="额外请求头,格式 key=value")
def api_request(
    method: str,
    path: str,
    token: str | None,
    params: tuple[str, ...],
    json_text: str | None,
    forms: tuple[str, ...],
    headers: tuple[str, ...],
) -> None:
    """调用任意官方 OpenAPI 路径."""
    # The docstring above is the Click help text and is kept verbatim.
    #
    # Send one request to the given OpenAPI path and print the JSON reply.
    # Both malformed CLI input (ValueError from the parse helpers) and API
    # errors end up as a ClickException.
    try:
        body = parse_json_body(json_text)
        with DouyinOpenAPIClient() as client:
            response = client.request(
                method,
                path,
                token=token,
                params=parse_key_values(params),
                json_body=body,
                form=parse_key_values(forms),
                headers=parse_key_values(headers),
            )
            echo_json(response)
    except (DouyinOpenAPIError, ValueError) as exc:
        raise click.ClickException(str(exc)) from exc
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def build_filters(
|
|
235
|
+
sort_type: str | None,
|
|
236
|
+
publish_time: str | None,
|
|
237
|
+
filter_duration: str | None,
|
|
238
|
+
) -> dict[str, str]:
|
|
239
|
+
"""Build search filter arguments."""
|
|
240
|
+
filters = {}
|
|
241
|
+
if sort_type:
|
|
242
|
+
filters["sort_type"] = sort_type
|
|
243
|
+
if publish_time:
|
|
244
|
+
filters["publish_time"] = publish_time
|
|
245
|
+
if filter_duration is not None:
|
|
246
|
+
filters["filter_duration"] = filter_duration
|
|
247
|
+
return filters
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def echo_json(data: dict) -> None:
    """Write *data* to the terminal as indented JSON, keeping non-ASCII text readable."""
    rendered = json.dumps(data, ensure_ascii=False, indent=2)
    click.echo(rendered)
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def parse_json_body(text: str | None) -> dict | list | None:
|
|
256
|
+
"""Parse a JSON body option."""
|
|
257
|
+
if text is None:
|
|
258
|
+
return None
|
|
259
|
+
try:
|
|
260
|
+
data = json.loads(text)
|
|
261
|
+
except JSONDecodeError as exc:
|
|
262
|
+
msg = f"--json 不是合法 JSON: {exc}"
|
|
263
|
+
raise ValueError(msg) from exc
|
|
264
|
+
if not isinstance(data, (dict, list)):
|
|
265
|
+
msg = "--json 必须是 JSON object 或 array"
|
|
266
|
+
raise ValueError(msg)
|
|
267
|
+
return data
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def parse_key_values(values: tuple[str, ...]) -> dict[str, str] | None:
|
|
271
|
+
"""Parse repeated key=value CLI options."""
|
|
272
|
+
if not values:
|
|
273
|
+
return None
|
|
274
|
+
parsed = {}
|
|
275
|
+
for value in values:
|
|
276
|
+
if "=" not in value:
|
|
277
|
+
msg = f"参数必须是 key=value 格式: {value}"
|
|
278
|
+
raise ValueError(msg)
|
|
279
|
+
key, item_value = value.split("=", 1)
|
|
280
|
+
if not key:
|
|
281
|
+
msg = f"参数 key 不能为空: {value}"
|
|
282
|
+
raise ValueError(msg)
|
|
283
|
+
parsed[key] = item_value
|
|
284
|
+
return parsed
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def load_cookie(cookie: str | None) -> str | None:
    """Load the cookie from the CLI option, the settings, or an interactive prompt.

    Args:
        cookie: Value of ``-c/--cookie``, or ``None`` when it was not given.

    Returns:
        The stripped cookie string, or ``None`` when no usable cookie could
        be obtained (an error has been logged or the prompt was cancelled).
    """
    if cookie is not None:
        # An explicit option wins; a blank value is an error rather than a
        # reason to fall back to the other sources.
        logger.info("正在加载命令行指定的Cookie...")
        cookie_value = cookie.strip()
        if cookie_value:
            return cookie_value
        logger.error("无法加载指定的Cookie")
        return None

    # ``or ""`` guards against a stored null/None value, which would
    # otherwise raise AttributeError on .strip().
    cookie_value = (settings.get("cookie", "") or "").strip()
    if cookie_value:
        logger.info("✓ 已从配置文件加载Cookie")
        return cookie_value

    return prompt_for_cookie()
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def prompt_for_cookie() -> str | None:
    """Explain how to configure a cookie, then ask the user to paste one.

    Returns:
        The stripped cookie the user entered, or ``None`` when the prompt was
        cancelled or left empty.
    """
    divider = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    logger.warning(divider)
    logger.warning("⚠ 未找到Cookie配置")
    logger.info("配置方法:")
    logger.info(f" 方法1:在 {SETTINGS_FILE} 中设置 cookie 字段")
    logger.info(" 方法2:使用 -c 参数:douyin -c 'your_cookie'")
    logger.warning(divider)

    try:
        raw_input_value = click.prompt(
            "请粘贴Cookie字符串",
            default="",
            show_default=False,
        )
    except (click.Abort, EOFError):
        # Ctrl+C or closed stdin: treat as "no cookie" instead of crashing.
        logger.warning("\n用户取消输入")
        return None

    entered = raw_input_value.strip()
    if entered:
        logger.success("✓ Cookie已输入")
        return entered

    logger.error("未输入Cookie,程序退出")
    return None
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def validate_cookie(cookie: str) -> bool:
    """Check the cookie with ``CookieManager`` and log the outcome.

    Returns:
        ``True`` when the cookie passes validation; ``False`` otherwise, after
        logging the likely causes.
    """
    is_valid = CookieManager.validate_cookie(cookie)
    if not is_valid:
        divider = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        logger.error(divider)
        logger.error("✗ Cookie验证失败")
        logger.info("可能原因:")
        logger.info(" 1. Cookie已过期,请重新获取")
        logger.info(" 2. Cookie格式不正确")
        logger.info(" 3. 账号已退出登录")
        logger.error(divider)
        return False

    logger.success("✓ Cookie验证通过")
    return True
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def resolve_targets(urls: tuple[str, ...], crawl_type: str) -> tuple[str, ...] | None:
    """Decide which targets to crawl, asking the user when none were given.

    Args:
        urls: Targets passed with ``-u/--urls``.
        crawl_type: The selected crawl type.

    Returns:
        The given targets unchanged; ``("",)`` for account-only crawl types
        when no target was given; a one-element tuple with the prompted
        value; or ``None`` when the prompt was left empty.
    """
    if not urls:
        if crawl_type in ACCOUNT_ONLY_TYPES:
            # These types can run against the logged-in account itself, which
            # is represented by a single empty target.
            logger.info(f"采集本账号的 {crawl_type} 数据")
            return ("",)

        answer = click.prompt(
            f"采集类型:{crawl_type},请输入目标关键词/URL链接/ID或文件路径",
            default="",
            show_default=False,
        )
        entered = answer.strip()
        if not entered:
            logger.error("未输入目标,退出程序")
            return None
        return (entered,)

    return urls
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def run_targets(targets: tuple[str, ...], options: CrawlOptions) -> None:
    """Run a crawl task for every expanded target and log a success/failure tally."""
    # Tasks run one by one, in the order the targets are produced.
    outcomes = [run_task(target, options) for target in iter_targets(targets)]
    succeeded = sum(1 for ok in outcomes if ok)
    failed = len(outcomes) - succeeded

    divider = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    logger.info(divider)
    logger.success(f"✓ 任务完成:成功 {succeeded} 个,失败 {failed} 个")
    logger.info(divider)
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def iter_targets(targets: tuple[str, ...]) -> Iterator[str | None]:
|
|
386
|
+
"""Yield individual targets, expanding files when an argument is a path."""
|
|
387
|
+
for raw_target in targets:
|
|
388
|
+
target = raw_target.strip()
|
|
389
|
+
if not target:
|
|
390
|
+
continue
|
|
391
|
+
|
|
392
|
+
path = Path(target)
|
|
393
|
+
if not path.exists():
|
|
394
|
+
yield target
|
|
395
|
+
continue
|
|
396
|
+
|
|
397
|
+
logger.info(f"从文件读取目标:{target}")
|
|
398
|
+
lines = read_target_file(path)
|
|
399
|
+
if not lines:
|
|
400
|
+
logger.error(f"文件 [{target}] 中没有发现目标URL")
|
|
401
|
+
yield None
|
|
402
|
+
continue
|
|
403
|
+
|
|
404
|
+
logger.info(f"文件中共有 {len(lines)} 个目标")
|
|
405
|
+
for index, line in enumerate(lines, 1):
|
|
406
|
+
logger.info(f"处理第 {index}/{len(lines)} 个目标")
|
|
407
|
+
yield line
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def read_target_file(path: Path) -> list[str]:
    """Read one target per non-empty line of a UTF-8 text file.

    Args:
        path: File to read.

    Returns:
        The stripped, non-empty lines in file order, or an empty list when
        the file cannot be read or decoded (the error is logged).
    """
    try:
        # "utf-8-sig" also accepts plain UTF-8 and drops a leading BOM, which
        # str.strip() would otherwise leave glued to the first target.
        content = path.read_text(encoding="utf-8-sig")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError; left uncaught,
        # one badly encoded file would abort the whole run.
        logger.error(f"读取文件失败: {exc}")
        return []
    return [stripped for line in content.splitlines() if (stripped := line.strip())]
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def run_task(target: str | None, options: CrawlOptions) -> bool:
    """Run a single crawl task and report whether it succeeded.

    Args:
        target: The target to crawl; ``None`` marks an unusable target and
            counts as a failure without doing any work.
        options: Options shared by all tasks of this run.

    Returns:
        ``True`` when crawling (and the optional download) finished without
        an exception; ``False`` on ``None`` targets, user interruption, or
        any error.
    """
    if target is None:
        return False

    try:
        log_task_start(target, options)
        crawler = create_crawler(target, options)
        crawler.run()
        maybe_download(crawler, options)
    except Exception as exc:
        # Task boundary: log the failure (traceback at debug level) and let
        # the remaining targets continue.
        logger.error(f"任务执行失败: {exc}")
        logger.debug(traceback.format_exc())
        return False
    except KeyboardInterrupt:
        logger.warning("用户中断任务")
        return False
    else:
        return True
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
def log_task_start(target: str, options: CrawlOptions) -> None:
    """Log a summary of the task that is about to run.

    Filters and the title/cover download flags are only mentioned when set.
    """
    divider = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    if options.limit == 0:
        limit_text = "不限"
    else:
        limit_text = f"{options.limit}条"

    logger.info(divider)
    logger.info("开始采集任务")
    # An empty target stands for the logged-in account.
    logger.info(f" 目标: {target or '本账号'}")
    logger.info(f" 类型: {options.crawl_type}")
    logger.info(f" 数量限制: {limit_text}")
    if options.filters:
        logger.info(f" 筛选条件: {options.filters}")
    if options.download_title:
        logger.info(" 下载标题: ✓ 是")
    if options.download_cover:
        logger.info(" 下载封面: ✓ 是")
    logger.info(divider)
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def create_crawler(target: str, options: CrawlOptions) -> Douyin:
    """Build the ``Douyin`` crawler for one target.

    The title/cover download switches are on when either the CLI flag or the
    matching settings entry enables them; the settings entry is only looked
    up when the flag is off.
    """
    user_agent = settings.get("userAgent", "")
    want_title = options.download_title or settings.get("enableDownloadTitle", False)
    want_cover = options.download_cover or settings.get("enableDownloadCover", False)

    crawler = Douyin(
        target=target,
        limit=options.limit,
        type=options.crawl_type,
        down_path=options.output_path,
        cookie=options.cookie,
        user_agent=user_agent,
        filters=options.filters,
        enable_download_title=want_title,
        enable_download_cover=want_cover,
    )
    return crawler
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def maybe_download(douyin: Douyin, options: CrawlOptions) -> None:
    """Start the file download unless it was disabled or the type has no files.

    Args:
        douyin: The crawler that has just run; supplies the download path and
            the aria2 configuration.
        options: Options of the current run (``no_download`` is consulted).
    """
    skip_reason = None
    if options.no_download:
        skip_reason = "已跳过下载(--no-download)"
    elif douyin.type in NO_DOWNLOAD_TYPES:
        skip_reason = "此类型不需要下载文件"

    if skip_reason is not None:
        logger.info(skip_reason)
        return

    logger.info("开始下载文件...")
    download(douyin.down_path, douyin.aria2_conf)
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
# Run the CLI when this module is executed directly as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Application constants."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from loguru import logger
|
|
6
|
+
|
|
7
|
+
# Support both ways of loading this module: imported as part of the package
# (relative import) or run as a standalone script (top-level import).
try:
    from .utils.paths import get_app_root
except ImportError:
    from utils.paths import get_app_root

# Read the version from the installed distribution's metadata; fall back to a
# placeholder when it cannot be determined.
try:
    from importlib.metadata import version as get_version

    APP_VERSION = get_version("douyin-cli")
except Exception as e:
    logger.debug(f"获取版本号失败: {e}")
    APP_VERSION = "0.0.0"

# Project root directory (the application directory)
PROJECT_ROOT = get_app_root()
# Full paths
CONFIG_DIR = str(Path(PROJECT_ROOT) / "config")
DOWNLOAD_DIR = str(Path(PROJECT_ROOT) / "download")
SETTINGS_FILE = str(Path(CONFIG_DIR) / "settings.json")

# Default settings (used to create the config file on first run)
DEFAULT_SETTINGS = {
    "cookie": "",
    "userAgent": "",  # Leave empty to use the built-in default (RequestHeaders.USER_AGENT)
    "downloadPath": DOWNLOAD_DIR,
    "enableIncrementalFetch": True,
    "enableDownloadTitle": False,
    "enableDownloadCover": False,
    "filenameFields": ["id", "title"],
    "filenameSeparator": "_",
}
|