@9000ai/cli 0.5.5 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/transcribe.js +108 -3
- package/dist/index.js +1 -1
- package/package.json +2 -2
- package/skills/9000AI-hub/SKILL.md +2 -2
- package/skills/douyin-monitor/references/endpoints.md +0 -6
- package/skills/douyin-topic-discovery/references/endpoints.md +0 -6
- package/skills/video-transcription/SKILL.md +32 -43
- package/skills/video-transcription/references/endpoints.md +13 -13
|
@@ -1,15 +1,54 @@
|
|
|
1
1
|
import { request, printJson, pollUntilDone } from "../client.js";
|
|
2
2
|
import { loadJsonFile } from "../utils/format.js";
|
|
3
|
+
/**
 * Extract unique video_ids from monitor task output.
 *
 * The output may be the payload itself or an envelope carrying the payload
 * under `data`. The payload is walked as `creators[].videos[].video_id`, and
 * every truthy `video_id` is collected once, in first-seen order.
 *
 * Missing or malformed levels (null/undefined output, non-array `creators`
 * or `videos`, null entries) contribute no IDs instead of throwing, so a
 * task that finished without usable output simply yields an empty list.
 *
 * @param {object|null|undefined} output - Parsed monitor task output.
 * @returns {Array} Unique video IDs in the order they were first encountered.
 */
function extractVideoIdsFromMonitorOutput(output) {
    // Unwrap the optional `data` envelope; `?.` keeps a null output from throwing.
    const data = output?.data ?? output;
    const creators = Array.isArray(data?.creators) ? data.creators : [];
    // A Set both dedupes and preserves insertion order.
    const seen = new Set();
    for (const creator of creators) {
        const videos = Array.isArray(creator?.videos) ? creator.videos : [];
        for (const video of videos) {
            const videoId = video?.video_id;
            if (videoId) {
                seen.add(videoId);
            }
        }
    }
    return [...seen];
}
|
|
3
18
|
export function registerTranscribeCommands(parent) {
|
|
4
19
|
const cmd = parent.command("transcribe").description("Video / audio transcription");
|
|
5
20
|
cmd
|
|
6
21
|
.command("submit")
|
|
7
|
-
.description("Submit
|
|
8
|
-
.
|
|
22
|
+
.description("Submit video-to-text task (by video IDs or JSON file)")
|
|
23
|
+
.option("--video-ids <ids>", "Comma-separated video IDs (backend resolves URLs from cache)")
|
|
24
|
+
.option("--json-file <path>", "JSON file with video task details (legacy, includes URLs)")
|
|
25
|
+
.option("--group-label <label>", "Batch group label for tracking")
|
|
26
|
+
.option("--webhook <url>", "Webhook URL for completion callback")
|
|
9
27
|
.option("--wait", "Poll until task completes then print results")
|
|
10
28
|
.option("--fields <list>", "Comma-separated fields to extract (used with --wait)")
|
|
11
29
|
.option("--compact", "One JSON object per line (used with --wait)")
|
|
12
30
|
.action(async (opts) => {
|
|
31
|
+
if (!opts.videoIds && !opts.jsonFile) {
|
|
32
|
+
console.error("Error: must provide --video-ids or --json-file");
|
|
33
|
+
process.exit(1);
|
|
34
|
+
}
|
|
35
|
+
// ── --video-ids: 新流程,后端自己查 URL + 去重 ──
|
|
36
|
+
if (opts.videoIds) {
|
|
37
|
+
const ids = opts.videoIds.split(",").map((s) => s.trim()).filter(Boolean);
|
|
38
|
+
const payload = { video_ids: ids };
|
|
39
|
+
if (opts.groupLabel)
|
|
40
|
+
payload.group_label = opts.groupLabel;
|
|
41
|
+
if (opts.webhook)
|
|
42
|
+
payload.webhook = opts.webhook;
|
|
43
|
+
const data = await request({
|
|
44
|
+
method: "POST",
|
|
45
|
+
path: "/api/v1/media/transcribe-by-video-ids",
|
|
46
|
+
payload,
|
|
47
|
+
});
|
|
48
|
+
printJson(data, { fields: opts.fields, compact: opts.compact });
|
|
49
|
+
return;
|
|
50
|
+
}
|
|
51
|
+
// ── --json-file: 兼容老流程 ──
|
|
13
52
|
const payload = loadJsonFile(opts.jsonFile);
|
|
14
53
|
const data = await request({
|
|
15
54
|
method: "POST",
|
|
@@ -23,7 +62,6 @@ export function registerTranscribeCommands(parent) {
|
|
|
23
62
|
printJson(data);
|
|
24
63
|
return;
|
|
25
64
|
}
|
|
26
|
-
// --wait: poll then fetch results
|
|
27
65
|
console.error(`Batch submitted → ${batchId}, waiting...`);
|
|
28
66
|
await pollUntilDone(`/api/v1/tasks/batch/${batchId}`);
|
|
29
67
|
const resultData = await request({
|
|
@@ -73,4 +111,71 @@ export function registerTranscribeCommands(parent) {
|
|
|
73
111
|
clearTimeout(timer);
|
|
74
112
|
}
|
|
75
113
|
});
|
|
114
|
+
// ────────────────────────────────────────────
|
|
115
|
+
// from-monitor: 监控结果 → 提取 video_id → 后端处理
|
|
116
|
+
// ────────────────────────────────────────────
|
|
117
|
+
cmd
|
|
118
|
+
.command("from-monitor")
|
|
119
|
+
.description("Extract video IDs from monitor results, send to backend for dedup + transcribe")
|
|
120
|
+
.option("--task-id <id>", "Monitor task ID to extract videos from")
|
|
121
|
+
.option("--sec-user <ids>", "Comma-separated sec_user_id list (fetches videos, caches URLs)")
|
|
122
|
+
.option("--group-label <label>", "Batch group label for tracking")
|
|
123
|
+
.option("--count <n>", "Videos per user (with --sec-user)", "20")
|
|
124
|
+
.option("--webhook <url>", "Webhook URL for completion callback")
|
|
125
|
+
.action(async (opts) => {
|
|
126
|
+
if (!opts.taskId && !opts.secUser) {
|
|
127
|
+
console.error("Error: must provide --task-id or --sec-user");
|
|
128
|
+
process.exit(1);
|
|
129
|
+
}
|
|
130
|
+
// ── Step 1: Collect video_ids ──
|
|
131
|
+
console.error("Step 1: Collecting video IDs...");
|
|
132
|
+
const allVideoIds = new Set();
|
|
133
|
+
if (opts.taskId) {
|
|
134
|
+
const taskResp = await request({ method: "GET", path: `/api/v1/tasks/${opts.taskId}` });
|
|
135
|
+
const taskData = (taskResp.data ?? taskResp);
|
|
136
|
+
if (taskData.status !== "SUCCESS") {
|
|
137
|
+
console.error(`Task status: ${taskData.status ?? "unknown"} (need SUCCESS)`);
|
|
138
|
+
process.exit(1);
|
|
139
|
+
}
|
|
140
|
+
const output = (typeof taskData.output === "string" ? JSON.parse(taskData.output) : taskData.output);
|
|
141
|
+
for (const vid of extractVideoIdsFromMonitorOutput(output)) {
|
|
142
|
+
allVideoIds.add(vid);
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
if (opts.secUser) {
|
|
146
|
+
// fetch 会同时写入后端 video_url_cache,所以只需收集 video_id
|
|
147
|
+
const secUsers = opts.secUser.split(",").map((s) => s.trim()).filter(Boolean);
|
|
148
|
+
for (const secId of secUsers) {
|
|
149
|
+
const params = new URLSearchParams({ sec_user_id: secId, count: opts.count ?? "20", sort_type: "0" });
|
|
150
|
+
console.error(` Fetching ${secId}...`);
|
|
151
|
+
const resp = await request({ method: "GET", path: `/api/v1/douyin/monitor/user-posts?${params}` });
|
|
152
|
+
const data = (resp.data ?? resp);
|
|
153
|
+
const items = (data.items ?? data.videos ?? []);
|
|
154
|
+
for (const v of items) {
|
|
155
|
+
const vid = v.video_id;
|
|
156
|
+
if (vid)
|
|
157
|
+
allVideoIds.add(vid);
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
const videoIds = [...allVideoIds];
|
|
162
|
+
console.error(` Collected ${videoIds.length} unique video IDs`);
|
|
163
|
+
if (videoIds.length === 0) {
|
|
164
|
+
printJson({ total_videos: 0, already_transcribed: 0, newly_submitted: 0 });
|
|
165
|
+
return;
|
|
166
|
+
}
|
|
167
|
+
// ── Step 2: 调后端统一处理(去重 + 查缓存 + 转写) ──
|
|
168
|
+
console.error("Step 2: Sending to backend for dedup + transcribe...");
|
|
169
|
+
const payload = { video_ids: videoIds };
|
|
170
|
+
if (opts.groupLabel)
|
|
171
|
+
payload.group_label = opts.groupLabel;
|
|
172
|
+
if (opts.webhook)
|
|
173
|
+
payload.webhook = opts.webhook;
|
|
174
|
+
const data = await request({
|
|
175
|
+
method: "POST",
|
|
176
|
+
path: "/api/v1/media/transcribe-by-video-ids",
|
|
177
|
+
payload,
|
|
178
|
+
});
|
|
179
|
+
printJson(data);
|
|
180
|
+
});
|
|
76
181
|
}
|
package/dist/index.js
CHANGED
|
@@ -14,7 +14,7 @@ const program = new Command();
|
|
|
14
14
|
program
|
|
15
15
|
.name("9000ai")
|
|
16
16
|
.description("9000AI Toolbox CLI — unified interface for 9000AI platform")
|
|
17
|
-
.version("0.
|
|
17
|
+
.version("0.6.1");
|
|
18
18
|
registerConfigCommands(program);
|
|
19
19
|
registerAuthCommands(program);
|
|
20
20
|
registerSearchCommands(program);
|
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@9000ai/cli",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.6.1",
|
|
4
4
|
"description": "9000AI Toolbox CLI — unified command-line interface for 9000AI platform",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
|
-
"9000ai": "
|
|
7
|
+
"9000ai": "dist/index.js"
|
|
8
8
|
},
|
|
9
9
|
"scripts": {
|
|
10
10
|
"build": "tsc",
|
|
@@ -39,8 +39,8 @@ output-format: routing-only
|
|
|
39
39
|
}
|
|
40
40
|
|
|
41
41
|
# 有依赖,串行
|
|
42
|
-
9000ai search keyword "xxx" --wait --fields desc,
|
|
43
|
-
→ 拿
|
|
42
|
+
9000ai search keyword "xxx" --wait --fields desc,video_id,likes
|
|
43
|
+
→ 拿 video_id → 9000ai transcribe submit --video-ids <id1,id2,...>
|
|
44
44
|
```
|
|
45
45
|
|
|
46
46
|
## 模块目录
|
|
@@ -19,11 +19,23 @@ output-format: task-oriented
|
|
|
19
19
|
## 适用范围
|
|
20
20
|
|
|
21
21
|
用于:
|
|
22
|
-
-
|
|
23
|
-
-
|
|
24
|
-
-
|
|
22
|
+
- 按 video_id 提交转写(推荐)
|
|
23
|
+
- 从监控结果一键转写(from-monitor)
|
|
24
|
+
- 提交批量视频转文字任务(传 JSON 文件,兼容老流程)
|
|
25
|
+
- 查询转写任务状态和结果
|
|
25
26
|
- 只提取原文文案
|
|
26
|
-
|
|
27
|
+
|
|
28
|
+
## 三种提交方式
|
|
29
|
+
|
|
30
|
+
| 方式 | 命令 | 适用场景 |
|
|
31
|
+
|------|------|----------|
|
|
32
|
+
| **按 video_id**(推荐) | `transcribe submit --video-ids <id1,id2,...>` | 只传短 ID,简洁高效 |
|
|
33
|
+
| **从监控结果** | `transcribe from-monitor --task-id <id>` | 监控任务完成后一键转写所有视频 |
|
|
34
|
+
| **按 JSON 文件** | `transcribe submit --json-file <path>` | 需要自定义参数(超时、webhook 等) |
|
|
35
|
+
|
|
36
|
+
**优先用 `--video-ids`**:不需要接触 video_url,简洁省 token。
|
|
37
|
+
|
|
38
|
+
使用 `--video-ids` 前需要先通过 `monitor fetch` 或 `monitor run` 获取过视频列表。
|
|
27
39
|
|
|
28
40
|
## 工作方式
|
|
29
41
|
|
|
@@ -55,33 +67,13 @@ output-format: task-oriented
|
|
|
55
67
|
|
|
56
68
|
不要把整份转写 JSON 原样塞进上下文。
|
|
57
69
|
|
|
58
|
-
|
|
59
|
-
1. 先查 `9000ai task status --task-id <task_id>`
|
|
60
|
-
2. 从 `data.output.json_url` 拿到转写 JSON 地址
|
|
61
|
-
3. 直接提取 JSON 根字段:
|
|
62
|
-
|
|
63
|
-
```text
|
|
64
|
-
text
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
句子级文本在:
|
|
68
|
-
|
|
69
|
-
```text
|
|
70
|
-
timecodes.sentences[*].text
|
|
71
|
-
```
|
|
72
|
-
|
|
73
|
-
已经提供了直接提原文的命令:
|
|
70
|
+
如果只需要原文文案:
|
|
74
71
|
|
|
75
72
|
```bash
|
|
76
73
|
9000ai transcribe text --task-id <task_id>
|
|
77
74
|
```
|
|
78
75
|
|
|
79
|
-
|
|
80
|
-
- `text_field = "text"`
|
|
81
|
-
- `sentence_field = "timecodes.sentences[*].text"`
|
|
82
|
-
- `text`
|
|
83
|
-
|
|
84
|
-
所以后续如果 AI 只想拿原文文案,优先用 `text`,不要把整份 JSON 全读进上下文。
|
|
76
|
+
返回 `text` 字段即可,不要读全量 JSON。
|
|
85
77
|
|
|
86
78
|
## 命令
|
|
87
79
|
|
|
@@ -94,12 +86,21 @@ timecodes.sentences[*].text
|
|
|
94
86
|
常用命令:
|
|
95
87
|
|
|
96
88
|
```bash
|
|
97
|
-
|
|
98
|
-
9000ai
|
|
99
|
-
|
|
89
|
+
# 按 video_id 转写(推荐)
|
|
90
|
+
9000ai transcribe submit --video-ids 7582142740935970054,7576959070901930987 --group-label "竞品分析"
|
|
91
|
+
|
|
92
|
+
# 从监控结果一键转写
|
|
93
|
+
9000ai transcribe from-monitor --task-id <monitor_task_id> --group-label "对标分析-20260403"
|
|
94
|
+
9000ai transcribe from-monitor --sec-user <sec_user_id1>,<sec_user_id2> --group-label "竞品监控"
|
|
95
|
+
|
|
96
|
+
# JSON 文件提交(兼容老流程)
|
|
100
97
|
9000ai transcribe submit --json-file video_tasks.json
|
|
98
|
+
|
|
99
|
+
# 查询状态和结果
|
|
101
100
|
9000ai task status --task-id <task_id>
|
|
102
101
|
9000ai task results --task-id <task_id>
|
|
102
|
+
|
|
103
|
+
# 只提取原文
|
|
103
104
|
9000ai transcribe text --task-id <task_id>
|
|
104
105
|
```
|
|
105
106
|
|
|
@@ -107,7 +108,7 @@ timecodes.sentences[*].text
|
|
|
107
108
|
|
|
108
109
|
### 数据精简
|
|
109
110
|
|
|
110
|
-
|
|
111
|
+
**默认必须用 `--fields` 只取需要的字段,禁止全量读取。**
|
|
111
112
|
|
|
112
113
|
```bash
|
|
113
114
|
# 错误 — 返回全量转写 JSON,浪费上下文
|
|
@@ -116,26 +117,14 @@ timecodes.sentences[*].text
|
|
|
116
117
|
# 正确 — 只取需要的字段
|
|
117
118
|
9000ai task results --task-id <id> --fields status,output,video_url
|
|
118
119
|
|
|
119
|
-
# 只要原文 —
|
|
120
|
+
# 只要原文 — 用专用命令
|
|
120
121
|
9000ai transcribe text --task-id <task_id>
|
|
121
122
|
```
|
|
122
123
|
|
|
123
|
-
常用字段组合:
|
|
124
|
-
- 状态检查:`--fields status,progress,message`
|
|
125
|
-
- 结果提取:`--fields status,output,video_url`
|
|
126
|
-
|
|
127
124
|
### 并行执行
|
|
128
125
|
|
|
129
126
|
独立操作必须并行,不要串行等待。
|
|
130
127
|
|
|
131
|
-
可以并行:
|
|
132
|
-
- 查多个 task_id 的状态
|
|
133
|
-
- 提取多个任务的原文(多个 `transcribe text` 并行)
|
|
134
|
-
- 写文件 + 提交下一批转写任务
|
|
135
|
-
|
|
136
|
-
不能并行:
|
|
137
|
-
- 提交任务 → 查结果(需要 task_id,且任务需时间完成)
|
|
138
|
-
|
|
139
128
|
### 不暴露中间过程
|
|
140
129
|
|
|
141
130
|
拿到转写结果后直接输出文案或执行下一步,不要把原始 JSON 全量展示给用户。
|
|
@@ -12,25 +12,23 @@ http://127.0.0.1:8025
|
|
|
12
12
|
X-API-Key: <你的 key>
|
|
13
13
|
```
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
## 按 video_id 转写(推荐)
|
|
16
16
|
|
|
17
17
|
```text
|
|
18
|
-
video-
|
|
18
|
+
POST /api/v1/media/transcribe-by-video-ids
|
|
19
19
|
```
|
|
20
20
|
|
|
21
|
-
|
|
21
|
+
请求体:
|
|
22
22
|
|
|
23
|
-
```
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
```text
|
|
30
|
-
GET /api/v1/auth/capability-permissions
|
|
23
|
+
```json
|
|
24
|
+
{
|
|
25
|
+
"video_ids": ["7582142740935970054", "7576959070901930987"],
|
|
26
|
+
"group_label": "竞品分析",
|
|
27
|
+
"webhook": "https://example.com/callback"
|
|
28
|
+
}
|
|
31
29
|
```
|
|
32
30
|
|
|
33
|
-
##
|
|
31
|
+
## 提交批量视频转文字(传 URL)
|
|
34
32
|
|
|
35
33
|
```text
|
|
36
34
|
POST /api/v1/media/batch-video-to-text
|
|
@@ -45,7 +43,9 @@ POST /api/v1/media/batch-video-to-text
|
|
|
45
43
|
"video_url": "https://example.com/video.mp4",
|
|
46
44
|
"third_party_task_id": "demo-001"
|
|
47
45
|
}
|
|
48
|
-
]
|
|
46
|
+
],
|
|
47
|
+
"group_label": "批次标签",
|
|
48
|
+
"webhook": "https://example.com/callback"
|
|
49
49
|
}
|
|
50
50
|
```
|
|
51
51
|
|