@cablate/banini-tracker 2.0.11 → 2.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +54 -0
- package/dist/config.d.ts +1 -0
- package/dist/transcribe.js +4 -4
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -5,6 +5,8 @@ import { fetchFacebookPosts } from './facebook.js';
|
|
|
5
5
|
import { sendTelegramMessage } from './telegram.js';
|
|
6
6
|
import { filterNewPosts, markPostsSeen, listSeenIds, clearSeen } from './seen.js';
|
|
7
7
|
import { readFileSync } from 'fs';
|
|
8
|
+
import { createTranscriber, transcribeVideoPosts, isVideoPost } from './transcribe.js';
|
|
9
|
+
import { getDb } from './db.js';
|
|
8
10
|
const program = new Command();
|
|
9
11
|
program
|
|
10
12
|
.name('banini-tracker')
|
|
@@ -18,10 +20,13 @@ program
|
|
|
18
20
|
.option('--tg-bot-token <token>', 'Telegram Bot token')
|
|
19
21
|
.option('--tg-channel-id <id>', 'Telegram Channel ID')
|
|
20
22
|
.option('--fb-page-url <url>', 'Facebook 粉專網址', 'https://www.facebook.com/DieWithoutBang/')
|
|
23
|
+
.option('--groq-api-key <key>', 'Groq API key(影片轉錄用)')
|
|
21
24
|
.action((opts) => {
|
|
22
25
|
const config = defaultConfig();
|
|
23
26
|
if (opts.apifyToken)
|
|
24
27
|
config.apifyToken = opts.apifyToken;
|
|
28
|
+
if (opts.groqApiKey)
|
|
29
|
+
config.groqApiKey = opts.groqApiKey;
|
|
25
30
|
if (opts.tgBotToken || opts.tgChannelId) {
|
|
26
31
|
config.telegram = {
|
|
27
32
|
botToken: opts.tgBotToken ?? '',
|
|
@@ -66,6 +71,8 @@ program
|
|
|
66
71
|
.option('--until <date>', '只抓此時間之前的貼文')
|
|
67
72
|
.option('--no-dedup', '不做去重,抓到什麼就輸出什麼')
|
|
68
73
|
.option('--mark-seen', '輸出後自動標記為已讀')
|
|
74
|
+
.option('--transcribe', '自動轉錄影片(captionText 為空時走 Groq Whisper)')
|
|
75
|
+
.option('--save-db', '抓取後直接存入 SQLite')
|
|
69
76
|
.action(async (opts) => {
|
|
70
77
|
try {
|
|
71
78
|
const config = loadConfig();
|
|
@@ -80,6 +87,53 @@ program
|
|
|
80
87
|
if (opts.dedup !== false) {
|
|
81
88
|
posts = filterNewPosts(posts);
|
|
82
89
|
}
|
|
90
|
+
// 影片轉錄:captionText 為空的影片走 Groq
|
|
91
|
+
if (opts.transcribe) {
|
|
92
|
+
const groqKey = config.groqApiKey || process.env.GROQ_API_KEY;
|
|
93
|
+
if (!groqKey) {
|
|
94
|
+
console.error('⚠ --transcribe 需要 Groq API key,請用 init --groq-api-key 設定或設定環境變數 GROQ_API_KEY');
|
|
95
|
+
}
|
|
96
|
+
else {
|
|
97
|
+
const needsTranscribe = posts.filter((p) => isVideoPost(p.mediaType) && !p.captionText);
|
|
98
|
+
if (needsTranscribe.length > 0) {
|
|
99
|
+
console.error(`[轉錄] ${needsTranscribe.length} 篇影片需要轉錄...`);
|
|
100
|
+
if (!process.env.GROQ_API_KEY)
|
|
101
|
+
process.env.GROQ_API_KEY = groqKey;
|
|
102
|
+
const transcriber = createTranscriber('groq');
|
|
103
|
+
const transcripts = await transcribeVideoPosts(needsTranscribe, transcriber);
|
|
104
|
+
for (const p of needsTranscribe) {
|
|
105
|
+
const result = transcripts.get(p.id);
|
|
106
|
+
if (result)
|
|
107
|
+
p.captionText = result.text;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
// 存入 DB
|
|
113
|
+
if (opts.saveDb && posts.length > 0) {
|
|
114
|
+
const db = getDb();
|
|
115
|
+
const upsert = db.prepare(`
|
|
116
|
+
INSERT INTO posts (id, source, text, ocr_text, transcript_text, media_type, media_url, url, like_count, comment_count, post_timestamp, fetched_at)
|
|
117
|
+
VALUES (@id, @source, @text, @ocr_text, @transcript_text, @media_type, @media_url, @url, @like_count, @comment_count, @post_timestamp, @fetched_at)
|
|
118
|
+
ON CONFLICT(id) DO UPDATE SET
|
|
119
|
+
transcript_text = CASE WHEN excluded.transcript_text != '' THEN excluded.transcript_text ELSE posts.transcript_text END,
|
|
120
|
+
like_count = excluded.like_count,
|
|
121
|
+
comment_count = excluded.comment_count
|
|
122
|
+
`);
|
|
123
|
+
const now = new Date().toISOString();
|
|
124
|
+
db.transaction(() => {
|
|
125
|
+
for (const p of posts) {
|
|
126
|
+
upsert.run({
|
|
127
|
+
id: p.id, source: p.source, text: p.text,
|
|
128
|
+
ocr_text: p.ocrText || '', transcript_text: p.captionText || '',
|
|
129
|
+
media_type: p.mediaType, media_url: p.mediaUrl, url: p.url,
|
|
130
|
+
like_count: p.likeCount, comment_count: p.commentCount || 0,
|
|
131
|
+
post_timestamp: p.timestamp, fetched_at: now,
|
|
132
|
+
});
|
|
133
|
+
}
|
|
134
|
+
})();
|
|
135
|
+
console.error(`[DB] ${posts.length} 篇已存入`);
|
|
136
|
+
}
|
|
83
137
|
// 標記已讀
|
|
84
138
|
if (opts.markSeen && posts.length > 0) {
|
|
85
139
|
markPostsSeen(posts.map((p) => p.id));
|
package/dist/config.d.ts
CHANGED
package/dist/transcribe.js
CHANGED
|
@@ -66,7 +66,7 @@ export class GroqTranscriber {
|
|
|
66
66
|
};
|
|
67
67
|
}
|
|
68
68
|
async transcribeViaDownload(videoUrl) {
|
|
69
|
-
console.
|
|
69
|
+
console.error(`[轉錄] 下載音訊: ${videoUrl.slice(0, 60)}...`);
|
|
70
70
|
const audioFile = await downloadAudio(videoUrl);
|
|
71
71
|
try {
|
|
72
72
|
const result = await this.client.audio.transcriptions.create({
|
|
@@ -115,14 +115,14 @@ export async function transcribeVideoPosts(posts, transcriber) {
|
|
|
115
115
|
if (!isVideoPost(post.mediaType) || !post.mediaUrl)
|
|
116
116
|
continue;
|
|
117
117
|
try {
|
|
118
|
-
console.
|
|
118
|
+
console.error(`[轉錄][${transcriber.name}] 處理影片: ${post.id}`);
|
|
119
119
|
const result = await transcriber.transcribe(post.mediaUrl);
|
|
120
120
|
if (result.text.trim().length > 0) {
|
|
121
121
|
results.set(post.id, result);
|
|
122
|
-
console.
|
|
122
|
+
console.error(`[轉錄] ${post.id}: ${result.text.slice(0, 50)}...(${result.durationSec ?? '?'}s)`);
|
|
123
123
|
}
|
|
124
124
|
else {
|
|
125
|
-
console.
|
|
125
|
+
console.error(`[轉錄] ${post.id}: 無可辨識內容`);
|
|
126
126
|
}
|
|
127
127
|
}
|
|
128
128
|
catch (err) {
|