@cablate/banini-tracker 2.0.11 → 2.0.12

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -5,6 +5,8 @@ import { fetchFacebookPosts } from './facebook.js';
5
5
  import { sendTelegramMessage } from './telegram.js';
6
6
  import { filterNewPosts, markPostsSeen, listSeenIds, clearSeen } from './seen.js';
7
7
  import { readFileSync } from 'fs';
8
+ import { createTranscriber, transcribeVideoPosts, isVideoPost } from './transcribe.js';
9
+ import { getDb } from './db.js';
8
10
  const program = new Command();
9
11
  program
10
12
  .name('banini-tracker')
@@ -18,10 +20,13 @@ program
18
20
  .option('--tg-bot-token <token>', 'Telegram Bot token')
19
21
  .option('--tg-channel-id <id>', 'Telegram Channel ID')
20
22
  .option('--fb-page-url <url>', 'Facebook 粉專網址', 'https://www.facebook.com/DieWithoutBang/')
23
+ .option('--groq-api-key <key>', 'Groq API key(影片轉錄用)')
21
24
  .action((opts) => {
22
25
  const config = defaultConfig();
23
26
  if (opts.apifyToken)
24
27
  config.apifyToken = opts.apifyToken;
28
+ if (opts.groqApiKey)
29
+ config.groqApiKey = opts.groqApiKey;
25
30
  if (opts.tgBotToken || opts.tgChannelId) {
26
31
  config.telegram = {
27
32
  botToken: opts.tgBotToken ?? '',
@@ -66,6 +71,8 @@ program
66
71
  .option('--until <date>', '只抓此時間之前的貼文')
67
72
  .option('--no-dedup', '不做去重,抓到什麼就輸出什麼')
68
73
  .option('--mark-seen', '輸出後自動標記為已讀')
74
+ .option('--transcribe', '自動轉錄影片(captionText 為空時走 Groq Whisper)')
75
+ .option('--save-db', '抓取後直接存入 SQLite')
69
76
  .action(async (opts) => {
70
77
  try {
71
78
  const config = loadConfig();
@@ -80,6 +87,53 @@ program
80
87
  if (opts.dedup !== false) {
81
88
  posts = filterNewPosts(posts);
82
89
  }
90
+ // 影片轉錄:captionText 為空的影片走 Groq
91
+ if (opts.transcribe) {
92
+ const groqKey = config.groqApiKey || process.env.GROQ_API_KEY;
93
+ if (!groqKey) {
94
+ console.error('⚠ --transcribe 需要 Groq API key,請用 init --groq-api-key 設定或設定環境變數 GROQ_API_KEY');
95
+ }
96
+ else {
97
+ const needsTranscribe = posts.filter((p) => isVideoPost(p.mediaType) && !p.captionText);
98
+ if (needsTranscribe.length > 0) {
99
+ console.error(`[轉錄] ${needsTranscribe.length} 篇影片需要轉錄...`);
100
+ if (!process.env.GROQ_API_KEY)
101
+ process.env.GROQ_API_KEY = groqKey;
102
+ const transcriber = createTranscriber('groq');
103
+ const transcripts = await transcribeVideoPosts(needsTranscribe, transcriber);
104
+ for (const p of needsTranscribe) {
105
+ const result = transcripts.get(p.id);
106
+ if (result)
107
+ p.captionText = result.text;
108
+ }
109
+ }
110
+ }
111
+ }
112
+ // 存入 DB
113
+ if (opts.saveDb && posts.length > 0) {
114
+ const db = getDb();
115
+ const upsert = db.prepare(`
116
+ INSERT INTO posts (id, source, text, ocr_text, transcript_text, media_type, media_url, url, like_count, comment_count, post_timestamp, fetched_at)
117
+ VALUES (@id, @source, @text, @ocr_text, @transcript_text, @media_type, @media_url, @url, @like_count, @comment_count, @post_timestamp, @fetched_at)
118
+ ON CONFLICT(id) DO UPDATE SET
119
+ transcript_text = CASE WHEN excluded.transcript_text != '' THEN excluded.transcript_text ELSE posts.transcript_text END,
120
+ like_count = excluded.like_count,
121
+ comment_count = excluded.comment_count
122
+ `);
123
+ const now = new Date().toISOString();
124
+ db.transaction(() => {
125
+ for (const p of posts) {
126
+ upsert.run({
127
+ id: p.id, source: p.source, text: p.text,
128
+ ocr_text: p.ocrText || '', transcript_text: p.captionText || '',
129
+ media_type: p.mediaType, media_url: p.mediaUrl, url: p.url,
130
+ like_count: p.likeCount, comment_count: p.commentCount || 0,
131
+ post_timestamp: p.timestamp, fetched_at: now,
132
+ });
133
+ }
134
+ })();
135
+ console.error(`[DB] ${posts.length} 篇已存入`);
136
+ }
83
137
  // 標記已讀
84
138
  if (opts.markSeen && posts.length > 0) {
85
139
  markPostsSeen(posts.map((p) => p.id));
package/dist/config.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  export interface Config {
2
2
  apifyToken: string;
3
+ groqApiKey?: string;
3
4
  telegram?: {
4
5
  botToken: string;
5
6
  channelId: string;
@@ -66,7 +66,7 @@ export class GroqTranscriber {
66
66
  };
67
67
  }
68
68
  async transcribeViaDownload(videoUrl) {
69
- console.log(`[轉錄] 下載音訊: ${videoUrl.slice(0, 60)}...`);
69
+ console.error(`[轉錄] 下載音訊: ${videoUrl.slice(0, 60)}...`);
70
70
  const audioFile = await downloadAudio(videoUrl);
71
71
  try {
72
72
  const result = await this.client.audio.transcriptions.create({
@@ -115,14 +115,14 @@ export async function transcribeVideoPosts(posts, transcriber) {
115
115
  if (!isVideoPost(post.mediaType) || !post.mediaUrl)
116
116
  continue;
117
117
  try {
118
- console.log(`[轉錄][${transcriber.name}] 處理影片: ${post.id}`);
118
+ console.error(`[轉錄][${transcriber.name}] 處理影片: ${post.id}`);
119
119
  const result = await transcriber.transcribe(post.mediaUrl);
120
120
  if (result.text.trim().length > 0) {
121
121
  results.set(post.id, result);
122
- console.log(`[轉錄] ${post.id}: ${result.text.slice(0, 50)}...(${result.durationSec ?? '?'}s)`);
122
+ console.error(`[轉錄] ${post.id}: ${result.text.slice(0, 50)}...(${result.durationSec ?? '?'}s)`);
123
123
  }
124
124
  else {
125
- console.log(`[轉錄] ${post.id}: 無可辨識內容`);
125
+ console.error(`[轉錄] ${post.id}: 無可辨識內容`);
126
126
  }
127
127
  }
128
128
  catch (err) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cablate/banini-tracker",
3
- "version": "2.0.11",
3
+ "version": "2.0.12",
4
4
  "description": "巴逆逆反指標追蹤器 — 常駐排程 + CLI 雙模式",
5
5
  "type": "module",
6
6
  "bin": {