@cablate/banini-tracker 2.0.7 → 2.0.9

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/dist/db.js CHANGED
@@ -19,6 +19,21 @@ export function getDb() {
19
19
  }
20
20
  function migrate(db) {
21
21
  db.exec(`
22
+ CREATE TABLE IF NOT EXISTS posts (
23
+ id TEXT PRIMARY KEY,
24
+ source TEXT NOT NULL DEFAULT 'facebook',
25
+ text TEXT NOT NULL DEFAULT '',
26
+ ocr_text TEXT NOT NULL DEFAULT '',
27
+ transcript_text TEXT NOT NULL DEFAULT '',
28
+ media_type TEXT NOT NULL DEFAULT 'text',
29
+ media_url TEXT NOT NULL DEFAULT '',
30
+ url TEXT NOT NULL DEFAULT '',
31
+ like_count INTEGER NOT NULL DEFAULT 0,
32
+ comment_count INTEGER NOT NULL DEFAULT 0,
33
+ post_timestamp TEXT NOT NULL,
34
+ fetched_at TEXT NOT NULL
35
+ );
36
+
22
37
  CREATE TABLE IF NOT EXISTS predictions (
23
38
  id INTEGER PRIMARY KEY AUTOINCREMENT,
24
39
  post_id TEXT NOT NULL,
@@ -3,6 +3,7 @@ export interface FacebookPost {
3
3
  source: 'facebook';
4
4
  text: string;
5
5
  ocrText: string;
6
+ captionText: string;
6
7
  timestamp: string;
7
8
  likeCount: number;
8
9
  commentCount: number;
package/dist/facebook.js CHANGED
@@ -10,6 +10,7 @@ export async function fetchFacebookPosts(pageUrl, token, maxPosts = 3) {
10
10
  body: JSON.stringify({
11
11
  startUrls: [{ url: pageUrl }],
12
12
  resultsLimit: maxPosts,
13
+ captionText: true,
13
14
  }),
14
15
  signal: AbortSignal.timeout(180_000),
15
16
  });
@@ -23,11 +24,17 @@ export async function fetchFacebookPosts(pageUrl, token, maxPosts = 3) {
23
24
  const ocrTexts = (item.media ?? [])
24
25
  .map((m) => m.ocrText ?? '')
25
26
  .filter((t) => t.length > 0);
27
+ // captionText 可能在 media item 或頂層
28
+ const captionTexts = (item.media ?? [])
29
+ .map((m) => m.captionText ?? '')
30
+ .filter((t) => t.length > 0);
31
+ const captionText = captionTexts.join('\n') || item.captionText || '';
26
32
  return {
27
33
  id: `fb_${item.postId ?? item.id ?? ''}`,
28
34
  source: 'facebook',
29
35
  text: item.text ?? item.message ?? '',
30
36
  ocrText: ocrTexts.join('\n'),
37
+ captionText,
31
38
  timestamp: item.time ?? new Date().toISOString(),
32
39
  likeCount: item.likes ?? 0,
33
40
  commentCount: item.comments ?? 0,
package/dist/index.js CHANGED
@@ -18,6 +18,7 @@ import { filterNewPosts as filterNew, markPostsSeen } from './seen.js';
18
18
  import { withRetry } from './retry.js';
19
19
  import { createTranscriber, transcribeVideoPosts } from './transcribe.js';
20
20
  import { recordPredictions, updateTracking } from './tracker.js';
21
+ import { getDb } from './db.js';
21
22
  // ── Config ──────────────────────────────────────────────────
22
23
  const FB_PAGE_URL = 'https://www.facebook.com/DieWithoutBang/';
23
24
  const DATA_DIR = process.env.DATA_DIR || join(process.cwd(), 'data');
@@ -34,7 +35,7 @@ function fromFacebook(p) {
34
35
  source: 'facebook',
35
36
  text: p.text,
36
37
  ocrText: p.ocrText,
37
- transcriptText: '',
38
+ transcriptText: p.captionText || '',
38
39
  timestamp: p.timestamp,
39
40
  likeCount: p.likeCount,
40
41
  replyCount: p.commentCount,
@@ -81,17 +82,56 @@ async function runInner(opts) {
81
82
  console.log('沒有新貼文,結束');
82
83
  return;
83
84
  }
84
- // 2.5. 影片轉錄
85
+ // 2.5. 影片轉錄(captionText 有值則跳過 Groq)
85
86
  const transcriberType = (process.env.TRANSCRIBER ?? 'noop');
86
87
  const transcriber = createTranscriber(transcriberType);
87
88
  if (transcriber.name !== 'noop') {
88
- const transcripts = await transcribeVideoPosts(newPosts, transcriber);
89
- for (const p of newPosts) {
90
- const result = transcripts.get(p.id);
91
- if (result)
92
- p.transcriptText = result.text;
89
+ const needsTranscribe = newPosts.filter((p) => !p.transcriptText);
90
+ if (needsTranscribe.length > 0) {
91
+ const transcripts = await transcribeVideoPosts(needsTranscribe, transcriber);
92
+ for (const p of needsTranscribe) {
93
+ const result = transcripts.get(p.id);
94
+ if (result)
95
+ p.transcriptText = result.text;
96
+ }
93
97
  }
94
98
  }
99
+ // 2.6. 貼文入庫
100
+ try {
101
+ const db = getDb();
102
+ const upsertPost = db.prepare(`
103
+ INSERT INTO posts (id, source, text, ocr_text, transcript_text, media_type, media_url, url, like_count, comment_count, post_timestamp, fetched_at)
104
+ VALUES (@id, @source, @text, @ocr_text, @transcript_text, @media_type, @media_url, @url, @like_count, @comment_count, @post_timestamp, @fetched_at)
105
+ ON CONFLICT(id) DO UPDATE SET
106
+ transcript_text = CASE WHEN excluded.transcript_text != '' THEN excluded.transcript_text ELSE posts.transcript_text END,
107
+ like_count = excluded.like_count,
108
+ comment_count = excluded.comment_count
109
+ `);
110
+ const now = new Date().toISOString();
111
+ const insertMany = db.transaction(() => {
112
+ for (const p of newPosts) {
113
+ upsertPost.run({
114
+ id: p.id,
115
+ source: p.source,
116
+ text: p.text,
117
+ ocr_text: p.ocrText,
118
+ transcript_text: p.transcriptText,
119
+ media_type: p.mediaType,
120
+ media_url: p.mediaUrl,
121
+ url: p.url,
122
+ like_count: p.likeCount,
123
+ comment_count: p.replyCount,
124
+ post_timestamp: p.timestamp,
125
+ fetched_at: now,
126
+ });
127
+ }
128
+ });
129
+ insertMany();
130
+ console.log(`[DB] 已存入 ${newPosts.length} 篇貼文`);
131
+ }
132
+ catch (err) {
133
+ console.error(`[DB] 貼文入庫失敗: ${err instanceof Error ? err.message : err}`);
134
+ }
95
135
  // 按時間從新到舊排序
96
136
  newPosts.sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime());
97
137
  // 標記當天貼文
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cablate/banini-tracker",
3
- "version": "2.0.7",
3
+ "version": "2.0.9",
4
4
  "description": "巴逆逆反指標追蹤器 — 常駐排程 + CLI 雙模式",
5
5
  "type": "module",
6
6
  "bin": {