npm - @cablate/banini-tracker - Versions diffs - 2.0.8 → 2.0.10 - Mend

@cablate/banini-tracker 2.0.8 → 2.0.10

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/dist/cli.js CHANGED Viewed

@@ -62,6 +62,8 @@ program
     .description('抓取最新貼文（輸出 JSON 到 stdout）')
     .option('-s, --source <source>', '來源：fb', 'fb')
     .option('-n, --limit <n>', '每個來源抓幾篇', '3')
+    .option('--since <date>', '只抓此時間之後的貼文（YYYY-MM-DD 或 ISO 時間戳或相對時間如 "2 months"）')
+    .option('--until <date>', '只抓此時間之前的貼文')
     .option('--no-dedup', '不做去重，抓到什麼就輸出什麼')
     .option('--mark-seen', '輸出後自動標記為已讀')
     .action(async (opts) => {
@@ -69,7 +71,8 @@ program
         const config = loadConfig();
         const limit = parseInt(opts.limit, 10);
         let posts = [];
-        const fp = await fetchFacebookPosts(config.targets.facebookPageUrl, config.apifyToken, limit);
+        const fetchOpts = (opts.since || opts.until) ? { since: opts.since, until: opts.until } : undefined;
+        const fp = await fetchFacebookPosts(config.targets.facebookPageUrl, config.apifyToken, limit, fetchOpts);
         posts.push(...fp);
         // 按時間從新到舊
         posts.sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime());

package/dist/db.js CHANGED Viewed

@@ -19,6 +19,21 @@ export function getDb() {
 }
 function migrate(db) {
     db.exec(`
+    CREATE TABLE IF NOT EXISTS posts (
+      id TEXT PRIMARY KEY,
+      source TEXT NOT NULL DEFAULT 'facebook',
+      text TEXT NOT NULL DEFAULT '',
+      ocr_text TEXT NOT NULL DEFAULT '',
+      transcript_text TEXT NOT NULL DEFAULT '',
+      media_type TEXT NOT NULL DEFAULT 'text',
+      media_url TEXT NOT NULL DEFAULT '',
+      url TEXT NOT NULL DEFAULT '',
+      like_count INTEGER NOT NULL DEFAULT 0,
+      comment_count INTEGER NOT NULL DEFAULT 0,
+      post_timestamp TEXT NOT NULL,
+      fetched_at TEXT NOT NULL
+    );
     CREATE TABLE IF NOT EXISTS predictions (
       id INTEGER PRIMARY KEY AUTOINCREMENT,
       post_id TEXT NOT NULL,

package/dist/facebook.d.ts CHANGED Viewed

@@ -3,6 +3,7 @@ export interface FacebookPost {
     source: 'facebook';
     text: string;
     ocrText: string;
+    captionText: string;
     timestamp: string;
     likeCount: number;
     commentCount: number;
@@ -11,4 +12,8 @@ export interface FacebookPost {
     mediaType: string;
     mediaUrl: string;
 }
-export declare function fetchFacebookPosts(pageUrl: string, token: string, maxPosts?: number): Promise<FacebookPost[]>;
+export interface FetchOptions {
+    since?: string;
+    until?: string;
+}
+export declare function fetchFacebookPosts(pageUrl: string, token: string, maxPosts?: number, options?: FetchOptions): Promise<FacebookPost[]>;

package/dist/facebook.js CHANGED Viewed

@@ -1,16 +1,22 @@
-export async function fetchFacebookPosts(pageUrl, token, maxPosts = 3) {
+export async function fetchFacebookPosts(pageUrl, token, maxPosts = 3, options) {
     const actorId = 'apify~facebook-posts-scraper';
     const url = `https://api.apify.com/v2/acts/${actorId}/run-sync-get-dataset-items`;
+    const body = {
+        startUrls: [{ url: pageUrl }],
+        resultsLimit: maxPosts,
+        captionText: true,
+    };
+    if (options?.since)
+        body.onlyPostsNewerThan = options.since;
+    if (options?.until)
+        body.onlyPostsOlderThan = options.until;
     const res = await fetch(url, {
         method: 'POST',
         headers: {
             'Content-Type': 'application/json',
             Authorization: `Bearer ${token}`,
         },
-        body: JSON.stringify({
-            startUrls: [{ url: pageUrl }],
-            resultsLimit: maxPosts,
-        }),
+        body: JSON.stringify(body),
         signal: AbortSignal.timeout(180_000),
     });
     if (!res.ok) {
@@ -23,11 +29,17 @@ export async function fetchFacebookPosts(pageUrl, token, maxPosts = 3) {
         const ocrTexts = (item.media ?? [])
             .map((m) => m.ocrText ?? '')
             .filter((t) => t.length > 0);
+        // captionText 可能在 media item 或頂層
+        const captionTexts = (item.media ?? [])
+            .map((m) => m.captionText ?? '')
+            .filter((t) => t.length > 0);
+        const captionText = captionTexts.join('\n') || item.captionText || '';
         return {
             id: `fb_${item.postId ?? item.id ?? ''}`,
             source: 'facebook',
             text: item.text ?? item.message ?? '',
             ocrText: ocrTexts.join('\n'),
+            captionText,
             timestamp: item.time ?? new Date().toISOString(),
             likeCount: item.likes ?? 0,
             commentCount: item.comments ?? 0,

package/dist/index.js CHANGED Viewed

@@ -18,6 +18,7 @@ import { filterNewPosts as filterNew, markPostsSeen } from './seen.js';
 import { withRetry } from './retry.js';
 import { createTranscriber, transcribeVideoPosts } from './transcribe.js';
 import { recordPredictions, updateTracking } from './tracker.js';
+import { getDb } from './db.js';
 // ── Config ──────────────────────────────────────────────────
 const FB_PAGE_URL = 'https://www.facebook.com/DieWithoutBang/';
 const DATA_DIR = process.env.DATA_DIR || join(process.cwd(), 'data');
@@ -34,7 +35,7 @@ function fromFacebook(p) {
         source: 'facebook',
         text: p.text,
         ocrText: p.ocrText,
-        transcriptText: '',
+        transcriptText: p.captionText || '',
         timestamp: p.timestamp,
         likeCount: p.likeCount,
         replyCount: p.commentCount,
@@ -65,7 +66,8 @@ async function runInner(opts) {
     const allPosts = [];
     // 1. 抓取 Facebook（含 retry）
     try {
-        const fbPosts = await withRetry(() => fetchFacebookPosts(FB_PAGE_URL, apifyToken, opts.maxPosts), { label: 'Facebook', maxRetries: 2, baseDelayMs: 5000 });
+        const fetchOpts = (opts.since || opts.until) ? { since: opts.since, until: opts.until } : undefined;
+        const fbPosts = await withRetry(() => fetchFacebookPosts(FB_PAGE_URL, apifyToken, opts.maxPosts, fetchOpts), { label: 'Facebook', maxRetries: 2, baseDelayMs: 5000 });
         allPosts.push(...fbPosts.map(fromFacebook));
     }
     catch (err) {
@@ -81,17 +83,56 @@ async function runInner(opts) {
         console.log('沒有新貼文，結束');
         return;
     }
-    // 2.5. 影片轉錄
+    // 2.5. 影片轉錄（captionText 有值則跳過 Groq）
     const transcriberType = (process.env.TRANSCRIBER ?? 'noop');
     const transcriber = createTranscriber(transcriberType);
     if (transcriber.name !== 'noop') {
-        const transcripts = await transcribeVideoPosts(newPosts, transcriber);
-        for (const p of newPosts) {
-            const result = transcripts.get(p.id);
-            if (result)
-                p.transcriptText = result.text;
+        const needsTranscribe = newPosts.filter((p) => !p.transcriptText);
+        if (needsTranscribe.length > 0) {
+            const transcripts = await transcribeVideoPosts(needsTranscribe, transcriber);
+            for (const p of needsTranscribe) {
+                const result = transcripts.get(p.id);
+                if (result)
+                    p.transcriptText = result.text;
+            }
         }
     }
+    // 2.6. 貼文入庫
+    try {
+        const db = getDb();
+        const upsertPost = db.prepare(`
+      INSERT INTO posts (id, source, text, ocr_text, transcript_text, media_type, media_url, url, like_count, comment_count, post_timestamp, fetched_at)
+      VALUES (@id, @source, @text, @ocr_text, @transcript_text, @media_type, @media_url, @url, @like_count, @comment_count, @post_timestamp, @fetched_at)
+      ON CONFLICT(id) DO UPDATE SET
+        transcript_text = CASE WHEN excluded.transcript_text != '' THEN excluded.transcript_text ELSE posts.transcript_text END,
+        like_count = excluded.like_count,
+        comment_count = excluded.comment_count
+    `);
+        const now = new Date().toISOString();
+        const insertMany = db.transaction(() => {
+            for (const p of newPosts) {
+                upsertPost.run({
+                    id: p.id,
+                    source: p.source,
+                    text: p.text,
+                    ocr_text: p.ocrText,
+                    transcript_text: p.transcriptText,
+                    media_type: p.mediaType,
+                    media_url: p.mediaUrl,
+                    url: p.url,
+                    like_count: p.likeCount,
+                    comment_count: p.replyCount,
+                    post_timestamp: p.timestamp,
+                    fetched_at: now,
+                });
+            }
+        });
+        insertMany();
+        console.log(`[DB] 已存入 ${newPosts.length} 篇貼文`);
+    }
+    catch (err) {
+        console.error(`[DB] 貼文入庫失敗: ${err instanceof Error ? err.message : err}`);
+    }
     // 按時間從新到舊排序
     newPosts.sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime());
     // 標記當天貼文
@@ -226,16 +267,42 @@ async function runInner(opts) {
     writeFileSync(outFile, JSON.stringify({ timestamp: new Date().toISOString(), posts: newPosts, analysis }, null, 2), 'utf-8');
     console.log(`結果已存檔: ${outFile}`);
 }
+/**
+ * 產生台北時間今天指定時分的 ISO 時間戳
+ * 用於 Apify onlyPostsNewerThan 參數
+ */
+function taipeiToday(hours, minutes = 0) {
+    const now = new Date();
+    const taipeiStr = now.toLocaleString('en-US', { timeZone: 'Asia/Taipei' });
+    const taipeiNow = new Date(taipeiStr);
+    taipeiNow.setHours(hours, minutes, 0, 0);
+    // 轉回 UTC：台北 = UTC+8
+    const utc = new Date(taipeiNow.getTime() - 8 * 60 * 60 * 1000);
+    return utc.toISOString();
+}
+function taipeiYesterday(hours, minutes = 0) {
+    const now = new Date();
+    const taipeiStr = now.toLocaleString('en-US', { timeZone: 'Asia/Taipei' });
+    const taipeiNow = new Date(taipeiStr);
+    taipeiNow.setDate(taipeiNow.getDate() - 1);
+    taipeiNow.setHours(hours, minutes, 0, 0);
+    const utc = new Date(taipeiNow.getTime() - 8 * 60 * 60 * 1000);
+    return utc.toISOString();
+}
 // ── 入口 ────────────────────────────────────────────────────
 if (isCronMode) {
-    // 盤中：週一到五 09:00-13:30，每 30 分鐘，FB only 抓 1 篇
-    // cron 不支援半小時結束，用 9:00-13:00 每 30 分 + 13:30 單獨一個
+    // 早晨補漏：每天 08:00，抓前一晚 22:00 之後的貼文
+    cron.schedule('0 8 * * *', () => {
+        run({ maxPosts: 3, isDryRun: false, label: '早晨', since: taipeiYesterday(22, 0) })
+            .catch((err) => console.error('[早晨] 執行失敗:', err));
+    }, { timezone: 'Asia/Taipei' });
+    // 盤中：週一到五 09:00-13:30，每 30 分鐘，抓 08:30 之後的貼文
     cron.schedule('7,37 9-12 * * 1-5', () => {
-        run({ maxPosts: 1, isDryRun: false, label: '盤中' })
+        run({ maxPosts: 1, isDryRun: false, label: '盤中', since: taipeiToday(8, 30) })
             .catch((err) => console.error('[盤中] 執行失敗:', err));
     }, { timezone: 'Asia/Taipei' });
     cron.schedule('7 13 * * 1-5', () => {
-        run({ maxPosts: 1, isDryRun: false, label: '盤中' })
+        run({ maxPosts: 1, isDryRun: false, label: '盤中', since: taipeiToday(8, 30) })
             .catch((err) => console.error('[盤中] 執行失敗:', err));
     }, { timezone: 'Asia/Taipei' });
     // 追蹤更新：週一到五 15:00（收盤後更新預測追蹤）
@@ -243,15 +310,16 @@ if (isCronMode) {
         updateTracking()
             .catch((err) => console.error('[追蹤更新] 執行失敗:', err));
     }, { timezone: 'Asia/Taipei' });
-    // 盤後：每天晚上 23:00，FB 3 篇
+    // 盤後：每天晚上 23:03，抓 13:30 之後的貼文
     cron.schedule('3 23 * * *', () => {
-        run({ maxPosts: 3, isDryRun: false, label: '盤後' })
+        run({ maxPosts: 3, isDryRun: false, label: '盤後', since: taipeiToday(13, 30) })
             .catch((err) => console.error('[盤後] 執行失敗:', err));
     }, { timezone: 'Asia/Taipei' });
     console.log('=== 巴逆逆排程已啟動 ===');
-    console.log('  盤中：週一~五 09:07/09:37/10:07/.../13:07（FB, 1 篇）');
+    console.log('  早晨：每天 08:00（前晚 22:00 起，3 篇）');
+    console.log('  盤中：週一~五 09:07/09:37/10:07/.../13:07（08:30 起，1 篇）');
     console.log('  追蹤更新：週一~五 15:00（預測追蹤判定）');
-    console.log('  盤後：每天 23:03（FB, 3 篇）');
+    console.log('  盤後：每天 23:03（13:30 起，3 篇）');
     console.log('  按 Ctrl+C 停止\n');
 }
 else {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@cablate/banini-tracker",
-  "version": "2.0.8",
+  "version": "2.0.10",
   "description": "巴逆逆反指標追蹤器 — 常駐排程 + CLI 雙模式",
   "type": "module",
   "bin": {