npm - @aikeytake/social-automation - Versions diffs - 2.0.0 → 2.0.2 - Mend

@aikeytake/social-automation 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/package.json +1 -2
package/src/index.js +0 -48
package/src/fetchers/linkedin.js +0 -400
package/src/fetchers/linkedin_browser.js +0 -167
package/src/fetchers/twitter.js +0 -194

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@aikeytake/social-automation",
-  "version": "2.0.0",
+  "version": "2.0.2",
   "description": "Content research and aggregation tool for AI agents",
   "main": "src/index.js",
   "type": "module",
@@ -28,7 +28,6 @@
     "axios": "^1.7.9",
     "cheerio": "^1.0.0",
     "dotenv": "^16.4.7",
-    "playwright": "^1.58.2",
     "rss-parser": "^3.13.0"
   },
   "devDependencies": {

package/src/index.js CHANGED

@@ -3,10 +3,7 @@ import createLogger from './utils/logger.js';
 import rssFetch from './fetchers/rss.js';
 import redditFetch from './fetchers/reddit.js';
 import hnFetch from './fetchers/hackernews.js';
-import linkedinFetch from './fetchers/linkedin.js';
 import apiFetch from './fetchers/api.js';
-import twitterFetch from './fetchers/twitter.js';
-import linkedinBrowserFetch from './fetchers/linkedin_browser.js';
 import fs from 'fs';
 import path from 'path';
 import { fileURLToPath } from 'url';
@@ -109,51 +106,6 @@ async function scrape(options = {}) {
     }
   }
-  // LinkedIn Browser (skip when toSupabase - requires browser)
-  if (config.linkedin_browser?.enabled && !toSupabase) {
-    logger.info('💼 Fetching from LinkedIn (browser)...');
-    try {
-      const items = await linkedinBrowserFetch(config);
-      results.sources.linkedin_browser = items.length;
-      results.items.push(...items);
-      if (saveToFilesystem) await saveSourceData('linkedin_browser', items, today);
-      logger.success(`✅ LinkedIn Browser: ${items.length} items`);
-    } catch (error) {
-      logger.error(`LinkedIn Browser fetch failed: ${error.message}`);
-      results.sources.linkedin_browser = 0;
-    }
-  }
-  // Twitter / X (skip when toSupabase - may require auth)
-  if (config.trendingSources?.twitter?.enabled && !toSupabase) {
-    logger.info('🐦 Fetching from Twitter/X...');
-    try {
-      const twitterItems = await twitterFetch(config);
-      results.sources.twitter = twitterItems.length;
-      results.items.push(...twitterItems);
-      if (saveToFilesystem) await saveSourceData('twitter', twitterItems, today);
-      logger.success(`✅ Twitter: ${twitterItems.length} items`);
-    } catch (error) {
-      logger.error(`Twitter fetch failed: ${error.message}`);
-      results.sources.twitter = 0;
-    }
-  }
-  // LinkedIn (skip when toSupabase - requires auth)
-  if (config.linkedin?.enabled && !toSupabase) {
-    logger.info('💼 Fetching from LinkedIn...');
-    try {
-      const linkedinItems = await linkedinFetch(config);
-      results.sources.linkedin = linkedinItems.length;
-      results.items.push(...linkedinItems);
-      if (saveToFilesystem) await saveSourceData('linkedin', linkedinItems, today);
-      logger.success(`✅ LinkedIn: ${linkedinItems.length} items`);
-    } catch (error) {
-      logger.error(`LinkedIn fetch failed: ${error.message}`);
-      results.sources.linkedin = 0;
-    }
-  }
   // Save to Supabase if requested
   if (supabase) {
     await saveToSupabase(supabase, results.items, today);

package/src/fetchers/linkedin.js DELETED

@@ -1,400 +0,0 @@
-import axios from 'axios';
-import fs from 'fs';
-import path from 'path';
-import { fileURLToPath } from 'url';
-import crypto from 'crypto';
-import createLogger from '../utils/logger.js';
-const logger = createLogger('LinkedInFetcher');
-const BRIGHTDATA_API_URL = 'https://api.brightdata.com/request';
-const __dirname = path.dirname(fileURLToPath(import.meta.url));
-const STATE_FILE = path.join(__dirname, '../../data/kol-state.json');
-// Defaults (overridable via config/sources.json linkedin section)
-const DEFAULTS = {
-  batchSize: 8,          // KOLs per SERP query
-  budgetPerRun: 25,      // Max SERP API calls per run (25 × 8 = 200 KOLs per run)
-  checkIntervalHours: 24, // Re-check each KOL every 24h
-  timeRange: 'w',        // w=week, d=day, m=month
-  resultsPerBatch: 10,   // Google results per batch query
-  enrichContent: true,   // Scrape each post URL for full content + engagement
-  enrichConcurrency: 5,  // Parallel enrichment requests
-};
-export default async function linkedinFetch(config) {
-  if (!config.linkedin?.enabled) return [];
-  const BRIGHTDATA_API_KEY = process.env.BRIGHTDATA_API_KEY;
-  const BRIGHTDATA_ZONE = process.env.BRIGHTDATA_ZONE || 'mcp_unlocker';
-  if (!BRIGHTDATA_API_KEY) {
-    logger.warn('BRIGHTDATA_API_KEY not set, skipping LinkedIn scraping');
-    return [];
-  }
-  const profilesFile = config.linkedin.profilesFile;
-  if (!fs.existsSync(profilesFile)) {
-    logger.error(`LinkedIn profiles file not found: ${profilesFile}`);
-    return [];
-  }
-  let profiles;
-  try {
-    profiles = JSON.parse(fs.readFileSync(profilesFile, 'utf-8'));
-  } catch (err) {
-    logger.error(`Failed to parse LinkedIn profiles file: ${err.message}`);
-    return [];
-  }
-  const cfg = { ...DEFAULTS, ...config.linkedin };
-  const state = loadState();
-  const now = new Date();
-  const cutoffMs = cfg.checkIntervalHours * 3600 * 1000;
-  // Select only KOLs not checked recently
-  const dueKols = profiles.filter(p => {
-    const last = state[p.name]?.lastChecked;
-    return !last || (now - new Date(last)) >= cutoffMs;
-  });
-  const maxKols = cfg.budgetPerRun * cfg.batchSize;
-  const selectedKols = dueKols.slice(0, maxKols);
-  if (selectedKols.length === 0) {
-    logger.info(`LinkedIn: all ${profiles.length} KOLs recently checked, nothing due`);
-    return [];
-  }
-  const numBatches = Math.ceil(selectedKols.length / cfg.batchSize);
-  logger.info(`LinkedIn: checking ${selectedKols.length}/${profiles.length} KOLs in ${numBatches} batches...`);
-  const allPosts = [];
-  const batches = chunk(selectedKols, cfg.batchSize);
-  for (const batch of batches) {
-    try {
-      let posts = await fetchBatchWithRetry(batch, state, cfg, BRIGHTDATA_API_KEY, BRIGHTDATA_ZONE);
-      // Enrich posts with full content + engagement by scraping each post URL
-      if (cfg.enrichContent && posts.length > 0) {
-        posts = await enrichPosts(posts, cfg, BRIGHTDATA_API_KEY, BRIGHTDATA_ZONE);
-      }
-      allPosts.push(...posts);
-      // Update state: mark all KOLs in batch as checked, record seen post IDs
-      for (const kol of batch) {
-        if (!state[kol.name]) state[kol.name] = { seenPostIds: [] };
-        state[kol.name].lastChecked = now.toISOString();
-        const newIds = posts.filter(p => p.sourceName === kol.name).map(p => p.id);
-        state[kol.name].seenPostIds = [
-          ...new Set([...newIds, ...(state[kol.name].seenPostIds || [])]),
-        ].slice(0, 100); // keep last 100 seen post IDs per KOL
-      }
-      await new Promise(r => setTimeout(r, 600));
-    } catch (err) {
-      logger.error(`Batch failed: ${err.message}`);
-    }
-  }
-  saveState(state);
-  const checked = selectedKols.length;
-  const remaining = dueKols.length - checked;
-  logger.success(`LinkedIn: ${allPosts.length} new posts (${checked} KOLs checked, ${remaining} still due)`);
-  return allPosts;
-}
-async function fetchBatchWithRetry(batch, state, cfg, apiKey, zone, retries = 1) {
-  try {
-    return await fetchBatch(batch, state, cfg, apiKey, zone);
-  } catch (err) {
-    if (retries > 0 && (err.code === 'ECONNABORTED' || err.message?.includes('timeout'))) {
-      logger.warn(`Batch timed out, retrying... (${retries} left)`);
-      await new Promise(r => setTimeout(r, 2000));
-      return fetchBatchWithRetry(batch, state, cfg, apiKey, zone, retries - 1);
-    }
-    throw err;
-  }
-}
-async function fetchBatch(batch, state, cfg, apiKey, zone) {
-  // Batch multiple KOL names into one SERP query
-  const nameList = batch.map(k => `"${k.name}"`).join(' OR ');
-  const searchQuery = `site:linkedin.com/posts (${nameList})`;
-  const googleUrl = [
-    'https://www.google.com/search',
-    `?q=${encodeURIComponent(searchQuery)}`,
-    `&num=${cfg.resultsPerBatch}`,
-    `&tbs=qdr:${cfg.timeRange}`, // time filter: recent posts only
-    '&brd_json=1',
-  ].join('');
-  const response = await axios.post(
-    BRIGHTDATA_API_URL,
-    { zone, url: googleUrl, format: 'raw', data_format: 'parsed_light' },
-    {
-      headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
-      timeout: 40000,
-    }
-  );
-  const organicResults = response.data?.organic || [];
-  const posts = [];
-  for (const item of organicResults) {
-    if (!item.link?.includes('linkedin.com/posts')) continue;
-    // Match result back to a specific KOL from this batch
-    const kol = matchKol(item, batch);
-    if (!kol) continue;
-    // Skip boilerplate / profile-bio-only snippets — these have no post content
-    const rawContent = item.description || item.snippet || '';
-    if (!isUsefulContent(rawContent)) continue;
-    const id = crypto.createHash('md5').update(item.link).digest('hex');
-    // Skip posts we've already seen for this KOL
-    if (state[kol.name]?.seenPostIds?.includes(id)) continue;
-    posts.push({
-      id,
-      source: 'linkedin',
-      sourceName: kol.name,
-      category: 'linkedin-kol',
-      title: cleanTitle(item.title || '', kol.name),
-      link: item.link,
-      url: item.link,
-      content: cleanContent(rawContent),
-      summary: cleanContent(rawContent).substring(0, 200),
-      author: kol.name,
-      role: kol.role || '',
-      pubDate: extractDate(item) || new Date().toISOString(),
-      scraped_at: new Date().toISOString(),
-      age_hours: 0,
-      engagement: { upvotes: 0, comments: 0 },
-      metadata: { score: 0 },
-    });
-  }
-  return posts;
-}
-// Scrape each post URL for full content + engagement, with concurrency limit
-async function enrichPosts(posts, cfg, apiKey, zone) {
-  const concurrency = cfg.enrichConcurrency || 5;
-  const enriched = [];
-  for (let i = 0; i < posts.length; i += concurrency) {
-    const batch = posts.slice(i, i + concurrency);
-    const results = await Promise.all(batch.map(p => enrichPost(p, apiKey, zone)));
-    enriched.push(...results);
-  }
-  return enriched;
-}
-async function enrichPost(post, apiKey, zone) {
-  try {
-    const response = await axios.post(
-      BRIGHTDATA_API_URL,
-      { zone, url: post.url, format: 'raw', data_format: 'markdown' },
-      {
-        headers: { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
-        timeout: 40000,
-      }
-    );
-    const markdown = typeof response.data === 'string' ? response.data : JSON.stringify(response.data);
-    const fullContent = extractPostContent(markdown);
-    const engagement = extractEngagement(markdown);
-    const pubDate = extractDateFromPage(markdown) || post.pubDate;
-    if (fullContent) {
-      post.content = fullContent;
-      post.summary = fullContent.substring(0, 200);
-    }
-    post.engagement = engagement;
-    post.pubDate = pubDate;
-  } catch (err) {
-    // Best-effort — keep SERP snippet if scraping fails
-    logger.debug(`Enrich failed for ${post.sourceName}: ${err.message}`);
-  }
-  return post;
-}
-// Extract full post content from LinkedIn page markdown.
-// LinkedIn page structure (logged-out view):
-//   [Author] [Job] [N followers]
-//   [Nd/w/h ago] •
-//   [Full post text here]
-//   Like Comment Repost Send
-//   [N reactions] • [N comments]
-function extractPostContent(markdown) {
-  const lines = markdown.split('\n');
-  let start = -1;
-  let end = lines.length;
-  // Find the line after the date indicator (e.g. "3d •", "2w •", "1mo •")
-  for (let i = 0; i < lines.length; i++) {
-    const line = lines[i].trim();
-    if (/^\d+[dwm]\s*[•·]/.test(line) || /\d+\s+(hour|day|week|month)s?\s+ago/i.test(line)) {
-      start = i + 1;
-      break;
-    }
-  }
-  if (start === -1) return null;
-  // Find where content ends: engagement buttons or reaction counts
-  for (let i = start; i < lines.length; i++) {
-    const line = lines[i].trim().toLowerCase();
-    if (
-      line === 'like comment repost send' ||
-      line.startsWith('like') && line.includes('comment') ||
-      /^\d[\d,]*\s*reaction/.test(line) ||
-      line === 'reactions' ||
-      line.includes('sign in') ||
-      line.includes('join now')
-    ) {
-      end = i;
-      break;
-    }
-  }
-  const content = lines
-    .slice(start, end)
-    .join('\n')
-    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // collapse markdown links to text
-    .replace(/^#+\s/gm, '')                   // strip heading markers
-    .replace(/\n{3,}/g, '\n\n')
-    .trim();
-  return content.length > 30 ? content : null;
-}
-// Extract like/comment/repost counts from LinkedIn page markdown
-function extractEngagement(markdown) {
-  const parse = (pattern) => {
-    const m = markdown.match(pattern);
-    return m ? parseInt(m[1].replace(/,/g, '')) : 0;
-  };
-  return {
-    upvotes: parse(/(\d[\d,]*)\s*reaction/i),
-    comments: parse(/(\d[\d,]*)\s*comment/i),
-    reposts: parse(/(\d[\d,]*)\s*repost/i),
-  };
-}
-// Extract post date from LinkedIn page markdown (more accurate than SERP snippet)
-function extractDateFromPage(markdown) {
-  const ago = markdown.match(/(\d+)\s*(hour|day|week|month)s?\s*ago/i) ||
-              markdown.match(/(\d+)(h|d|w|mo)\s*[•·]/);
-  if (!ago) return null;
-  const n = parseInt(ago[1]);
-  const unitRaw = ago[2].toLowerCase();
-  const unit = { h: 'hour', d: 'day', w: 'week', mo: 'month' }[unitRaw] || unitRaw;
-  const ms = { hour: 3600000, day: 86400000, week: 604800000, month: 2592000000 }[unit] || 0;
-  return ms ? new Date(Date.now() - n * ms).toISOString() : null;
-}
-// Match a search result to a KOL in the batch.
-// We use two signals, in order of reliability:
-//  1. URL username contains BOTH first+last name parts of the KOL → post is by them
-//  2. Title is "[KOL Name]'s Post" format → LinkedIn's own-post title format
-// Anything else (posts merely mentioning the KOL) is rejected.
-function matchKol(item, batch) {
-  const urlUsername = extractUrlUsername(item.link || '');
-  const titleAuthor = (item.title || '').match(/^(.+?)'s Post/i)?.[1]?.toLowerCase().trim() || '';
-  for (const kol of batch) {
-    const parts = sanitizeName(kol.name).split(' ').filter(Boolean);
-    if (parts.length < 2) continue;
-    const [first, ...rest] = parts;
-    const last = rest[rest.length - 1];
-    // Signal 1: URL username contains both first AND last name → reliable authorship
-    if (urlUsername && urlUsername.includes(first) && urlUsername.includes(last)) return kol;
-    // Signal 2: Title's "Name's Post" format matches this KOL
-    if (titleAuthor && titleAuthor.includes(first) && titleAuthor.includes(last)) return kol;
-  }
-  return null;
-}
-// Extract the author username from a LinkedIn post URL
-// linkedin.com/posts/USERNAME_post-title-activity-ID
-function extractUrlUsername(url) {
-  const m = url.match(/linkedin\.com\/posts\/([^_/]+)/);
-  return m?.[1]?.toLowerCase() || '';
-}
-function sanitizeName(name) {
-  return name.toLowerCase().replace(/[^a-z0-9 ]/g, '').trim();
-}
-// Returns false for snippets that are just LinkedIn profile bios / boilerplate
-function isUsefulContent(content) {
-  if (!content || content.length < 40) return false;
-  const lower = content.toLowerCase();
-  // Common boilerplate patterns from LinkedIn search results
-  const boilerplate = [
-    'view profile for',
-    'report this comment',
-    'close menu',
-    'like · reply',
-    '1 reaction',
-  ];
-  return !boilerplate.some(b => lower.includes(b));
-}
-// Remove LinkedIn UI chrome from content
-function cleanContent(text) {
-  return text
-    .replace(/\.\.\.Read more$/i, '')
-    .replace(/View profile for [^.]+\./gi, '')
-    .replace(/\s+/g, ' ')
-    .trim();
-}
-// Remove "Name's Post" boilerplate from title
-function cleanTitle(title, authorName) {
-  const cleaned = title
-    .replace(new RegExp(`^${authorName}'s Post\\s*[-–]?\\s*`, 'i'), '')
-    .trim();
-  return cleaned || title;
-}
-// Parse relative date strings from Google snippets into ISO dates
-function extractDate(item) {
-  const text = `${item.description || ''} ${item.date || ''}`;
-  const ago = text.match(/(\d+)\s*(hour|day|week|month)s?\s*ago/i);
-  if (!ago) return null;
-  const n = parseInt(ago[1]);
-  const unit = ago[2].toLowerCase();
-  const ms = { hour: 3600000, day: 86400000, week: 604800000, month: 2592000000 }[unit] || 0;
-  return new Date(Date.now() - n * ms).toISOString();
-}
-function chunk(arr, size) {
-  const result = [];
-  for (let i = 0; i < arr.length; i += size) result.push(arr.slice(i, i + size));
-  return result;
-}
-function loadState() {
-  try {
-    if (fs.existsSync(STATE_FILE)) return JSON.parse(fs.readFileSync(STATE_FILE, 'utf-8'));
-  } catch { /* start fresh */ }
-  return {};
-}
-function saveState(state) {
-  const dir = path.dirname(STATE_FILE);
-  if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
-  fs.writeFileSync(STATE_FILE, JSON.stringify(state, null, 2));
-}

package/src/fetchers/linkedin_browser.js DELETED

@@ -1,167 +0,0 @@
-import { chromium } from 'playwright';
-import fs from 'fs';
-import path from 'path';
-import crypto from 'crypto';
-import { fileURLToPath } from 'url';
-import createLogger from '../utils/logger.js';
-const logger = createLogger('LinkedInBrowser');
-const __dirname = path.dirname(fileURLToPath(import.meta.url));
-const DEFAULT_PROFILE_DIR = path.join(__dirname, '../../data/playwright-profile');
-const sleep = ms => new Promise(r => setTimeout(r, ms));
-function validateProfile(profileDir) {
-  if (!fs.existsSync(profileDir)) return 'browser profile not found';
-  if (!fs.existsSync(path.join(profileDir, 'Default'))) return 'browser profile is incomplete';
-  return null;
-}
-export default async function linkedinBrowserFetch(config) {
-  const cfg = config.linkedin_browser;
-  if (!cfg?.enabled) return [];
-  const profileDir = cfg.profileDir || DEFAULT_PROFILE_DIR;
-  const profileError = validateProfile(profileDir);
-  if (profileError) {
-    logger.warn(`LinkedIn Browser skipped: ${profileError}`);
-    logger.warn('Run: npm run setup:twitter (same profile as Twitter)');
-    return [];
-  }
-  const accounts = [...(cfg.accounts || [])].sort(() => Math.random() - 0.5);
-  const maxPerAccount = cfg.maxPostsPerAccount || 5;
-  const maxAgeHours = cfg.maxAgeHours || 48;
-  const cutoff = new Date(Date.now() - maxAgeHours * 3600000);
-  const delay = cfg.delayBetweenAccountsMs || 10000;
-  let context;
-  try {
-    context = await chromium.launchPersistentContext(profileDir, {
-      headless: false,
-      channel: 'chrome',
-      ignoreDefaultArgs: ['--enable-automation'],
-      args: ['--disable-blink-features=AutomationControlled'],
-      viewport: { width: 1280, height: 900 },
-    });
-    const page = context.pages()[0] ?? await context.newPage();
-    await sleep(3000);
-    const allItems = [];
-    for (let i = 0; i < accounts.length; i++) {
-      const account = accounts[i];
-      try {
-        logger.info(`Scraping linkedin.com/in/${account}...`);
-        const posts = await scrapeAccount(page, account, maxPerAccount, cutoff);
-        allItems.push(...posts);
-        logger.debug(`  → ${posts.length} posts from ${account}`);
-      } catch (err) {
-        logger.error(`Failed ${account}: ${err.message}`);
-      }
-      if (i < accounts.length - 1) {
-        const wait = delay + Math.random() * 5000;
-        logger.debug(`  Waiting ${Math.round(wait / 1000)}s...`);
-        await sleep(wait);
-      }
-    }
-    logger.success(`Fetched ${allItems.length} posts from ${accounts.length} LinkedIn accounts`);
-    return allItems;
-  } finally {
-    if (context) await context.close();
-  }
-}
-async function scrapeAccount(page, slug, limit, cutoff) {
-  const url = `https://www.linkedin.com/in/${slug}/recent-activity/all/`;
-  await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
-  // Wait for posts feed
-  await page.waitForSelector('div[data-urn]', { timeout: 15000 });
-  await sleep(2000);
-  // Scroll to load more posts
-  for (let i = 0; i < 3; i++) {
-    await page.mouse.wheel(0, 500 + Math.random() * 300);
-    await sleep(700 + Math.random() * 400);
-  }
-  const rawPosts = await page.evaluate((limit) => {
-    const containers = [...document.querySelectorAll('div[data-urn^="urn:li:activity"]')]
-      .slice(0, limit);
-    return containers.map(el => {
-      const urn = el.getAttribute('data-urn');
-      // Post text
-      const textEl = el.querySelector('.update-components-text, [class*="commentary"]');
-      const text = textEl?.innerText?.trim() || '';
-      // Construct post URL directly from urn
-      const link = urn ? `https://www.linkedin.com/feed/update/${urn}/` : '';
-      // Time ago — take first segment before " •" or newline
-      const timeEl = el.querySelector('.update-components-actor__sub-description');
-      const timeAgo = timeEl?.innerText?.trim().split(/\s*[•\n]/)[0].trim() || '';
-      // Reactions count
-      const reactionsEl = el.querySelector('.social-details-social-counts__reactions-count');
-      const reactions = parseInt(reactionsEl?.innerText?.replace(/[^0-9]/g, '') || '0', 10);
-      // Comments count — parse from social counts block
-      const countsEl = el.querySelector('[class*="social-counts"]');
-      const commentsMatch = countsEl?.innerText?.match(/(\d+)\s+comment/);
-      const comments = commentsMatch ? parseInt(commentsMatch[1]) : 0;
-      return { text, link, timeAgo, reactions, comments };
-    });
-  }, limit);
-  return rawPosts
-    .filter(p => p.link && p.text)
-    .filter(p => {
-      if (!p.timeAgo) return true;
-      const pubDate = parseTimeAgo(p.timeAgo);
-      return !pubDate || pubDate >= cutoff;
-    })
-    .map(p => {
-      const pubDate = parseTimeAgo(p.timeAgo) || new Date();
-      return {
-        id: crypto.createHash('md5').update(p.link).digest('hex'),
-        source: 'linkedin_browser',
-        sourceName: slug,
-        category: 'linkedin',
-        title: p.text.substring(0, 100) + (p.text.length > 100 ? '…' : ''),
-        link: p.link,
-        url: p.link,
-        content: p.text,
-        summary: p.text.substring(0, 200),
-        author: slug,
-        pubDate: pubDate.toISOString(),
-        scraped_at: new Date().toISOString(),
-        age_hours: Math.floor((Date.now() - pubDate.getTime()) / 3600000),
-        tags: [],
-        engagement: {
-          upvotes: p.reactions,
-          comments: p.comments,
-        },
-        metadata: {
-          score: p.reactions,
-          timeAgo: p.timeAgo,
-        },
-      };
-    });
-}
-function parseTimeAgo(str) {
-  if (!str) return null;
-  const m = str.match(/(\d+)\s*(s|m|h|d|w|mo)/i);
-  if (!m) return null;
-  const n = parseInt(m[1]);
-  const unit = m[2].toLowerCase();
-  const ms = { s: 1000, m: 60000, h: 3600000, d: 86400000, w: 604800000, mo: 2592000000 }[unit] || 0;
-  return new Date(Date.now() - n * ms);
-}

package/src/fetchers/twitter.js DELETED

@@ -1,194 +0,0 @@
-import { chromium } from 'playwright';
-import fs from 'fs';
-import path from 'path';
-import crypto from 'crypto';
-import { fileURLToPath } from 'url';
-import createLogger from '../utils/logger.js';
-const logger = createLogger('TwitterFetcher');
-const __dirname = path.dirname(fileURLToPath(import.meta.url));
-const DEFAULT_PROFILE_DIR = path.join(__dirname, '../../data/playwright-profile');
-const sleep = ms => new Promise(r => setTimeout(r, ms));
-function validateProfile(profileDir) {
-  if (!fs.existsSync(profileDir)) {
-    return 'browser profile not found';
-  }
-  // A valid Chromium profile always contains a Default directory
-  if (!fs.existsSync(path.join(profileDir, 'Default'))) {
-    return 'browser profile is incomplete or empty';
-  }
-  return null; // valid
-}
-export default async function twitterFetch(config) {
-  const cfg = config.trendingSources?.twitter;
-  if (!cfg?.enabled) return [];
-  const profileDir = cfg.profileDir || DEFAULT_PROFILE_DIR;
-  const profileError = validateProfile(profileDir);
-  if (profileError) {
-    logger.warn(`Twitter skipped: ${profileError}`);
-    logger.warn('Run: npm run setup:twitter');
-    return [];
-  }
-  // Randomise visit order
-  const accounts = [...(cfg.accounts || [])].sort(() => Math.random() - 0.5);
-  const minLikes = cfg.minLikes || 0;
-  const maxPerAccount = cfg.maxTweetsPerAccount || 10;
-  const maxAgeHours = cfg.maxAgeHours || 24;
-  const cutoff = new Date(Date.now() - maxAgeHours * 3600000);
-  let context;
-  try {
-    context = await chromium.launchPersistentContext(profileDir, {
-      headless: false,
-      channel: 'chrome',
-      ignoreDefaultArgs: ['--enable-automation'],
-      args: ['--disable-blink-features=AutomationControlled'],
-      viewport: { width: 1280, height: 800 },
-    });
-    const page = context.pages()[0] ?? await context.newPage();
-    await sleep(5000);
-    // Land on X home first so the search box is available
-    await page.goto('https://x.com/home', { waitUntil: 'domcontentloaded', timeout: 20000 });
-    await page.waitForSelector('[data-testid="SearchBox_Search_Input"], [aria-label="Search query"]', { timeout: 15000 });
-    await sleep(2000);
-    const allItems = [];
-    for (let i = 0; i < accounts.length; i++) {
-      const account = accounts[i];
-      try {
-        logger.info(`Scraping @${account}...`);
-        const tweets = await scrapeAccount(page, account, maxPerAccount, minLikes, cutoff);
-        allItems.push(...tweets);
-        logger.debug(`  → ${tweets.length} tweets from @${account}`);
-      } catch (err) {
-        logger.error(`Failed @${account}: ${err.message}`);
-      }
-      // Rate limit: random 20-30s between accounts
-      if (i < accounts.length - 1) {
-        const wait = 20000 + Math.random() * 10000;
-        logger.debug(`  Waiting ${Math.round(wait / 1000)}s before next account...`);
-        await sleep(wait);
-      }
-    }
-    logger.success(`Fetched ${allItems.length} tweets from ${accounts.length} accounts`);
-    return allItems;
-  } finally {
-    if (context) await context.close();
-  }
-}
-async function navigateViaSearch(page, account) {
-  // Click the search box
-  await page.click('[data-testid="SearchBox_Search_Input"], [aria-label="Search query"]');
-  await sleep(800 + Math.random() * 400);
-  // Type account name with human-like delay
-  await page.keyboard.type(account, { delay: 80 + Math.random() * 60 });
-  await sleep(1500);
-  // Wait for dropdown results
-  await page.waitForSelector('[data-testid="TypeaheadUser"]', { timeout: 8000 });
-  // Find the result whose username matches the account
-  const matched = await page.evaluate((account) => {
-    const results = [...document.querySelectorAll('[data-testid="TypeaheadUser"]')];
-    for (const el of results) {
-      const handle = el.querySelector('[tabindex="-1"] span')?.innerText?.toLowerCase() || '';
-      if (handle.includes(account.toLowerCase())) {
-        el.click();
-        return true;
-      }
-    }
-    // Fall back to first result
-    if (results[0]) { results[0].click(); return true; }
-    return false;
-  }, account);
-  if (!matched) throw new Error(`No search result found for @${account}`);
-  await page.waitForLoadState('domcontentloaded');
-  await sleep(1500);
-}
-async function scrollAndWait(page, times = 3) {
-  for (let i = 0; i < times; i++) {
-    await page.mouse.wheel(0, 400 + Math.random() * 300);
-    await sleep(600 + Math.random() * 400);
-  }
-}
-async function scrapeAccount(page, account, limit, minLikes, cutoff) {
-  await navigateViaSearch(page, account);
-  await page.waitForSelector('article[data-testid="tweet"]', { timeout: 15000 });
-  await sleep(1500);
-  // Scroll to load more tweets naturally
-  await scrollAndWait(page, 3);
-  const rawTweets = await page.evaluate((limit) => {
-    const articles = [...document.querySelectorAll('article[data-testid="tweet"]')].slice(0, limit);
-    return articles.map(article => {
-      const textEl = article.querySelector('[data-testid="tweetText"]');
-      const text = textEl?.innerText?.trim() || '';
-      const timeEl = article.querySelector('time');
-      const link = timeEl?.closest('a')?.href || '';
-      const date = timeEl?.getAttribute('datetime') || '';
-      const parseCount = (testId) => {
-        const el = article.querySelector(`[data-testid="${testId}"]`);
-        const label = el?.getAttribute('aria-label') || '';
-        const m = label.match(/(\d[\d,]*)/);
-        return m ? parseInt(m[1].replace(/,/g, ''), 10) : 0;
-      };
-      return {
-        text,
-        link,
-        date,
-        likes: parseCount('like'),
-        replies: parseCount('reply'),
-        retweets: parseCount('retweet'),
-      };
-    });
-  }, limit);
-  return rawTweets
-    .filter(t => t.link && t.text && t.likes >= minLikes && (!t.date || new Date(t.date) >= cutoff))
-    .map(t => ({
-      id: crypto.createHash('md5').update(t.link).digest('hex'),
-      source: 'twitter',
-      sourceName: `@${account}`,
-      category: 'social',
-      title: t.text.substring(0, 100) + (t.text.length > 100 ? '…' : ''),
-      link: t.link,
-      url: t.link,
-      content: t.text,
-      summary: t.text.substring(0, 200),
-      author: account,
-      pubDate: t.date || new Date().toISOString(),
-      scraped_at: new Date().toISOString(),
-      age_hours: t.date
-        ? Math.floor((Date.now() - new Date(t.date).getTime()) / 3600000)
-        : 0,
-      tags: [],
-      engagement: {
-        upvotes: t.likes,
-        comments: t.replies,
-        retweets: t.retweets,
-      },
-      metadata: {
-        score: t.likes,
-      },
-    }));
-}