npm - commentscraper - Versions diffs - 1.0.0 - Mend

commentscraper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/SKILL.md +112 -0
package/bin/commentscraper.js +86 -0
package/commands/login.js +47 -0
package/commands/logout.js +12 -0
package/commands/scrape.js +108 -0
package/commands/whoami.js +21 -0
package/core/config.js +2 -0
package/core/package.json +1 -0
package/core/platform-detect.js +37 -0
package/core/progress.js +11 -0
package/core/scrapers/hackernews.js +99 -0
package/core/scrapers/index.js +44 -0
package/core/scrapers/notion.js +256 -0
package/core/scrapers/producthunt.js +46 -0
package/core/scrapers/reddit-profile.js +146 -0
package/core/scrapers/reddit.js +227 -0
package/core/scrapers/steam.js +119 -0
package/core/utils.js +62 -0
package/lib/auth.js +72 -0
package/lib/cli-progress.js +11 -0
package/lib/config.js +33 -0
package/package.json +46 -0

package/core/scrapers/steam.js ADDED Viewed

@@ -0,0 +1,119 @@
+const STEAM_REVIEWS_API = 'https://store.steampowered.com/appreviews';
+/**
+ * Build the appreviews request URL for one page of results.
+ * @param {string} appId - Numeric Steam app ID taken from the store URL.
+ * @param {string} cursor - Pagination cursor; '*' requests the first page.
+ * @returns {string} Fully qualified request URL.
+ */
+function buildSteamReviewsUrl(appId, cursor) {
+  const params = new URLSearchParams({
+    json: '1',
+    filter: 'recent',
+    language: 'all',
+    num_per_page: '100',
+    cursor,
+    review_type: 'all',
+    purchase_type: 'all',
+  });
+  return `${STEAM_REVIEWS_API}/${appId}?${params}`;
+}
+/**
+ * Fetch Steam game reviews via the free public API.
+ *
+ * Requests up to 20 pages of 100 reviews each, pausing 500 ms between pages.
+ * A failure on the first page fails the whole call; a failure on a later page
+ * only ends pagination, so reviews collected up to that point are returned.
+ *
+ * @param {string} url - Steam store URL containing `/app/<id>`.
+ * @param {{ progress?: { send: function } }} [opts] - Optional progress sink,
+ *   called as `progress.send(message, percent)`.
+ * @returns {Promise<{ success: true, comments: object[], post: object, method: 'json' }
+ *   | { success: false, error: string }>} Never rejects; failures are reported
+ *   through `success: false`.
+ */
+export async function fetchSteamReviews(url, { progress } = {}) {
+  // Progress reporting is optional; without a usable sink, updates are dropped
+  // instead of throwing before the try block is even entered.
+  const report = typeof progress?.send === 'function'
+    ? (message, percent) => progress.send(message, percent)
+    : () => {};
+  try {
+    const match = url.match(/\/app\/(\d+)/);
+    if (!match) return { success: false, error: 'Could not detect Steam app ID from URL.' };
+    const appId = match[1];
+    report('Fetching Steam reviews...', 10);
+    const [detailsRes, firstReviewRes] = await Promise.all([
+      // The details call only supplies the title, so a network failure here
+      // must not reject the whole Promise.all.
+      fetch(`https://store.steampowered.com/api/appdetails?appids=${appId}`).catch(() => null),
+      fetch(buildSteamReviewsUrl(appId, '*')),
+    ]);
+    let gameTitle = `Steam App ${appId}`;
+    try {
+      const detailsData = await detailsRes.json();
+      if (detailsData[appId]?.success) {
+        gameTitle = detailsData[appId].data.name || gameTitle;
+      }
+    } catch { /* use default title */ }
+    if (firstReviewRes.ok === false) {
+      // Report the HTTP status rather than a JSON parse error from an error page.
+      throw new Error(`Steam API returned HTTP ${firstReviewRes.status}`);
+    }
+    const firstData = await firstReviewRes.json();
+    if (firstData?.success !== 1) throw new Error('Steam API returned error');
+    const qs = firstData.query_summary || {};
+    const post = {
+      title: gameTitle,
+      body: [
+        qs.review_score_desc || '',
+        qs.total_reviews ? `${qs.total_reviews.toLocaleString()} reviews` : '',
+      ].filter(Boolean).join(' \u00b7 '),
+      url: `https://store.steampowered.com/app/${appId}/`,
+      subreddit: 'Steam',
+    };
+    const reviews = parseSteamReviews(firstData.reviews || [], appId);
+    let cursor = firstData.cursor;
+    let page = 1;
+    const MAX_PAGES = 20;
+    report(`Found ${reviews.length} reviews (page 1)`, 20);
+    while (page < MAX_PAGES) {
+      // A missing cursor, or one that points back at the first page, means
+      // there is nothing further to request.
+      if (!cursor || cursor === '*') break;
+      await new Promise(r => setTimeout(r, 500));
+      let data;
+      try {
+        const res = await fetch(buildSteamReviewsUrl(appId, cursor));
+        if (res.ok === false) break;
+        data = await res.json();
+      } catch {
+        // Deliberate: a failed page ends pagination but keeps earlier pages.
+        break;
+      }
+      if (data?.success !== 1) break;
+      const pageReviews = parseSteamReviews(data.reviews || [], appId);
+      if (pageReviews.length === 0) break;
+      reviews.push(...pageReviews);
+      page++;
+      // An unchanged cursor would request the same page again.
+      if (data.cursor === cursor) break;
+      cursor = data.cursor;
+      const pct = 20 + Math.round((page / MAX_PAGES) * 70);
+      report(`Loading Steam reviews... ${reviews.length} found`, Math.min(pct, 90));
+    }
+    report(`Loaded ${reviews.length} reviews`, 95);
+    return { success: true, comments: reviews, post, method: 'json' };
+  } catch (error) {
+    return { success: false, error: error.message };
+  }
+}
+/**
+ * Convert raw Steam review objects into Comment-shaped records.
+ *
+ * Each record's text starts with a "[Recommended]" / "[Not Recommended]" tag,
+ * followed by the rounded hours on record when that is non-zero, then the
+ * review body.
+ *
+ * @param {object[]} reviews - Review objects from the appreviews response.
+ * @param {string} appId - Steam app ID, used to build each permalink.
+ * @returns {object[]} One record per review, in input order.
+ */
+export function parseSteamReviews(reviews, appId) {
+  const toComment = (review) => {
+    const steamId = review.author?.steamid;
+    const minutes = review.author?.playtime_forever;
+    // playtime_forever is divided by 60 to get hours; zero hours are not shown.
+    const hoursPlayed = minutes ? Math.round(minutes / 60) : 0;
+    const verdict = review.voted_up ? 'Recommended' : 'Not Recommended';
+    const playtimeNote = hoursPlayed ? ` (${hoursPlayed}h on record)` : '';
+    const createdAt = review.timestamp_created;
+    return {
+      text: `[${verdict}]${playtimeNote}\n\n${review.review || ''}`,
+      author: steamId || '',
+      // timestamp_created is multiplied by 1000 to get milliseconds for Date.
+      timestamp: createdAt ? new Date(createdAt * 1000).toISOString() : '',
+      permalink: steamId
+        ? `https://steamcommunity.com/profiles/${steamId}/recommended/${appId}/`
+        : '',
+      links: [],
+      score: review.votes_up || 0,
+      depth: 0,
+    };
+  };
+  return reviews.map(toComment);
+}

package/core/utils.js ADDED Viewed

@@ -0,0 +1,62 @@
+/**
+ * Extract URLs from markdown text.
+ *
+ * Inline link destinations (`[text](dest)`) are collected first, then bare
+ * http(s) URLs. The result is de-duplicated and keeps first-seen order.
+ *
+ * - An optional link title (`[a](url "title")`) and the angle brackets of a
+ *   `<url>` destination are dropped, so only the destination is returned.
+ * - Trailing sentence punctuation after a bare URL (`.`, `,`, `;`, `:`, `!`,
+ *   `?`, quotes) is not treated as part of the URL.
+ *
+ * @param {string} markdown
+ * @returns {string[]}
+ */
+export function extractLinksFromMarkdown(markdown) {
+  const text = String(markdown ?? '');
+  // A Set gives O(1) de-duplication and preserves insertion order.
+  const found = new Set();
+  for (const [, , destination] of text.matchAll(/\[([^\]]*)\]\(([^)]+)\)/g)) {
+    const trimmed = destination.trim();
+    const bracketed = /^<([^>]*)>/.exec(trimmed);
+    // Without angle brackets the destination ends at the first whitespace;
+    // anything after that is the optional link title.
+    const url = bracketed ? bracketed[1] : trimmed.split(/\s+/, 1)[0];
+    if (url) found.add(url);
+  }
+  for (const [rawUrl] of text.matchAll(/https?:\/\/[^\s\])<>]+/g)) {
+    const url = rawUrl.replace(/[.,;:!?'"]+$/, '');
+    // Skip matches that were nothing but a scheme plus punctuation.
+    if (/^https?:\/\/./.test(url)) found.add(url);
+  }
+  return [...found];
+}
+/**
+ * Convert HN comment HTML to plain text.
+ *
+ * `<p>` becomes a blank line, `<br>` a newline, and every other tag is
+ * removed. Character references are then decoded in a single pass, so text
+ * that was escaped twice (e.g. `&amp;lt;`) is unescaped exactly once.
+ *
+ * @param {string} html - Comment HTML; null/undefined/empty yields ''.
+ * @returns {string} Trimmed plain text.
+ */
+export function hnHtmlToText(html) {
+  const namedEntities = new Map([
+    ['amp', '&'],
+    ['lt', '<'],
+    ['gt', '>'],
+    ['quot', '"'],
+    ['apos', "'"],
+    ['nbsp', ' '],
+  ]);
+  // Resolve one reference; unknown or out-of-range references are left as-is.
+  const decodeEntity = (entity, body) => {
+    if (body[0] !== '#') return namedEntities.get(body) ?? entity;
+    const isHex = body[1] === 'x' || body[1] === 'X';
+    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
+    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
+    const isValid = codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate;
+    return isValid ? String.fromCodePoint(codePoint) : entity;
+  };
+  return (html || '')
+    .replace(/<p>/gi, '\n\n')
+    .replace(/<br\s*\/?>/gi, '\n')
+    // Tags are stripped before decoding so an escaped "&lt;b&gt;" survives as text.
+    .replace(/<[^>]+>/g, '')
+    .replace(/&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z]+);/g, decodeEntity)
+    .trim();
+}
+/**
+ * Flatten a Notion rich-text array into a plain string.
+ * Segments look like [["Hello", [["b"]]], [" world"]]: the first element of
+ * each segment is its text; anything after it is ignored here.
+ * @param {Array} richText
+ * @returns {string} Concatenated segment text, trimmed; '' for non-array input.
+ */
+export function notionRichTextToPlain(richText) {
+  if (!Array.isArray(richText)) return '';
+  let plain = '';
+  for (const segment of richText) {
+    if (typeof segment === 'string') {
+      plain += segment;
+    } else if (Array.isArray(segment)) {
+      plain += segment[0] || '';
+    }
+    // Any other segment type contributes nothing.
+  }
+  return plain.trim();
+}

package/lib/auth.js ADDED Viewed

@@ -0,0 +1,72 @@
+import { SUPABASE_URL, SUPABASE_ANON_KEY } from '../core/config.js';
+import { getToken } from './config.js';
+/**
+ * POST a JSON payload to the cli-auth edge function.
+ * Shared by every auth call so the endpoint and headers are defined once.
+ * @param {object} payload - Request body; serialized with JSON.stringify.
+ * @returns {Promise<Response>} The raw fetch response (status is not checked here).
+ */
+function postCliAuth(payload) {
+  return fetch(`${SUPABASE_URL}/functions/v1/cli-auth`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'apikey': SUPABASE_ANON_KEY,
+    },
+    body: JSON.stringify(payload),
+  });
+}
+/**
+ * Verify CLI token with Supabase edge function.
+ * Returns { valid, email, plan, plan_type } or { valid: false, error }.
+ * Never throws: a missing token, network failure, non-2xx status, and an
+ * unparseable or non-object body are all reported as { valid: false, error }.
+ */
+export async function verifyToken() {
+  const token = getToken();
+  if (!token) {
+    return { valid: false, error: 'Not logged in. Run: commentscraper login' };
+  }
+  let res;
+  try {
+    res = await postCliAuth({ action: 'verify', cli_token: token });
+  } catch (error) {
+    return { valid: false, error: `Could not reach auth server: ${error.message}` };
+  }
+  if (!res.ok) {
+    return { valid: false, error: `Auth server error (${res.status})` };
+  }
+  let result;
+  try {
+    result = await res.json();
+  } catch {
+    // The server answered, so this is not a connectivity problem.
+    result = null;
+  }
+  if (result === null || typeof result !== 'object') {
+    return { valid: false, error: 'Auth server returned an invalid response' };
+  }
+  return result;
+}
+/**
+ * Create a device code for the login flow.
+ * @returns {Promise<object>} Parsed JSON body of the response.
+ * @throws {Error} `Auth server error (<status>)` on a non-2xx response;
+ *   fetch and JSON parsing errors propagate unchanged.
+ */
+export async function createDeviceCode() {
+  const res = await postCliAuth({ action: 'create' });
+  if (!res.ok) {
+    throw new Error(`Auth server error (${res.status})`);
+  }
+  return await res.json();
+}
+/**
+ * Poll for device code approval.
+ * @param {string} deviceCode - Sent to the server as `device_code`.
+ * @returns {Promise<object>} Parsed JSON body of the response.
+ * @throws {Error} `Auth server error (<status>)` on a non-2xx response;
+ *   fetch and JSON parsing errors propagate unchanged.
+ */
+export async function pollDeviceCode(deviceCode) {
+  const res = await postCliAuth({ action: 'poll', device_code: deviceCode });
+  if (!res.ok) {
+    throw new Error(`Auth server error (${res.status})`);
+  }
+  return await res.json();
+}

package/lib/cli-progress.js ADDED Viewed

@@ -0,0 +1,11 @@
+import { createProgressReporter } from '../core/progress.js';
+/**
+ * Build a progress reporter that renders onto a single stderr line.
+ * @param {boolean} [quiet=false] - When true, return a reporter whose `send` does nothing.
+ * @returns {{ send: function }} The silent stub, or whatever
+ *   createProgressReporter returns for the stderr renderer.
+ */
+export function createConsoleProgress(quiet = false) {
+  const renderToStderr = (message, percent) => {
+    // "\r" plus the erase-line escape redraws the status in place.
+    process.stderr.write(`\r\x1b[K${message} [${percent}%]`);
+    // From 95% on, end the line so later output starts on a new one.
+    if (percent >= 95) process.stderr.write('\n');
+  };
+  return quiet ? { send: () => {} } : createProgressReporter(renderToStderr);
+}

package/lib/config.js ADDED Viewed

@@ -0,0 +1,33 @@
+import { readFileSync, writeFileSync, mkdirSync, unlinkSync, chmodSync } from 'node:fs';
+import { join } from 'node:path';
+import { homedir } from 'node:os';
+// Per-user settings directory: ".commentscraper" under the home directory.
+const CONFIG_DIR = join(homedir(), '.commentscraper');
+// JSON file inside CONFIG_DIR that the config helpers read, write and delete.
+const CONFIG_FILE = join(CONFIG_DIR, 'config.json');
+/**
+ * Load and parse the CLI config file.
+ * @returns {*} The parsed JSON value, or null when the file cannot be read
+ *   or does not contain valid JSON.
+ */
+export function readConfig() {
+  let parsed = null;
+  try {
+    const raw = readFileSync(CONFIG_FILE, 'utf8');
+    parsed = JSON.parse(raw);
+  } catch {
+    // Deliberate best-effort: an unreadable or invalid file reads as "no config".
+  }
+  return parsed;
+}
+/**
+ * Persist the CLI config as pretty-printed JSON with a trailing newline.
+ *
+ * The file holds the CLI token, so it is kept private to the current user:
+ * the directory is created with mode 0700 and the file is forced to 0600.
+ * (On Windows these POSIX modes have little or no effect.)
+ *
+ * @param {object} data - Config object to serialize.
+ * @throws Propagates any filesystem error from creating, writing or
+ *   re-permissioning the file.
+ */
+export function writeConfig(data) {
+  mkdirSync(CONFIG_DIR, { recursive: true, mode: 0o700 });
+  writeFileSync(CONFIG_FILE, `${JSON.stringify(data, null, 2)}\n`, {
+    encoding: 'utf8',
+    mode: 0o600,
+  });
+  // The write-time mode only applies when the file is created; tighten a
+  // pre-existing file that may have been written with looser permissions.
+  chmodSync(CONFIG_FILE, 0o600);
+}
+/**
+ * Remove the CLI config file.
+ * @returns {boolean} true when the file was unlinked, false when unlinking
+ *   failed for any reason (for example, the file does not exist).
+ */
+export function deleteConfig() {
+  let removed = true;
+  try {
+    unlinkSync(CONFIG_FILE);
+  } catch {
+    // Any failure is reported through the return value instead of thrown.
+    removed = false;
+  }
+  return removed;
+}
+/**
+ * Read the stored CLI token.
+ * @returns {string|null} The config's `cli_token`, or null when there is no
+ *   config or the token is missing or empty.
+ */
+export function getToken() {
+  const { cli_token: token } = readConfig() ?? {};
+  return token || null;
+}

package/package.json ADDED Viewed

@@ -0,0 +1,46 @@
+{
+  "name": "commentscraper",
+  "version": "1.0.0",
+  "description": "Scrape comments and reviews from Reddit, Hacker News, Steam, Product Hunt, Notion, and Reddit profiles. Built for AI agents.",
+  "type": "module",
+  "bin": {
+    "commentscraper": "./bin/commentscraper.js"
+  },
+  "files": [
+    "bin/",
+    "commands/",
+    "lib/",
+    "core/",
+    "SKILL.md",
+    "README.md"
+  ],
+  "engines": {
+    "node": ">=18"
+  },
+  "keywords": [
+    "scraper",
+    "reddit",
+    "hackernews",
+    "steam",
+    "producthunt",
+    "notion",
+    "comments",
+    "reviews",
+    "cli",
+    "agent-skills",
+    "claude-code",
+    "openclaw",
+    "openclaw-skills",
+    "agentic-ai",
+    "mcp"
+  ],
+  "scripts": {
+    "prepublish": "node prepublish.js"
+  },
+  "author": "DDTechSolution",
+  "license": "proprietary",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/daniel-ddtech/commentscraper-cli.git"
+  }
+}