intelwatch 1.2.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,161 @@
1
+ /**
2
+ * Reddit & Hacker News scrapers for brand/keyword mentions.
3
+ *
4
+ * Uses public JSON APIs (no auth required):
5
+ * - Reddit: https://www.reddit.com/search.json?q=<query>
6
+ * - HN (Algolia): https://hn.algolia.com/api/v1/search?query=<query>
7
+ */
8
+
9
+ import { fetch as fetchWithRetry } from '../utils/fetcher.js';
10
+ import { getLimits, isPro } from '../license.js';
11
+
12
+ const REDDIT_SEARCH_URL = 'https://www.reddit.com/search.json';
13
+ const HN_SEARCH_URL = 'https://hn.algolia.com/api/v1/search';
14
+
15
+ // ── Reddit ───────────────────────────────────────────────────────────────────
16
+
17
/**
 * Search Reddit for posts mentioning a brand/keyword.
 *
 * Pro-only: when `isPro()` is false the function returns `[]` without making a
 * request. The search is best-effort: a missing response, an HTTP status >= 400
 * or any thrown error yields `[]` (details go to stderr only when
 * `process.env.DEBUG` is set).
 *
 * @param {string} query - Search terms, sent as the `q` query parameter.
 * @param {{ limit?: number, sort?: string, timeFilter?: string }} options
 *   - `limit`: maximum number of posts wanted. Defaults to
 *     `getLimits().redditMaxResults`; the request never asks for more than 100
 *     and the returned list is additionally capped by `redditMaxResults`.
 *   - `sort`: sent as `sort` (default `'relevance'`).
 *   - `timeFilter`: sent as `t` (default `'month'`).
 * @returns {Promise<Array<{ title: string, url: string, subreddit: string, score: number, numComments: number, author: string, createdAt: string, selftext: string, domain: string, source: string }>>}
 */
export async function searchReddit(query, options = {}) {
  // Pro-only: Reddit scraping requires a license
  if (!isPro()) {
    if (process.env.DEBUG) {
      console.error('[reddit] Skipped — Pro license required');
    }
    return [];
  }

  const limits = getLimits();
  const { limit = limits.redditMaxResults, sort = 'relevance', timeFilter = 'month' } = options;
  const pageSize = clampRedditPageSize(limit);

  try {
    const params = new URLSearchParams({
      q: query,
      limit: String(pageSize),
      sort,
      t: timeFilter,
      type: 'link',
    });

    const resp = await fetchWithRetry(`${REDDIT_SEARCH_URL}?${params}`, {
      headers: {
        // NOTE(review): the version in this User-Agent is hard-coded and may lag
        // behind the published package version — confirm whether it should track it.
        'User-Agent': 'intelwatch/1.2.0 (competitive intelligence CLI)',
        'Accept': 'application/json',
      },
      timeout: 15000,
    });

    if (!resp || resp.status >= 400) {
      return [];
    }

    const data = typeof resp.data === 'string' ? JSON.parse(resp.data) : resp.data;
    const children = Array.isArray(data?.data?.children) ? data.data.children : [];

    return children
      // Skip malformed entries instead of letting one bad record throw inside
      // the mapper and discard every valid result via the catch below.
      .filter((child) => child?.data != null && typeof child.data === 'object')
      .slice(0, limits.redditMaxResults)
      // Enforce the caller's limit locally as well as in the request.
      .slice(0, pageSize)
      .map(({ data: post }) => ({
        title: post.title || '',
        // Fall back to the post's own link rather than building ".../undefined".
        url: post.permalink ? `https://www.reddit.com${post.permalink}` : (post.url || ''),
        subreddit: post.subreddit_name_prefixed || (post.subreddit ? `r/${post.subreddit}` : ''),
        score: post.score || 0,
        numComments: post.num_comments || 0,
        author: post.author || '[deleted]',
        createdAt: redditEpochToIso(post.created_utc),
        selftext: (post.selftext || '').slice(0, 500),
        domain: post.domain || '',
        source: 'reddit',
      }));
  } catch (err) {
    // Silently fail — Reddit rate-limits aggressively
    if (process.env.DEBUG) {
      console.error(`[reddit] Search failed: ${err.message}`);
    }
    return [];
  }
}

/**
 * Normalise a requested result count into the 1..100 range used for the
 * `limit` query parameter; unusable values (NaN, < 1) fall back to 25.
 */
function clampRedditPageSize(limit) {
  const wanted = Number(limit);
  if (Number.isNaN(wanted) || wanted < 1) {
    return 25;
  }
  return Math.min(Math.floor(wanted), 100);
}

/**
 * Convert an epoch-seconds value to an ISO-8601 string. Missing, non-numeric
 * or out-of-range values map to the epoch instead of throwing a RangeError.
 */
function redditEpochToIso(seconds) {
  const date = new Date((Number(seconds) || 0) * 1000);
  return Number.isNaN(date.getTime()) ? new Date(0).toISOString() : date.toISOString();
}
80
+
81
+ // ── Hacker News ──────────────────────────────────────────────────────────────
82
+
83
/**
 * Search Hacker News via the Algolia search API.
 *
 * Pro-only: when `isPro()` is false the function returns `[]` without making a
 * request. The search is best-effort: a missing response, an HTTP status >= 400
 * or any thrown error yields `[]` (details go to stderr only when
 * `process.env.DEBUG` is set).
 *
 * @param {string} query - Search terms, sent as the `query` parameter.
 * @param {{ limit?: number, tags?: string }} options
 *   - `limit`: maximum number of hits wanted. Defaults to
 *     `getLimits().hnMaxResults`; the request never asks for more than 100 and
 *     the returned list is additionally capped by `hnMaxResults`.
 *   - `tags`: sent as `tags` (default `'story'`).
 * @returns {Promise<Array<{ title: string, url: string, hnUrl: string, points: number, numComments: number, author: string, createdAt: string, source: string }>>}
 */
export async function searchHackerNews(query, options = {}) {
  // Pro-only: HackerNews scraping requires a license
  if (!isPro()) {
    if (process.env.DEBUG) {
      console.error('[hn] Skipped — Pro license required');
    }
    return [];
  }

  const limits = getLimits();
  const { limit = limits.hnMaxResults, tags = 'story' } = options;
  const pageSize = clampHnPageSize(limit);

  try {
    const params = new URLSearchParams({
      query,
      tags,
      hitsPerPage: String(pageSize),
    });

    const resp = await fetchWithRetry(`${HN_SEARCH_URL}?${params}`, {
      headers: { 'Accept': 'application/json' },
      timeout: 15000,
    });

    if (!resp || resp.status >= 400) {
      return [];
    }

    const data = typeof resp.data === 'string' ? JSON.parse(resp.data) : resp.data;
    const hits = Array.isArray(data?.hits) ? data.hits : [];

    return hits
      // Skip malformed entries instead of letting one bad record throw inside
      // the mapper and discard every valid result via the catch below.
      .filter((hit) => hit != null && typeof hit === 'object')
      .slice(0, limits.hnMaxResults)
      // Enforce the caller's limit locally as well as in the request.
      .slice(0, pageSize)
      .map((hit) => {
        // Avoid building ".../item?id=undefined" when the hit carries no id.
        const hnUrl = hit.objectID != null
          ? `https://news.ycombinator.com/item?id=${hit.objectID}`
          : '';
        return {
          // Non-story hits (e.g. tags: 'comment') presumably expose the parent
          // title as `story_title` — verify against the HN Search API docs.
          title: hit.title || hit.story_title || '',
          url: hit.url || hnUrl,
          hnUrl,
          points: hit.points || 0,
          numComments: hit.num_comments || 0,
          author: hit.author || '',
          createdAt: hit.created_at || '',
          source: 'hackernews',
        };
      });
  } catch (err) {
    if (process.env.DEBUG) {
      console.error(`[hn] Search failed: ${err.message}`);
    }
    return [];
  }
}

/**
 * Normalise a requested result count into the 1..100 range used for the
 * `hitsPerPage` query parameter; unusable values (NaN, < 1) fall back to 20.
 */
function clampHnPageSize(limit) {
  const wanted = Number(limit);
  if (Number.isNaN(wanted) || wanted < 1) {
    return 20;
  }
  return Math.min(Math.floor(wanted), 100);
}
138
+
139
+ // ── Combined search ──────────────────────────────────────────────────────────
140
+
141
/**
 * Query Reddit and Hacker News concurrently and return one merged list,
 * newest first.
 *
 * Each source is asked for `options.redditLimit` / `options.hnLimit` results
 * (15 when the option is missing or falsy). Entries whose `createdAt` cannot
 * be parsed as a date are ordered as if their timestamp were 0.
 *
 * @param {string} query
 * @param {{ redditLimit?: number, hnLimit?: number }} options
 * @returns {Promise<Array<object>>}
 */
export async function searchCommunities(query, options = {}) {
  const redditLimit = options.redditLimit || 15;
  const hnLimit = options.hnLimit || 15;

  // Unparsable dates produce NaN, which the `|| 0` turns into timestamp 0.
  const toTimestamp = (entry) => new Date(entry.createdAt).getTime() || 0;

  const [fromReddit, fromHn] = await Promise.all([
    searchReddit(query, { limit: redditLimit }),
    searchHackerNews(query, { limit: hnLimit }),
  ]);

  const merged = [...fromReddit, ...fromHn];
  merged.sort((left, right) => toTimestamp(right) - toTimestamp(left));
  return merged;
}
@@ -1,21 +1,84 @@
1
1
  import { scrapeNewsMentions } from '../scrapers/google-news.js';
2
+ import { searchReddit, searchHackerNews } from '../scrapers/reddit-hn.js';
3
+ import { isPro, printProUpgrade } from '../license.js';
2
4
 
3
5
  export async function runBrandCheck(tracker) {
4
6
  const { brandName } = tracker;
5
7
 
6
- const mentionData = await scrapeNewsMentions(brandName);
8
+ // Fetch from all sources in parallel
9
+ const [mentionData, redditResults, hnResults] = await Promise.all([
10
+ scrapeNewsMentions(brandName),
11
+ searchReddit(brandName, { limit: 15, timeFilter: 'month' }).catch(() => []),
12
+ searchHackerNews(brandName, { limit: 15 }).catch(() => []),
13
+ ]);
14
+
15
+ // Convert Reddit results to mention format
16
+ const redditMentions = redditResults.map(r => ({
17
+ title: r.title,
18
+ url: r.url,
19
+ domain: 'reddit.com',
20
+ category: r.subreddit,
21
+ source: 'reddit',
22
+ sentiment: scoreSentiment(r.title + ' ' + r.selftext),
23
+ score: r.score,
24
+ numComments: r.numComments,
25
+ author: r.author,
26
+ date: r.createdAt,
27
+ }));
28
+
29
+ // Convert HN results to mention format
30
+ const hnMentions = hnResults.map(r => ({
31
+ title: r.title,
32
+ url: r.hnUrl,
33
+ domain: 'news.ycombinator.com',
34
+ category: 'hackernews',
35
+ source: 'hackernews',
36
+ sentiment: 'neutral',
37
+ score: r.points,
38
+ numComments: r.numComments,
39
+ author: r.author,
40
+ date: r.createdAt,
41
+ }));
42
+
43
+ const allMentions = [...(mentionData.mentions || []), ...redditMentions, ...hnMentions];
7
44
 
8
45
  return {
9
46
  type: 'brand',
10
47
  trackerId: tracker.id,
11
48
  brandName,
12
49
  checkedAt: new Date().toISOString(),
13
- mentions: mentionData.mentions,
14
- mentionCount: mentionData.mentionCount,
50
+ mentions: allMentions,
51
+ mentionCount: allMentions.length,
52
+ sources: {
53
+ googleNews: (mentionData.mentions || []).length,
54
+ reddit: redditMentions.length,
55
+ hackerNews: hnMentions.length,
56
+ },
15
57
  error: mentionData.error || null,
58
+ tier: isPro() ? 'pro' : 'free',
16
59
  };
17
60
  }
18
61
 
62
// Optional endings accepted after a keyword so "bugs", "loved", "recommended"
// or "badly" still count, while unrelated words that merely contain a keyword
// ("debug", "badge", "glove", "goodbye") do not.
const SENTIMENT_INFLECTION = '(?:s|es|d|ed|ing|ly)?';

// Patterns carry no `g` flag, so `.test()` keeps no state between calls.
const toSentimentPatterns = (terms) =>
  terms.map((term) => new RegExp(`\\b${term}${SENTIMENT_INFLECTION}\\b`));

const POSITIVE_SENTIMENT_PATTERNS = toSentimentPatterns([
  'great', 'awesome', 'excellent', 'love', 'best', 'amazing', 'good', 'fantastic', 'recommend', 'impressed',
]);
const NEGATIVE_SENTIMENT_PATTERNS = toSentimentPatterns([
  'bad', 'terrible', 'worst', 'hate', 'awful', 'horrible', 'scam', 'avoid', 'disappointed', 'broken', 'bug',
]);

/**
 * Simple keyword-based sentiment scorer for Reddit/HN text.
 *
 * Each keyword counts at most once, however often it occurs: +1 per positive
 * keyword found, -1 per negative keyword found. Keywords are matched
 * case-insensitively as whole words (plus a few common endings), not as
 * arbitrary substrings.
 *
 * @param {string} text
 * @returns {'positive' | 'slightly_positive' | 'neutral' | 'slightly_negative' | 'negative'}
 *   `positive` for a net score >= 2, `slightly_positive` for 1, `negative` for
 *   <= -2, `slightly_negative` for -1, otherwise `neutral` (also returned for
 *   empty or non-string input).
 */
function scoreSentiment(text) {
  if (typeof text !== 'string' || text === '') return 'neutral';

  const lower = text.toLowerCase();
  const countHits = (patterns) => patterns.filter((pattern) => pattern.test(lower)).length;
  const score = countHits(POSITIVE_SENTIMENT_PATTERNS) - countHits(NEGATIVE_SENTIMENT_PATTERNS);

  if (score >= 2) return 'positive';
  if (score === 1) return 'slightly_positive';
  if (score <= -2) return 'negative';
  if (score === -1) return 'slightly_negative';
  return 'neutral';
}
81
+
19
82
  export function diffBrandSnapshots(prev, curr) {
20
83
  const changes = [];
21
84
 
@@ -1,7 +1,7 @@
1
1
  import { analyzeSite, analyzeKeyPages } from '../scrapers/site-analyzer.js';
2
2
  import { scrapeNewsMentions } from '../scrapers/google-news.js';
3
3
  import { searchPressMentions, extractRatingsFromResults } from '../scrapers/brave-search.js';
4
- import { pappersLookup, hasPappersKey } from '../scrapers/pappers.js';
4
+ import { lookupCompany, resolveProvider } from '../providers/registry.js';
5
5
  import { diffTechStacks } from '../utils/tech-detect.js';
6
6
  import { fetch } from '../utils/fetcher.js';
7
7
  import { load } from '../utils/parser.js';
@@ -83,14 +83,11 @@ export async function runCompetitorCheck(tracker) {
83
83
  }
84
84
  } catch {}
85
85
 
86
- // --- Pappers lookup for .fr domains ---
87
- let pappers = null;
88
- const hostname = new URL(url).hostname;
89
- if (hostname.endsWith('.fr') && hasPappersKey()) {
90
- try {
91
- pappers = await pappersLookup(brandName);
92
- } catch {}
93
- }
86
+ // --- Company data lookup (adapts to TLD: Pappers for .fr, OpenCorporates for international) ---
87
+ let companyData = null;
88
+ try {
89
+ companyData = await lookupCompany(brandName, url);
90
+ } catch {}
94
91
 
95
92
  return {
96
93
  type: 'competitor',
@@ -113,7 +110,9 @@ export async function runCompetitorCheck(tracker) {
113
110
  contentStats: siteData.contentStats,
114
111
  press,
115
112
  reputation,
116
- pappers,
113
+ companyData,
114
+ // Backward compat: keep 'pappers' key if data came from Pappers
115
+ pappers: companyData?.source === 'pappers' ? companyData : (companyData || null),
117
116
  };
118
117
  }
119
118
 
@@ -47,6 +47,16 @@ export function withErrorHandling(fn) {
47
47
  * Handle and format errors appropriately
48
48
  */
49
49
  export function handleError(error, context = '') {
50
+ // Guard against null/undefined/non-object errors
51
+ if (error == null) {
52
+ console.error(chalk.red(`\n❌ Unknown error${context ? ` in ${context}` : ''}`));
53
+ return;
54
+ }
55
+ if (typeof error === 'string') {
56
+ console.error(chalk.red(`\n❌ ${error}`));
57
+ return;
58
+ }
59
+
50
60
  if (process.env.NODE_ENV === 'development' || process.env.DEBUG_ERRORS) {
51
61
  console.error(chalk.red(`\n❌ Error${context ? ` in ${context}` : ''}:`));
52
62
  console.error(error.stack || error);