intelwatch 1.2.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG-DRAFT.md +44 -0
- package/CHANGELOG.md +30 -0
- package/Endrix-Intelwatch-DueDil.pdf +0 -0
- package/RELEASE.md +15 -0
- package/export.pdf +0 -0
- package/package.json +3 -2
- package/profile-480254275.pdf +0 -0
- package/profile-775726417.pdf +0 -0
- package/profile-794598813.pdf +0 -0
- package/src/ai/client.js +39 -1
- package/src/commands/profile.js +58 -48
- package/src/commands/report.js +11 -13
- package/src/index.js +30 -4
- package/src/license.js +194 -0
- package/src/providers/apollo.js +172 -0
- package/src/providers/clearbit.js +136 -0
- package/src/providers/index.js +30 -0
- package/src/providers/opencorporates.js +159 -0
- package/src/providers/pappers.js +75 -0
- package/src/providers/registry.js +531 -0
- package/src/scrapers/reddit-hn.js +161 -0
- package/src/trackers/brand.js +66 -3
- package/src/trackers/competitor.js +9 -10
- package/src/utils/error-handler.js +10 -0
- package/src/utils/export.js +221 -99
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reddit & Hacker News scrapers for brand/keyword mentions.
|
|
3
|
+
*
|
|
4
|
+
* Uses public JSON APIs (no auth required):
|
|
5
|
+
* - Reddit: https://www.reddit.com/search.json?q=<query>
|
|
6
|
+
* - HN (Algolia): https://hn.algolia.com/api/v1/search?query=<query>
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { fetch as fetchWithRetry } from '../utils/fetcher.js';
|
|
10
|
+
import { getLimits, isPro } from '../license.js';
|
|
11
|
+
|
|
12
|
+
const REDDIT_SEARCH_URL = 'https://www.reddit.com/search.json';
|
|
13
|
+
const HN_SEARCH_URL = 'https://hn.algolia.com/api/v1/search';
|
|
14
|
+
|
|
15
|
+
// ── Reddit ───────────────────────────────────────────────────────────────────
|
|
16
|
+
|
|
17
|
+
/**
 * Search Reddit for link posts mentioning a brand/keyword (Pro license only).
 *
 * Best-effort: a missing Pro license, an empty query, a non-positive limit,
 * an HTTP status >= 400, or any fetch/parse failure resolves to an empty
 * array instead of rejecting.
 *
 * @param {string} query - Search terms, sent as the `q` parameter.
 * @param {{ limit?: number, sort?: string, timeFilter?: string }} options
 *   `limit` defaults to the license's `redditMaxResults` and is clamped to
 *   that cap and to 100; `sort` defaults to 'relevance'; `timeFilter`
 *   (sent as `t`) defaults to 'month'.
 * @returns {Promise<Array<{ title: string, url: string, subreddit: string, score: number, numComments: number, author: string, createdAt: string, selftext: string, domain: string, source: string }>>}
 */
export async function searchReddit(query, options = {}) {
  // Pro-only: Reddit scraping requires a license
  if (!isPro()) {
    if (process.env.DEBUG) {
      console.error('[reddit] Skipped — Pro license required');
    }
    return [];
  }

  // Without this guard a missing query is serialized as the literal "undefined".
  if (query == null || String(query).trim() === '') {
    return [];
  }

  const limits = getLimits();
  const { limit, sort = 'relevance', timeFilter = 'month' } = options ?? {};

  // One effective limit drives both the request size and the final slice, so a
  // caller-supplied limit is honoured and a non-numeric one never becomes "NaN".
  const cap = Number.isFinite(limits.redditMaxResults) ? limits.redditMaxResults : 100;
  const requested = limit == null ? cap : Number(limit);
  const maxResults = Math.floor(Math.min(Number.isFinite(requested) ? requested : cap, cap, 100));
  if (maxResults < 1) {
    return [];
  }

  // toISOString() throws a RangeError on an invalid Date; fall back to the epoch
  // (the value used for a missing timestamp) rather than losing every result.
  const toIsoDate = (epochSeconds) => {
    const date = new Date(Number(epochSeconds || 0) * 1000);
    return Number.isNaN(date.getTime()) ? new Date(0).toISOString() : date.toISOString();
  };

  try {
    const params = new URLSearchParams({
      q: String(query),
      limit: String(maxResults),
      sort,
      t: timeFilter,
      type: 'link',
    });

    const url = `${REDDIT_SEARCH_URL}?${params}`;
    const resp = await fetchWithRetry(url, {
      headers: {
        // NOTE(review): version is hard-coded and lags the package version — confirm whether it should track package.json.
        'User-Agent': 'intelwatch/1.2.0 (competitive intelligence CLI)',
        'Accept': 'application/json',
      },
      timeout: 15000,
    });

    if (!resp || resp.status >= 400) {
      return [];
    }

    const data = typeof resp.data === 'string' ? JSON.parse(resp.data) : resp.data;
    const children = Array.isArray(data?.data?.children) ? data.data.children : [];

    return children
      // Skip malformed entries instead of letting one of them throw away the whole page.
      .filter((child) => child?.data)
      .slice(0, maxResults)
      .map(({ data: post }) => ({
        title: post.title || '',
        url: post.permalink ? `https://www.reddit.com${post.permalink}` : post.url || '',
        subreddit: post.subreddit_name_prefixed || (post.subreddit ? `r/${post.subreddit}` : ''),
        score: post.score || 0,
        numComments: post.num_comments || 0,
        author: post.author || '[deleted]',
        createdAt: toIsoDate(post.created_utc),
        selftext: String(post.selftext || '').slice(0, 500),
        domain: post.domain || '',
        source: 'reddit',
      }));
  } catch (err) {
    // Silently fail — Reddit rate-limits aggressively
    if (process.env.DEBUG) {
      console.error(`[reddit] Search failed: ${err.message}`);
    }
    return [];
  }
}
|
|
80
|
+
|
|
81
|
+
// ── Hacker News ──────────────────────────────────────────────────────────────
|
|
82
|
+
|
|
83
|
+
/**
 * Search Hacker News via the Algolia API (Pro license only).
 *
 * Best-effort: a missing Pro license, an empty query, a non-positive limit,
 * an HTTP status >= 400, or any fetch/parse failure resolves to an empty
 * array instead of rejecting.
 *
 * @param {string} query - Search terms, sent as the `query` parameter.
 * @param {{ limit?: number, tags?: string }} options
 *   `limit` defaults to the license's `hnMaxResults` and is clamped to that
 *   cap and to 100; `tags` defaults to 'story'.
 * @returns {Promise<Array<{ title: string, url: string, hnUrl: string, points: number, numComments: number, author: string, createdAt: string, source: string }>>}
 */
export async function searchHackerNews(query, options = {}) {
  // Pro-only: HackerNews scraping requires a license
  if (!isPro()) {
    if (process.env.DEBUG) {
      console.error('[hn] Skipped — Pro license required');
    }
    return [];
  }

  // Without this guard a missing query is serialized as the literal "undefined".
  if (query == null || String(query).trim() === '') {
    return [];
  }

  const limits = getLimits();
  const { limit, tags = 'story' } = options ?? {};

  // One effective limit drives both hitsPerPage and the final slice, so a
  // caller-supplied limit is honoured and a non-numeric one never becomes "NaN".
  const cap = Number.isFinite(limits.hnMaxResults) ? limits.hnMaxResults : 100;
  const requested = limit == null ? cap : Number(limit);
  const maxResults = Math.floor(Math.min(Number.isFinite(requested) ? requested : cap, cap, 100));
  if (maxResults < 1) {
    return [];
  }

  try {
    const params = new URLSearchParams({
      query: String(query),
      tags,
      hitsPerPage: String(maxResults),
    });

    const url = `${HN_SEARCH_URL}?${params}`;
    const resp = await fetchWithRetry(url, {
      headers: { 'Accept': 'application/json' },
      timeout: 15000,
    });

    if (!resp || resp.status >= 400) {
      return [];
    }

    const data = typeof resp.data === 'string' ? JSON.parse(resp.data) : resp.data;
    const hits = Array.isArray(data?.hits) ? data.hits : [];

    return hits
      // A hit without an objectID cannot be linked; skip it rather than emit "item?id=undefined".
      .filter((hit) => hit?.objectID != null)
      .slice(0, maxResults)
      .map((hit) => {
        const hnUrl = `https://news.ycombinator.com/item?id=${hit.objectID}`;
        return {
          // Comment hits (tags other than 'story') carry story_title/story_url
          // instead of title/url in the Algolia HN API — TODO confirm against live responses.
          title: hit.title || hit.story_title || '',
          url: hit.url || hit.story_url || hnUrl,
          hnUrl,
          points: hit.points || 0,
          numComments: hit.num_comments || 0,
          author: hit.author || '',
          createdAt: hit.created_at || '',
          source: 'hackernews',
        };
      });
  } catch (err) {
    if (process.env.DEBUG) {
      console.error(`[hn] Search failed: ${err.message}`);
    }
    return [];
  }
}
|
|
138
|
+
|
|
139
|
+
// ── Combined search ──────────────────────────────────────────────────────────
|
|
140
|
+
|
|
141
|
+
/**
 * Query Reddit and Hacker News concurrently and merge the results into one
 * list ordered newest-first by `createdAt`.
 *
 * Each source is asked for 15 results unless `redditLimit` / `hnLimit` is
 * given (a falsy value such as 0 also falls back to 15). Entries whose
 * `createdAt` cannot be parsed are ordered as timestamp 0.
 *
 * @param {string} query
 * @param {{ redditLimit?: number, hnLimit?: number }} options
 * @returns {Promise<Array<object>>}
 */
export async function searchCommunities(query, options = {}) {
  const lookups = [
    searchReddit(query, { limit: options.redditLimit || 15 }),
    searchHackerNews(query, { limit: options.hnLimit || 15 }),
  ];
  const [fromReddit, fromHackerNews] = await Promise.all(lookups);

  // Unparseable dates yield NaN from getTime(); treat those as 0.
  const timestampOf = (entry) => new Date(entry.createdAt).getTime() || 0;

  // Most recent first.
  const merged = [...fromReddit, ...fromHackerNews];
  merged.sort((left, right) => {
    const leftTime = timestampOf(left);
    const rightTime = timestampOf(right);
    return rightTime - leftTime;
  });
  return merged;
}
|
package/src/trackers/brand.js
CHANGED
|
@@ -1,21 +1,84 @@
|
|
|
1
1
|
import { scrapeNewsMentions } from '../scrapers/google-news.js';
|
|
2
|
+
import { searchReddit, searchHackerNews } from '../scrapers/reddit-hn.js';
|
|
3
|
+
import { isPro, printProUpgrade } from '../license.js';
|
|
2
4
|
|
|
3
5
|
/**
 * Run a brand check: collect news, Reddit and Hacker News mentions for the
 * tracker's brand name and return them as a single snapshot.
 *
 * Reddit/HN lookups fall back to an empty list if they reject; a rejection
 * from the news scrape propagates to the caller.
 *
 * @param {{ id: *, brandName: string }} tracker
 * @returns {Promise<object>} Snapshot with `mentions`, `mentionCount`,
 *   per-source counts in `sources`, the news scrape's `error` (or null) and
 *   the current license `tier` ('pro' or 'free').
 */
export async function runBrandCheck(tracker) {
  const { brandName } = tracker;

  // Shape a Reddit search result like a mention; sentiment comes from title + body text.
  const toRedditMention = (post) => ({
    title: post.title,
    url: post.url,
    domain: 'reddit.com',
    category: post.subreddit,
    source: 'reddit',
    sentiment: scoreSentiment(post.title + ' ' + post.selftext),
    score: post.score,
    numComments: post.numComments,
    author: post.author,
    date: post.createdAt,
  });

  // Shape a Hacker News result like a mention; sentiment is fixed to 'neutral'.
  const toHnMention = (story) => ({
    title: story.title,
    url: story.hnUrl,
    domain: 'news.ycombinator.com',
    category: 'hackernews',
    source: 'hackernews',
    sentiment: 'neutral',
    score: story.points,
    numComments: story.numComments,
    author: story.author,
    date: story.createdAt,
  });

  // All three sources are queried in parallel.
  const pending = [
    scrapeNewsMentions(brandName),
    searchReddit(brandName, { limit: 15, timeFilter: 'month' }).catch(() => []),
    searchHackerNews(brandName, { limit: 15 }).catch(() => []),
  ];
  const [newsData, redditPosts, hnStories] = await Promise.all(pending);

  const redditMentions = redditPosts.map(toRedditMention);
  const hnMentions = hnStories.map(toHnMention);
  const newsMentions = newsData.mentions || [];
  const combined = [...newsMentions, ...redditMentions, ...hnMentions];

  return {
    type: 'brand',
    trackerId: tracker.id,
    brandName,
    checkedAt: new Date().toISOString(),
    mentions: combined,
    mentionCount: combined.length,
    sources: {
      googleNews: newsMentions.length,
      reddit: redditMentions.length,
      hackerNews: hnMentions.length,
    },
    error: newsData.error || null,
    tier: isPro() ? 'pro' : 'free',
  };
}
|
|
18
61
|
|
|
62
|
+
/**
 * Simple keyword-based sentiment scorer for short community post text.
 *
 * Each distinct positive keyword found adds 1 and each distinct negative
 * keyword subtracts 1 (repeats of the same keyword count once). Keywords are
 * matched as whole words, optionally followed by a common inflection suffix
 * (s, es, d, ed, ing, ly), so "bugs" and "loved" count while "debug",
 * "whatever" (contains "hate") and "badge" do not.
 *
 * @param {string} text
 * @returns {'positive'|'slightly_positive'|'neutral'|'slightly_negative'|'negative'}
 *   'neutral' is also returned for empty or non-string input.
 */
function scoreSentiment(text) {
  if (!text || typeof text !== 'string') return 'neutral';
  const lower = text.toLowerCase();
  const positive = ['great', 'awesome', 'excellent', 'love', 'best', 'amazing', 'good', 'fantastic', 'recommend', 'impressed'];
  const negative = ['bad', 'terrible', 'worst', 'hate', 'awful', 'horrible', 'scam', 'avoid', 'disappointed', 'broken', 'bug'];

  // Count how many distinct keywords of a list occur as whole words in the text.
  const countDistinct = (keywords) => {
    const pattern = new RegExp(`\\b(${keywords.join('|')})(?:s|es|d|ed|ing|ly)?\\b`, 'g');
    const found = new Set();
    for (const match of lower.matchAll(pattern)) {
      found.add(match[1]);
    }
    return found.size;
  };

  const score = countDistinct(positive) - countDistinct(negative);

  if (score >= 2) return 'positive';
  if (score === 1) return 'slightly_positive';
  if (score <= -2) return 'negative';
  if (score === -1) return 'slightly_negative';
  return 'neutral';
}
|
|
81
|
+
|
|
19
82
|
export function diffBrandSnapshots(prev, curr) {
|
|
20
83
|
const changes = [];
|
|
21
84
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { analyzeSite, analyzeKeyPages } from '../scrapers/site-analyzer.js';
|
|
2
2
|
import { scrapeNewsMentions } from '../scrapers/google-news.js';
|
|
3
3
|
import { searchPressMentions, extractRatingsFromResults } from '../scrapers/brave-search.js';
|
|
4
|
-
import {
|
|
4
|
+
import { lookupCompany, resolveProvider } from '../providers/registry.js';
|
|
5
5
|
import { diffTechStacks } from '../utils/tech-detect.js';
|
|
6
6
|
import { fetch } from '../utils/fetcher.js';
|
|
7
7
|
import { load } from '../utils/parser.js';
|
|
@@ -83,14 +83,11 @@ export async function runCompetitorCheck(tracker) {
|
|
|
83
83
|
}
|
|
84
84
|
} catch {}
|
|
85
85
|
|
|
86
|
-
// ---
|
|
87
|
-
let
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
pappers = await pappersLookup(brandName);
|
|
92
|
-
} catch {}
|
|
93
|
-
}
|
|
86
|
+
// --- Company data lookup (adapts to TLD: Pappers for .fr, OpenCorporates for international) ---
|
|
87
|
+
let companyData = null;
|
|
88
|
+
try {
|
|
89
|
+
companyData = await lookupCompany(brandName, url);
|
|
90
|
+
} catch {}
|
|
94
91
|
|
|
95
92
|
return {
|
|
96
93
|
type: 'competitor',
|
|
@@ -113,7 +110,9 @@ export async function runCompetitorCheck(tracker) {
|
|
|
113
110
|
contentStats: siteData.contentStats,
|
|
114
111
|
press,
|
|
115
112
|
reputation,
|
|
116
|
-
|
|
113
|
+
companyData,
|
|
114
|
+
// Backward compat: keep 'pappers' key if data came from Pappers
|
|
115
|
+
pappers: companyData?.source === 'pappers' ? companyData : (companyData || null),
|
|
117
116
|
};
|
|
118
117
|
}
|
|
119
118
|
|
|
@@ -47,6 +47,16 @@ export function withErrorHandling(fn) {
|
|
|
47
47
|
* Handle and format errors appropriately
|
|
48
48
|
*/
|
|
49
49
|
export function handleError(error, context = '') {
|
|
50
|
+
// Guard against null/undefined/non-object errors
|
|
51
|
+
if (error == null) {
|
|
52
|
+
console.error(chalk.red(`\n❌ Unknown error${context ? ` in ${context}` : ''}`));
|
|
53
|
+
return;
|
|
54
|
+
}
|
|
55
|
+
if (typeof error === 'string') {
|
|
56
|
+
console.error(chalk.red(`\n❌ ${error}`));
|
|
57
|
+
return;
|
|
58
|
+
}
|
|
59
|
+
|
|
50
60
|
if (process.env.NODE_ENV === 'development' || process.env.DEBUG_ERRORS) {
|
|
51
61
|
console.error(chalk.red(`\n❌ Error${context ? ` in ${context}` : ''}:`));
|
|
52
62
|
console.error(error.stack || error);
|