intelwatch 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +39 -0
- package/README.md +175 -0
- package/bin/intelwatch.js +8 -0
- package/package.json +43 -0
- package/src/ai/client.js +130 -0
- package/src/commands/ai-summary.js +147 -0
- package/src/commands/check.js +267 -0
- package/src/commands/compare.js +124 -0
- package/src/commands/diff.js +118 -0
- package/src/commands/digest.js +156 -0
- package/src/commands/discover.js +301 -0
- package/src/commands/history.js +60 -0
- package/src/commands/list.js +43 -0
- package/src/commands/notify.js +121 -0
- package/src/commands/pitch.js +156 -0
- package/src/commands/report.js +82 -0
- package/src/commands/track.js +94 -0
- package/src/config.js +65 -0
- package/src/index.js +182 -0
- package/src/report/html.js +499 -0
- package/src/report/json.js +44 -0
- package/src/report/markdown.js +156 -0
- package/src/scrapers/brave-search.js +268 -0
- package/src/scrapers/google-news.js +111 -0
- package/src/scrapers/google.js +113 -0
- package/src/scrapers/pappers.js +119 -0
- package/src/scrapers/site-analyzer.js +252 -0
- package/src/storage.js +168 -0
- package/src/trackers/brand.js +76 -0
- package/src/trackers/competitor.js +268 -0
- package/src/trackers/keyword.js +121 -0
- package/src/trackers/person.js +132 -0
- package/src/utils/display.js +102 -0
- package/src/utils/fetcher.js +82 -0
- package/src/utils/parser.js +110 -0
- package/src/utils/sentiment.js +95 -0
- package/src/utils/tech-detect.js +94 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
/**
 * Make a value safe to place inside a Markdown (GFM) table cell.
 * A literal `|` would otherwise end the cell and a line break would end the row.
 *
 * @param {*} value - Cell content; null/undefined become an empty string.
 * @returns {string} Text with pipes escaped and line breaks collapsed to spaces.
 */
function escapeTableCell(value) {
  return String(value ?? '')
    .replace(/\|/g, '\\|')
    .replace(/\s*[\r\n]+\s*/g, ' ');
}

/**
 * Sum the number of changes across a list of report entries.
 *
 * @param {Array<{changes?: Array}>} entries
 * @returns {number}
 */
function countChanges(entries) {
  return entries.reduce((sum, entry) => sum + (entry.changes?.length ?? 0), 0);
}

/**
 * Map a change type to the marker used in change lists.
 *
 * @param {string} type - 'new', 'removed', or anything else (treated as modified).
 * @returns {string}
 */
function changeEmoji(type) {
  if (type === 'new') return '🟢';
  if (type === 'removed') return '🔴';
  return '🟡';
}

/**
 * Render an intelligence report as a Markdown document.
 *
 * @param {object} data
 * @param {string|number|Date} data.generatedAt - Passed to `new Date()` for the header.
 * @param {Array} [data.competitors] - Entries of `{ tracker, snapshot, changes, threatScore }`.
 * @param {Array} [data.keywords] - Entries of `{ tracker, snapshot, changes }`.
 * @param {Array} [data.brands] - Entries of `{ tracker, snapshot, changes }`.
 * @returns {string} The report, lines joined with '\n'. Dates are formatted with
 *   `toLocaleString()`, so their text depends on the runtime locale.
 */
export function generateMarkdownReport(data) {
  // A missing category is treated as "nothing tracked" instead of crashing.
  const { competitors = [], keywords = [], brands = [] } = data;
  const lines = [];
  const date = new Date(data.generatedAt).toLocaleString();

  lines.push('# IntelWatch Intelligence Report');
  lines.push(`\n_Generated: ${date}_\n`);
  lines.push('---\n');

  // Summary
  const competitorChanges = countChanges(competitors);
  const keywordChanges = countChanges(keywords);
  const brandChanges = countChanges(brands);
  const totalTracked = competitors.length + keywords.length + brands.length;
  const totalChanges = competitorChanges + keywordChanges + brandChanges;

  lines.push('## Summary\n');
  lines.push('| Category | Tracked | Changes |');
  lines.push('|----------|---------|---------|');
  lines.push(`| Competitors | ${competitors.length} | ${competitorChanges} |`);
  lines.push(`| Keywords | ${keywords.length} | ${keywordChanges} |`);
  lines.push(`| Brands | ${brands.length} | ${brandChanges} |`);
  lines.push(`| **Total** | **${totalTracked}** | **${totalChanges}** |`);
  lines.push('');

  // Competitor section
  if (competitors.length > 0) {
    lines.push('## Competitors\n');

    // Threat level table, highest score first (copy so the caller's array keeps its order)
    const sorted = [...competitors].sort((a, b) => (b.threatScore ?? 0) - (a.threatScore ?? 0));
    lines.push('### Threat Levels\n');
    lines.push('| Competitor | Threat | Pages | Tech | Jobs | Changes |');
    lines.push('|------------|--------|-------|------|------|---------|');
    for (const { tracker, snapshot, changes = [], threatScore } of sorted) {
      const threat = threatScore >= 8 ? '🔴 HIGH' : threatScore >= 4 ? '🟡 MED' : '🟢 LOW';
      const label = escapeTableCell(tracker.name || tracker.url);
      // `??` keeps a real count of 0; only an unknown count is shown as '?'.
      const openings = snapshot.jobs?.estimatedOpenings ?? '?';
      lines.push(
        `| [${label}](${tracker.url}) | ${threat} (${threatScore}/10) | ${snapshot.pageCount || 0} | ${(snapshot.techStack || []).length} | ${openings} | ${changes.length} |`
      );
    }
    lines.push('');

    // Per-competitor details, in the order the caller supplied them
    for (const { tracker, snapshot, changes = [], threatScore } of competitors) {
      lines.push(`### ${tracker.name || tracker.url}`);
      lines.push(`\n- **URL:** ${tracker.url}`);
      lines.push(`- **Last checked:** ${new Date(snapshot.checkedAt).toLocaleString()}`);
      lines.push(`- **Pages:** ${snapshot.pageCount || 0}`);
      lines.push(`- **Threat score:** ${threatScore}/10`);

      if (snapshot.techStack?.length > 0) {
        const byCategory = {};
        for (const tech of snapshot.techStack) {
          (byCategory[tech.category] = byCategory[tech.category] || []).push(tech.name);
        }
        lines.push(`\n**Tech Stack:**`);
        for (const [category, names] of Object.entries(byCategory)) {
          lines.push(`- ${category}: ${names.join(', ')}`);
        }
      }

      if (snapshot.pricing?.prices?.length > 0) {
        lines.push(`\n**Pricing:** ${snapshot.pricing.prices.slice(0, 5).join(' | ')}`);
      }

      if (snapshot.jobs) {
        const openings = snapshot.jobs.estimatedOpenings ?? '?';
        const location = snapshot.jobs.url ? ` (${snapshot.jobs.url})` : '';
        lines.push(`\n**Jobs:** ~${openings} open positions${location}`);
      }

      const metaPage = snapshot.keyPages?.['/'];
      if (metaPage?.title) {
        lines.push(`\n**Homepage title:** ${metaPage.title}`);
      }

      if (changes.length > 0) {
        lines.push(`\n**Changes (${changes.length}):**`);
        for (const change of changes) {
          lines.push(`- ${changeEmoji(change.type)} [${change.field}] ${change.value}`);
        }
      }

      lines.push('');
    }
  }

  // Keyword section
  if (keywords.length > 0) {
    lines.push('## Keyword Rankings\n');

    for (const { tracker, snapshot, changes = [] } of keywords) {
      lines.push(`### "${tracker.keyword}"`);
      lines.push(`\n_Checked: ${new Date(snapshot.checkedAt).toLocaleString()}_\n`);

      if (snapshot.results?.length > 0) {
        lines.push('| # | Domain | Title |');
        lines.push('|---|--------|-------|');
        for (const result of snapshot.results.slice(0, 10)) {
          const star = result.isFeaturedSnippet ? ' ⭐' : '';
          // Page titles frequently contain '|' ("Product | Brand"); escape so the row stays intact.
          const title = escapeTableCell((result.title || '').slice(0, 60));
          lines.push(`| ${result.position} | ${escapeTableCell(result.domain)}${star} | ${title} |`);
        }
        lines.push('');
      }

      if (changes.length > 0) {
        lines.push('**Changes:**');
        for (const change of changes) {
          lines.push(`- ${changeEmoji(change.type)} ${change.value}`);
        }
      }

      lines.push('');
    }
  }

  // Brand section
  if (brands.length > 0) {
    lines.push('## Brand Mentions\n');

    for (const { tracker, snapshot, changes = [] } of brands) {
      lines.push(`### "${tracker.brandName}"`);
      lines.push(`\n_${snapshot.mentionCount || 0} mentions found — checked: ${new Date(snapshot.checkedAt).toLocaleString()}_\n`);

      const mentions = snapshot.mentions || [];
      const negative = mentions.filter(m => m.sentiment === 'negative' || m.sentiment === 'slightly_negative');

      if (negative.length > 0) {
        lines.push(`⚠️ **${negative.length} negative mention(s) detected:**`);
        for (const m of negative.slice(0, 3)) {
          // Fall back to the URL so a missing title never renders as "[undefined]".
          lines.push(`- [${(m.title || m.url || '').slice(0, 60)}](${m.url}) — ${m.domain}`);
        }
        lines.push('');
      }

      if (mentions.length > 0) {
        lines.push('**Recent mentions:**');
        for (const m of mentions.slice(0, 5)) {
          const sentEmoji = m.sentiment === 'positive' ? '😊' : m.sentiment === 'negative' ? '😞' : '😐';
          lines.push(`- ${sentEmoji} [${(m.title || m.url || '').slice(0, 80)}](${m.url}) [${m.category}]`);
        }
      }

      // Only print the heading when at least one change is actually a new mention.
      const newMentions = changes.filter(change => change.field === 'mention');
      if (newMentions.length > 0) {
        lines.push('\n**New mentions:**');
        for (const change of newMentions) {
          lines.push(`- 🟢 ${change.value}`);
        }
      }

      lines.push('');
    }
  }

  lines.push('---');
  lines.push('_Generated by [intelwatch](https://github.com/intelwatch/intelwatch) — competitive intelligence from the terminal_');

  return lines.join('\n');
}
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import { analyzeSentiment, categorizeMention } from '../utils/sentiment.js';
|
|
3
|
+
|
|
4
|
+
// Base URL of the Brave Search REST API; endpoint paths such as
// `/web/search` and `/news/search` are appended to it by the search functions.
const BRAVE_API = 'https://api.search.brave.com/res/v1';
|
|
5
|
+
|
|
6
|
+
/**
 * Resolve the Brave Search API key from the environment.
 *
 * Checks BRAVE_API_KEY first, then BRAVE_SEARCH_API_KEY. An unset or empty
 * variable is skipped.
 *
 * @returns {string|null} The first non-empty key, or null when neither is set.
 */
function getApiKey() {
  const candidates = [process.env.BRAVE_API_KEY, process.env.BRAVE_SEARCH_API_KEY];
  for (const candidate of candidates) {
    if (candidate) return candidate;
  }
  return null;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Web search via Brave.
 *
 * Never throws: a missing API key and request failures are both reported
 * through the `error` field of the returned object.
 *
 * @param {string} query - Search query.
 * @param {object} [options]
 * @param {number} [options.count=20] - Number of results to request.
 * @param {string} [options.country='FR'] - Country code sent as `country`.
 * @param {string} [options.lang='fr'] - Language code sent as `search_lang`.
 * @param {string} [options.freshness] - 'pd' (day), 'pw' (week), 'pm' (month).
 * @returns {Promise<{results: Array<{title: string, url: string, domain: string, snippet: string, age: (string|null)}>, error: (string|null)}>}
 */
export async function braveWebSearch(query, options = {}) {
  const apiKey = getApiKey();
  if (!apiKey) return { results: [], error: 'No BRAVE_API_KEY set' };

  // Prefer the hostname Brave reports; otherwise parse it from the URL.
  // A single unparseable URL must not discard the whole result page, and only
  // a *leading* "www." is stripped (a plain string replace would also hit
  // hosts that merely contain "www." somewhere).
  const toDomain = (result) => {
    const reported = result.meta_url?.hostname?.replace(/^www\./, '');
    if (reported) return reported;
    try {
      return new URL(result.url).hostname.replace(/^www\./, '');
    } catch {
      return '';
    }
  };

  try {
    const params = {
      q: query,
      count: options.count || 20,
      country: options.country || 'FR',
      search_lang: options.lang || 'fr',
      freshness: options.freshness || undefined, // 'pd' (day), 'pw' (week), 'pm' (month)
    };

    const resp = await axios.get(`${BRAVE_API}/web/search`, {
      headers: { 'X-Subscription-Token': apiKey, 'Accept': 'application/json' },
      params,
      timeout: 15000,
    });

    const results = (resp.data?.web?.results || []).map(r => ({
      title: r.title,
      url: r.url,
      domain: toDomain(r),
      snippet: r.description || '',
      age: r.age || null,
    }));

    return { results, error: null };
  } catch (err) {
    return { results: [], error: err.message };
  }
}
|
|
49
|
+
|
|
50
|
+
/**
 * News search via Brave.
 *
 * Never throws: a missing API key and request failures are both reported
 * through the `error` field of the returned object.
 *
 * @param {string} query - Search query.
 * @param {object} [options]
 * @param {number} [options.count=20] - Number of results to request.
 * @param {string} [options.country='FR'] - Country code sent as `country`.
 * @param {string} [options.lang='fr'] - Language code sent as `search_lang`.
 * @param {string} [options.freshness='pm'] - 'pd' (day), 'pw' (week), 'pm' (month).
 * @returns {Promise<{results: Array<{title: string, url: string, domain: string, snippet: string, age: (string|null), source: string}>, error: (string|null)}>}
 */
export async function braveNewsSearch(query, options = {}) {
  const apiKey = getApiKey();
  if (!apiKey) return { results: [], error: 'No BRAVE_API_KEY set' };

  // Prefer the hostname Brave reports; otherwise derive it from the URL so
  // `domain` is populated the same way as for web results. Only a leading
  // "www." is stripped, and an unparseable URL yields '' rather than throwing.
  const toDomain = (result) => {
    const reported = result.meta_url?.hostname?.replace(/^www\./, '');
    if (reported) return reported;
    try {
      return new URL(result.url).hostname.replace(/^www\./, '');
    } catch {
      return '';
    }
  };

  try {
    const params = {
      q: query,
      count: options.count || 20,
      country: options.country || 'FR',
      search_lang: options.lang || 'fr',
      freshness: options.freshness || 'pm', // last month by default
    };

    const resp = await axios.get(`${BRAVE_API}/news/search`, {
      headers: { 'X-Subscription-Token': apiKey, 'Accept': 'application/json' },
      params,
      timeout: 15000,
    });

    const results = (resp.data?.results || []).map(r => ({
      title: r.title,
      url: r.url,
      domain: toDomain(r),
      snippet: r.description || '',
      age: r.age || null,
      source: r.meta_url?.hostname || '',
    }));

    return { results, error: null };
  } catch (err) {
    return { results: [], error: err.message };
  }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Full press & mentions search for a brand/company.
 *
 * Runs three Brave queries in sequence (news, recent web, review sites), with
 * a 500 ms pause between them, and merges the hits into one list with
 * sentiment and category attached. A URL is reported at most once; the first
 * query that returned it wins.
 *
 * @param {string} brandName - Brand or company name to search for.
 * @param {object} [options] - Forwarded to the Brave search helpers; its keys
 *   override the per-query defaults (`freshness`, `count`).
 * @returns {Promise<{brandName: string, checkedAt: string, mentions: Array<object>, mentionCount: number, error: (string|null)}>}
 *   `error` is the first error reported by any of the three queries, or null.
 */
export async function searchPressMentions(brandName, options = {}) {
  const mentions = [];
  const seenUrls = new Set(); // O(1) dedupe instead of rescanning `mentions` per result
  const pause = (ms) => new Promise(resolve => setTimeout(resolve, ms));

  // Append one mention unless its URL was already recorded.
  // `category` overrides the computed category when given.
  const addMention = (source, r, { domain = r.domain, category } = {}) => {
    if (seenUrls.has(r.url)) return;
    seenUrls.add(r.url);
    const sentiment = analyzeSentiment(r.title + ' ' + r.snippet);
    mentions.push({
      source,
      url: r.url,
      domain,
      title: r.title,
      snippet: r.snippet?.substring(0, 300),
      age: r.age,
      sentiment: sentiment.label,
      sentimentScore: sentiment.score,
      category: category ?? categorizeMention(r.url, r.title, r.snippet),
    });
  };

  // 1. News search
  const news = await braveNewsSearch(brandName, { freshness: 'pm', ...options });
  for (const r of news.results) {
    addMention('news', r, { domain: r.domain || r.source });
  }

  // 2. Web search for recent mentions
  await pause(500);
  const web = await braveWebSearch(`"${brandName}" avis OR actualité OR news`, { freshness: 'pw', ...options });
  for (const r of web.results) {
    addMention('web', r);
  }

  // 3. Search for reviews specifically
  await pause(500);
  const reviews = await braveWebSearch(`"${brandName}" avis clients trustpilot`, { count: 10, ...options });
  // Case-insensitive: titles are usually capitalised ("Avis", "Trustpilot").
  const reviewSite = /trustpilot|avis|review|capterra|g2\.com|glassdoor/i;
  for (const r of reviews.results) {
    if (reviewSite.test(r.url + r.title)) {
      addMention('review', r, { category: 'review' });
    }
  }

  return {
    brandName,
    checkedAt: new Date().toISOString(),
    mentions,
    mentionCount: mentions.length,
    // Include the review query so a failure there is not silently lost.
    error: news.error || web.error || reviews.error || null,
  };
}
|
|
159
|
+
|
|
160
|
+
/**
 * Look up the Brave web results for a keyword and number them by rank.
 *
 * @param {string} keyword - Query to search for.
 * @param {object} [options] - Forwarded to `braveWebSearch`; overrides the
 *   default `count` of 20.
 * @returns {Promise<Array<{position: number, url: string, domain: string, title: string, snippet: string}>>}
 *   Results in the order returned by the search, with 1-based `position`.
 *   Any `error` reported by the search is not surfaced here.
 */
export async function searchKeywordRankings(keyword, options = {}) {
  const { results } = await braveWebSearch(keyword, { count: 20, ...options });

  const rankings = [];
  for (const [index, hit] of results.entries()) {
    const { url, domain, title, snippet } = hit;
    rankings.push({ position: index + 1, url, domain, title, snippet });
  }
  return rankings;
}
|
|
174
|
+
|
|
175
|
+
/**
 * Social media search via Brave — restricts a web search to the requested
 * platforms with `site:` filters and groups the hits by platform.
 *
 * @param {string} query - Search query.
 * @param {string[]} [platforms] - Any of 'twitter', 'reddit', 'linkedin';
 *   unknown names are ignored.
 * @param {object} [options] - Forwarded to `braveWebSearch`.
 * @param {number} [options.count=15] - Number of results to request.
 * @returns {Promise<{results: Array<object>, byPlatform: Object<string, Array<object>>, error: (string|null)}>}
 *   Each result is a web search result plus a `platform` field
 *   ('twitter', 'reddit', 'linkedin' or 'other').
 */
export async function searchSocial(query, platforms = ['twitter', 'reddit', 'linkedin'], options = {}) {
  const apiKey = getApiKey();
  if (!apiKey) return { results: [], byPlatform: {}, error: 'No BRAVE_API_KEY set' };

  const platformHosts = {
    twitter: ['x.com', 'twitter.com'],
    reddit: ['reddit.com'],
    linkedin: ['linkedin.com'],
  };

  const siteQuery = platforms
    .flatMap(p => platformHosts[p] || [])
    .map(host => `site:${host}`)
    .join(' OR ');

  // Without any recognised platform the query would end in an empty "()".
  if (!siteQuery) {
    return { results: [], byPlatform: {}, error: 'No supported platforms requested' };
  }

  const fullQuery = `${query} (${siteQuery})`;

  // Spread first so an explicit `count: undefined` in options cannot override the default.
  const search = await braveWebSearch(fullQuery, { ...options, count: options.count || 15 });

  const hostOf = (r) => {
    if (r.domain) return String(r.domain).toLowerCase();
    try {
      return new URL(r.url).hostname.toLowerCase();
    } catch {
      return '';
    }
  };

  // Match on the hostname (exact or subdomain), not on a substring of the URL:
  // "x.com" is a substring of e.g. "netflix.com" and "wix.com".
  const platformOf = (host) => {
    for (const [platform, hosts] of Object.entries(platformHosts)) {
      if (hosts.some(base => host === base || host.endsWith(`.${base}`))) return platform;
    }
    return 'other';
  };

  const results = (search.results || []).map(r => ({ ...r, platform: platformOf(hostOf(r)) }));

  const byPlatform = {};
  for (const r of results) {
    if (!byPlatform[r.platform]) byPlatform[r.platform] = [];
    byPlatform[r.platform].push(r);
  }

  return { results, byPlatform, error: search.error };
}
|
|
215
|
+
|
|
216
|
+
/**
 * Extract review ratings from search snippets.
 *
 * Looks at the lower-cased title + snippet of each result for a rating
 * ("4,5 sur 5", "4.5/5", "4.5 stars", ...) and a review count
 * ("1 234 avis", "2,345 reviews", ...). One result can produce several
 * entries when it matches more than one platform rule.
 *
 * @param {Array<{url?: string, title?: string, snippet?: string}>} results -
 *   Search results; null/undefined is treated as an empty list.
 * @returns {Array<{name: string, url: string, rating: (number|null), reviewCount: (string|null)}>}
 *   `reviewCount` is a string of digits with thousands separators removed.
 */
export function extractRatingsFromResults(results) {
  // One rating pattern for every platform so French and English phrasings are
  // recognised consistently.
  const ratingPattern = /(\d[.,]\d)\s*(?:\/\s*5|sur\s*5|out of 5|stars?|étoiles?)/;
  // The count must start with a digit. The previous character class also
  // matched bare whitespace/punctuation, so "customer reviews" produced an
  // empty count. Accepts plain digits or groups of three separated by a
  // space, comma or dot (`\s` also covers the non-breaking spaces used in
  // French number formatting).
  const countPattern = /(\d{1,3}(?:[\s.,]\d{3})+|\d+)\s*(?:avis|reviews?|évaluations?)/;

  const platforms = [];

  for (const r of results || []) {
    const url = String(r.url ?? '').toLowerCase();
    const text = `${r.title ?? ''} ${r.snippet ?? ''}`.toLowerCase();

    const ratingMatch = text.match(ratingPattern);
    const countMatch = text.match(countPattern);
    const rating = ratingMatch ? parseFloat(ratingMatch[1].replace(',', '.')) : null;
    const reviewCount = countMatch ? countMatch[1].replace(/\D/g, '') : null;

    // Trustpilot: a rating or a review count alone is enough.
    if ((url.includes('trustpilot') || text.includes('trustpilot')) && (ratingMatch || countMatch)) {
      platforms.push({ name: 'Trustpilot', url: r.url, rating, reviewCount });
    }

    // Google reviews: requires a rating.
    if (/google/.test(text) && /avis|review/.test(text) && ratingMatch) {
      platforms.push({ name: 'Google', url: r.url, rating, reviewCount });
    }

    // Glassdoor (employer reputation): requires a rating; no count is reported.
    if (url.includes('glassdoor') && ratingMatch) {
      platforms.push({ name: 'Glassdoor', url: r.url, rating, reviewCount: null });
    }
  }

  return platforms;
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import { fetch } from '../utils/fetcher.js';
|
|
2
|
+
import { load } from '../utils/parser.js';
|
|
3
|
+
import { analyzeSentiment, categorizeMention } from '../utils/sentiment.js';
|
|
4
|
+
|
|
5
|
+
// Google search endpoint used for the news query. The news vertical is
// selected with the `tbm=nws` query parameter, not with a different URL.
const GOOGLE_NEWS_BASE = 'https://www.google.com/search';
|
|
6
|
+
// Google search endpoint used for the recent-web query (`tbs=qdr:d`).
const GOOGLE_SEARCH_BASE = 'https://www.google.com/search';
|
|
7
|
+
|
|
8
|
+
/**
 * Turn every external link of a parsed Google results page into a mention.
 *
 * Links that are not absolute http(s) URLs, point to google.com, have a title
 * shorter than 10 characters, or whose URL was already recorded are skipped.
 *
 * @param {Function} $ - Document handle returned by `load`.
 * @param {string} source - Value stored in each mention's `source` field.
 * @param {Function} readTitle - Receives the wrapped link element, returns its title text.
 * @param {Array<object>} mentions - Output list; new mentions are appended.
 * @param {Set<string>} seenUrls - URLs already recorded; updated in place.
 */
function collectLinkMentions($, source, readTitle, mentions, seenUrls) {
  $('a[href]').each((_, el) => {
    const href = $(el).attr('href') || '';
    if (!href.startsWith('http') || href.includes('google.com')) return;

    const title = readTitle($(el));
    if (title.length < 10) return;

    // Result pages often link the same article more than once.
    if (seenUrls.has(href)) return;

    try {
      // Only strip a leading "www."; hosts merely containing it are left intact.
      const domain = new URL(href).hostname.replace(/^www\./, '');
      const snippet = $(el).parent().text().replace(title, '').trim().slice(0, 200);
      const sentiment = analyzeSentiment(title + ' ' + snippet);
      const category = categorizeMention(href, title, snippet);

      mentions.push({
        source,
        url: href,
        domain,
        title: title.slice(0, 200),
        snippet,
        sentiment: sentiment.label,
        sentimentScore: sentiment.score,
        category,
        foundAt: new Date().toISOString(),
      });
      seenUrls.add(href);
    } catch {
      // Best effort: a link that cannot be parsed or analysed is skipped.
    }
  });
}

/**
 * Scrape Google for mentions of a brand: first the news results, then web
 * results from the last 24 hours, waiting 2 s between the two requests.
 *
 * Never throws. Request failures and non-200 responses are collected in the
 * `error` field instead of being dropped, so callers can tell "no mentions"
 * apart from "scrape failed".
 *
 * @param {string} brandName - Brand to search for.
 * @param {object} [options] - Currently unused.
 * @returns {Promise<{brandName: string, checkedAt: string, mentions: Array<object>, mentionCount: number, error: (string|null)}>}
 *   `mentions` holds at most 40 entries; `mentionCount` is the number found
 *   before that cap.
 */
export async function scrapeNewsMentions(brandName, options = {}) {
  const mentions = [];
  const seenUrls = new Set();
  const errors = [];

  // Fetch one results page and harvest its links; failures are recorded, not thrown.
  const runPass = async (label, baseUrl, query, source, readTitle) => {
    try {
      const url = `${baseUrl}?${new URLSearchParams(query)}`;
      const response = await fetch(url, { retries: 3, delay: 2000 });

      if (response.status !== 200) {
        errors.push(`${label} returned status ${response.status}`);
        return;
      }
      collectLinkMentions(load(response.data), source, readTitle, mentions, seenUrls);
    } catch (err) {
      // Graceful degradation: keep whatever the other pass produced.
      errors.push(`${label} failed: ${err.message}`);
    }
  };

  // Search Google News
  await runPass(
    'Google News',
    GOOGLE_NEWS_BASE,
    { q: brandName, tbm: 'nws', num: '20', hl: 'en' },
    'google_news',
    link => link.text().trim()
  );

  // Also search recent web results (last 24h)
  await new Promise(resolve => setTimeout(resolve, 2000));
  await runPass(
    'Google web (24h)',
    GOOGLE_SEARCH_BASE,
    { q: brandName, tbs: 'qdr:d', num: '20', hl: 'en' },
    'google_web_24h',
    link => link.find('h3').text().trim() || link.text().trim().slice(0, 100)
  );

  return {
    brandName,
    checkedAt: new Date().toISOString(),
    mentions: mentions.slice(0, 40),
    mentionCount: mentions.length,
    error: errors.length > 0 ? errors.join('; ') : null,
  };
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import { fetch } from '../utils/fetcher.js';
|
|
2
|
+
import { load } from '../utils/parser.js';
|
|
3
|
+
|
|
4
|
+
// Google search endpoint requested by scrapeSerp; query parameters are appended to it.
const GOOGLE_BASE = 'https://www.google.com/search';
|
|
5
|
+
|
|
6
|
+
/**
 * Scrape the Google results page for a keyword and return ranked results.
 *
 * @param {string} keyword - Query to search for.
 * @param {object} [options]
 * @param {number} [options.num=20] - Maximum number of results to return.
 * @param {string} [options.lang='en'] - Interface language (`hl`).
 * @param {string} [options.country='us'] - Country code (`gl`).
 * @returns {Promise<{keyword: string, checkedAt: string, results: Array<object>, resultCount: number, error: (string|null)}>}
 *   `results` is capped at `num`; `resultCount` is the number parsed before
 *   the cap. `error` is set when nothing could be parsed.
 * @throws {Error} When the request fails, Google answers 429, or any other
 *   non-200 status is returned.
 */
export async function scrapeSerp(keyword, options = {}) {
  const { num = 20, lang = 'en', country = 'us' } = options;

  const params = new URLSearchParams({
    q: keyword,
    num: String(num),
    hl: lang,
    gl: country,
  });

  const url = `${GOOGLE_BASE}?${params}`;

  let response;
  try {
    response = await fetch(url, {
      retries: 3,
      delay: 2000,
      headers: {
        'Accept': 'text/html,application/xhtml+xml',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.google.com/',
      },
    });
  } catch (err) {
    // Keep the original error reachable for debugging.
    throw new Error(`Google SERP fetch failed: ${err.message}`, { cause: err });
  }

  if (response.status === 429) {
    throw new Error('Google rate limited (429). Try again later.');
  }

  if (response.status !== 200) {
    throw new Error(`Google returned status ${response.status}`);
  }

  const $ = load(response.data);
  const results = [];
  const seenUrls = new Set();

  // Record one result. Unparseable URLs are skipped, and a URL is counted
  // once: result containers can be nested, which would otherwise yield the
  // same link twice and shift every later position.
  const addResult = (title, resultUrl, snippet) => {
    if (seenUrls.has(resultUrl)) return;
    let domain;
    try {
      // Only strip a leading "www."; hosts merely containing it are left intact.
      domain = new URL(resultUrl).hostname.replace(/^www\./, '');
    } catch {
      return;
    }
    seenUrls.add(resultUrl);
    results.push({
      position: results.length + 1,
      title,
      url: resultUrl,
      domain,
      snippet,
      isFeaturedSnippet: false,
    });
  };

  // Parse organic results
  $('div.g, div[data-sokoban-container]').each((i, el) => {
    const title = $(el).find('h3').first().text().trim();
    const href = $(el).find('a[href]').first().attr('href') || '';
    const snippet = $(el).find('[data-sncf], .VwiC3b, .s3v9rd, span[data-ved]').first().text().trim();

    if (!title || !href) return;

    // Redirect links carry the real target in the `q` parameter.
    let cleanUrl = href;
    if (href.startsWith('/url?')) {
      cleanUrl = new URLSearchParams(href.slice(5)).get('q') || href;
    }

    addResult(title, cleanUrl, snippet);
  });

  // Try alternate selectors if main ones didn't work
  if (results.length === 0) {
    $('a[href]').each((i, el) => {
      if (results.length >= num) return;

      const href = $(el).attr('href') || '';
      if (!href.startsWith('http') || href.includes('google.com')) return;

      const title = $(el).find('h3').text().trim() || $(el).text().trim().slice(0, 100);
      if (!title) return;

      addResult(title, href, '');
    });
  }

  // Check for featured snippet
  const featuredSnippet = $('[data-attrid="wa:/description"], .xpdopen, .g-blk').first();
  if (featuredSnippet.length && results.length > 0) {
    results[0].isFeaturedSnippet = true;
  }

  return {
    keyword,
    checkedAt: new Date().toISOString(),
    results: results.slice(0, num),
    resultCount: results.length,
    error: results.length === 0 ? 'No results parsed (Google may have changed layout or rate-limited)' : null,
  };
}
|