intelwatch 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +72 -118
- package/ROADMAP-PREMIUM.md +17 -17
- package/package.json +2 -2
- package/src/ai/client.js +82 -3
- package/src/commands/compare.js +323 -0
- package/src/commands/discover.js +3 -3
- package/src/commands/profile.js +90 -28
- package/src/index.js +5 -4
- package/src/providers/annuaire-entreprises.js +83 -0
- package/src/providers/index.js +2 -0
- package/src/providers/registry.js +156 -50
- package/src/scrapers/annuaire-entreprises.js +461 -0
- package/src/scrapers/searxng-search.js +486 -0
- package/src/trackers/competitor.js +3 -3
- package/src/trackers/keyword.js +3 -3
- package/src/trackers/person.js +3 -3
- package/src/utils/fetcher.js +123 -4
- package/src/utils/parser.js +5 -3
- package/Endrix-Intelwatch-DueDil.pdf +0 -0
- package/src/scrapers/brave-search.js +0 -281
package/src/utils/fetcher.js
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
import axios from 'axios';
|
|
2
2
|
|
|
3
|
+
// ── Debug stub (remplacer par logger réel en prod) ──────────────────────────
|
|
4
|
+
/**
 * Debug logger stub for this module.
 *
 * Writes its arguments to stdout, prefixed with "[fetcher]", only when the
 * DEBUG_FETCHER environment variable is set to a non-empty value.
 *
 * @param {...*} args - Values forwarded to console.log.
 * @returns {void}
 */
const debug = (...args) => {
  if (!process.env.DEBUG_FETCHER) {
    return;
  }
  console.log('[fetcher]', ...args);
};
|
|
7
|
+
|
|
8
|
+
// ── User-Agent rotation ────────────────────────────────────────────────────
|
|
3
9
|
const USER_AGENTS = [
|
|
4
10
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
5
11
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
@@ -12,18 +18,108 @@ function randomUserAgent() {
|
|
|
12
18
|
return USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];
|
|
13
19
|
}
|
|
14
20
|
|
|
15
|
-
|
|
21
|
+
// ── Utilitaires ────────────────────────────────────────────────────────────
|
|
22
|
+
/**
 * Pause asynchronously for a given duration.
 *
 * @param {number} ms - Delay in milliseconds, passed to setTimeout.
 * @returns {Promise<void>} Promise that resolves (with no value) once the timer fires.
 */
export function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
18
25
|
|
|
26
|
+
// ── Domaines protégés (Cloudflare / anti-bot lourd) ────────────────────────
|
|
27
|
+
// Domains treated as protected (Cloudflare / heavy anti-bot) by this module.
const PROTECTED_DOMAINS = ['pappers.fr', 'societe.com', 'verif.com', 'score3.fr', 'manageo.fr'];

/**
 * Tell whether a URL targets one of the PROTECTED_DOMAINS or a subdomain of one.
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {boolean} true when the URL's hostname equals a protected domain or
 *   ends with ".<domain>"; false otherwise, including when the URL cannot be parsed.
 */
export function isProtectedDomain(url) {
  let hostname;
  try {
    ({ hostname } = new URL(url));
  } catch {
    // Unparsable input is simply "not protected" rather than an error.
    return false;
  }

  // A fully-qualified hostname may carry a trailing dot ("www.pappers.fr.");
  // drop it so that form is matched like the ordinary one.
  const host = hostname.replace(/\.$/, '');
  return PROTECTED_DOMAINS.some((domain) => host === domain || host.endsWith(`.${domain}`));
}
|
|
37
|
+
|
|
38
|
+
// ── Camofox (anti-bot bypass local) ────────────────────────────────────────
|
|
39
|
+
const CAMOFOX_BASE = 'http://localhost:9377';
|
|
40
|
+
const CAMOFOX_USER_ID = 'intelwatch';
|
|
41
|
+
const CAMOFOX_SESSION_KEY = 'default';
|
|
42
|
+
const CAMOFOX_WAIT_MS = 7000;
|
|
43
|
+
|
|
44
|
+
/**
 * Fetch a page through the local Camofox service (anti-bot bypass).
 *
 * Sequence: probe GET /health, open a tab with POST /tabs, wait
 * CAMOFOX_WAIT_MS for the anti-bot challenge to resolve, read
 * GET /tabs/{tabId}/snapshot, then always delete the tab.
 *
 * @param {string} url - Page to load in the Camofox tab.
 * @param {object} [options]
 * @param {number} [options.timeout=30000] - Timeout (ms) for the tab-creation and snapshot requests.
 * @returns {Promise<object>} Object shaped like an Axios response (status, statusText,
 *   headers, data, config, request) built from the snapshot call, plus `_camofox: true`.
 * @throws {Error} When the health probe fails (the probe error is attached as `cause`),
 *   when no tab id is returned, or when the tab-creation/snapshot request rejects.
 */
export async function camofoxFetch(url, options = {}) {
  const { timeout = 30000 } = options;

  // Probe availability first with a short timeout so a missing service fails fast.
  try {
    await axios.get(`${CAMOFOX_BASE}/health`, { timeout: 2000 });
  } catch (err) {
    debug('camofox indisponible sur', CAMOFOX_BASE);
    throw new Error(
      `Camofox unavailable at ${CAMOFOX_BASE} — cannot bypass protection for ${url}`,
      { cause: err },
    );
  }

  let tabId;
  try {
    // POST /tabs — open a browser tab on the target URL.
    const createRes = await axios.post(`${CAMOFOX_BASE}/tabs`, {
      userId: CAMOFOX_USER_ID,
      sessionKey: CAMOFOX_SESSION_KEY,
      url,
    }, { timeout });

    tabId = createRes.data?.tabId || createRes.data?.id;
    if (!tabId) throw new Error('Camofox: no tabId returned from POST /tabs');

    debug('camofox tab created:', tabId, '— waiting', CAMOFOX_WAIT_MS, 'ms');

    // Give the anti-bot challenge time to resolve before reading the page.
    await sleep(CAMOFOX_WAIT_MS);

    // GET /tabs/{tabId}/snapshot — retrieve the rendered page.
    const snapRes = await axios.get(
      `${CAMOFOX_BASE}/tabs/${encodeURIComponent(tabId)}/snapshot`,
      { params: { userId: CAMOFOX_USER_ID }, timeout },
    );

    // Wrap in an Axios-response-compatible shape so callers can treat it alike.
    return {
      status: snapRes.status,
      statusText: snapRes.statusText,
      headers: snapRes.headers,
      data: snapRes.data,
      config: snapRes.config,
      request: snapRes.request,
      _camofox: true,
    };
  } finally {
    // Always clean up the tab, even on error; a failed cleanup is only logged
    // so it never masks the primary outcome.
    if (tabId) {
      try {
        await axios.delete(`${CAMOFOX_BASE}/tabs/${encodeURIComponent(tabId)}`, {
          params: { userId: CAMOFOX_USER_ID },
          timeout: 5000,
        });
        debug('camofox tab cleaned up:', tabId);
      } catch (err) {
        debug('camofox cleanup failed for tab', tabId, err.message);
      }
    }
  }
}
|
|
104
|
+
|
|
105
|
+
// ── Fetch principal (Axios + fallback Camofox) ─────────────────────────────
|
|
19
106
|
export async function fetch(url, options = {}) {
|
|
20
107
|
const {
|
|
21
108
|
retries = 3,
|
|
22
109
|
delay = 1500,
|
|
23
110
|
timeout = 15000,
|
|
24
111
|
headers = {},
|
|
112
|
+
forceCamofox = false,
|
|
25
113
|
} = options;
|
|
26
114
|
|
|
115
|
+
// Mode force : court-circuiter Axios, aller direct Camofox
|
|
116
|
+
if (forceCamofox) {
|
|
117
|
+
return camofoxFetch(url, options);
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// Domaine protégé connu : tentative Axios puis fallback si 403
|
|
121
|
+
const protected_ = isProtectedDomain(url);
|
|
122
|
+
|
|
27
123
|
const config = {
|
|
28
124
|
url,
|
|
29
125
|
method: options.method || 'GET',
|
|
@@ -41,6 +137,8 @@ export async function fetch(url, options = {}) {
|
|
|
41
137
|
};
|
|
42
138
|
|
|
43
139
|
let lastError;
|
|
140
|
+
let needsCamofox = false;
|
|
141
|
+
|
|
44
142
|
for (let attempt = 1; attempt <= retries; attempt++) {
|
|
45
143
|
try {
|
|
46
144
|
if (attempt > 1) {
|
|
@@ -59,6 +157,13 @@ export async function fetch(url, options = {}) {
|
|
|
59
157
|
throw new Error(`Rate limited (429) after ${retries} attempts`);
|
|
60
158
|
}
|
|
61
159
|
|
|
160
|
+
// 403 = signature Cloudflare → fallback Camofox
|
|
161
|
+
if (response.status === 403) {
|
|
162
|
+
debug('403 détecté pour', url, '— fallback camofox');
|
|
163
|
+
needsCamofox = true;
|
|
164
|
+
break;
|
|
165
|
+
}
|
|
166
|
+
|
|
62
167
|
return response;
|
|
63
168
|
} catch (err) {
|
|
64
169
|
lastError = err;
|
|
@@ -68,9 +173,25 @@ export async function fetch(url, options = {}) {
|
|
|
68
173
|
}
|
|
69
174
|
}
|
|
70
175
|
|
|
71
|
-
|
|
176
|
+
// Fallback Camofox si 403 ou domaine protégé (et Axios a échoué)
|
|
177
|
+
if (needsCamofox || (protected_ && lastError)) {
|
|
178
|
+
try {
|
|
179
|
+
return await camofoxFetch(url, options);
|
|
180
|
+
} catch (camofoxErr) {
|
|
181
|
+
// Camofox indisponible → propager l'erreur Axios originale
|
|
182
|
+
debug('camofox fallback échoué:', camofoxErr.message);
|
|
183
|
+
if (lastError) throw lastError;
|
|
184
|
+
throw camofoxErr;
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
if (lastError) throw lastError;
|
|
189
|
+
|
|
190
|
+
// Ne devrait jamais arriver, mais sécurité
|
|
191
|
+
throw new Error(`fetch failed for ${url}`);
|
|
72
192
|
}
|
|
73
193
|
|
|
194
|
+
// ── Fetch avec jitter ───────────────────────────────────────────────────────
|
|
74
195
|
export async function fetchWithDelay(url, options = {}) {
|
|
75
196
|
const minDelay = options.minDelay ?? 1000;
|
|
76
197
|
const maxDelay = options.maxDelay ?? 2000;
|
|
@@ -78,5 +199,3 @@ export async function fetchWithDelay(url, options = {}) {
|
|
|
78
199
|
await sleep(jitter);
|
|
79
200
|
return fetch(url, options);
|
|
80
201
|
}
|
|
81
|
-
|
|
82
|
-
export { sleep };
|
package/src/utils/parser.js
CHANGED
|
@@ -89,9 +89,11 @@ export function extractPricing($, html) {
|
|
|
89
89
|
const planKeywords = ['starter', 'basic', 'pro', 'professional', 'business', 'enterprise', 'free', 'premium', 'plus'];
|
|
90
90
|
const plans = [];
|
|
91
91
|
for (const kw of planKeywords) {
|
|
92
|
-
|
|
93
|
-
const
|
|
94
|
-
|
|
92
|
+
// Rechercher dans le texte propre (au lieu du code HTML raw) pour éviter de capturer du code source
|
|
93
|
+
const textContent = $.text().replace(/\s+/g, ' ');
|
|
94
|
+
const regex = new RegExp(`(?:^|\\s)${kw}\\s[^$€£]{0,50}?[$€£][\\d,.]+`, 'gi');
|
|
95
|
+
const matches = textContent.match(regex) || [];
|
|
96
|
+
plans.push(...matches.slice(0, 2).map(m => m.trim()));
|
|
95
97
|
}
|
|
96
98
|
|
|
97
99
|
return {
|
|
Binary file
|
|
@@ -1,281 +0,0 @@
|
|
|
1
|
-
import axios from 'axios';
|
|
2
|
-
import { analyzeSentiment, categorizeMention } from '../utils/sentiment.js';
|
|
3
|
-
|
|
4
|
-
const BRAVE_API = 'https://api.search.brave.com/res/v1';
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
* Search via Brave Search API — reliable, no rate limiting issues.
|
|
8
|
-
* Uses BRAVE_API_KEY env var or falls back to config.
|
|
9
|
-
*/
|
|
10
|
-
/**
 * Resolve the Brave API key from the environment.
 *
 * @returns {string|null} BRAVE_API_KEY when non-empty, otherwise
 *   BRAVE_SEARCH_API_KEY when non-empty, otherwise null.
 */
function getApiKey() {
  const { BRAVE_API_KEY: primary, BRAVE_SEARCH_API_KEY: alternate } = process.env;
  if (primary) return primary;
  if (alternate) return alternate;
  return null;
}
|
|
13
|
-
|
|
14
|
-
/**
|
|
15
|
-
* Web search via Brave
|
|
16
|
-
*/
|
|
17
|
-
/**
 * Run a web search against the Brave Search API.
 *
 * Never rejects: failures are reported through the `error` field.
 *
 * @param {string} query - Search query.
 * @param {object} [options]
 * @param {number} [options.count=20] - Number of results requested.
 * @param {string} [options.country='FR'] - Country code sent to the API.
 * @param {string} [options.lang='fr'] - Sent as `search_lang`.
 * @param {string} [options.freshness] - 'pd' (day), 'pw' (week) or 'pm' (month); omitted when falsy.
 * @returns {Promise<{results: Array<{title, url, domain, snippet, age}>, error: (string|null)}>}
 */
export async function braveWebSearch(query, options = {}) {
  const apiKey = getApiKey();
  if (!apiKey) return { results: [], error: 'No BRAVE_API_KEY set' };

  // Strip only a *leading* "www." — a plain string replace would also mangle
  // hostnames that merely contain "www." somewhere else.
  const stripWww = (host) => host.replace(/^www\./, '');

  // Prefer the hostname reported by the API, fall back to parsing the result URL.
  // A single unparsable URL yields an empty domain instead of failing the whole search.
  const domainOf = (r) => {
    const metaDomain = stripWww(r.meta_url?.hostname ?? '');
    if (metaDomain) return metaDomain;
    try {
      return stripWww(new URL(r.url).hostname);
    } catch {
      return '';
    }
  };

  try {
    const params = {
      q: query,
      count: options.count || 20,
      country: options.country || 'FR',
      search_lang: options.lang || 'fr',
      freshness: options.freshness || undefined,
    };

    const resp = await axios.get(`${BRAVE_API}/web/search`, {
      headers: { 'X-Subscription-Token': apiKey, 'Accept': 'application/json' },
      params,
      timeout: 15000,
    });

    const results = (resp.data.web?.results || []).map((r) => ({
      title: r.title,
      url: r.url,
      domain: domainOf(r),
      snippet: r.description || '',
      age: r.age || null,
    }));

    return { results, error: null };
  } catch (err) {
    return { results: [], error: err.message };
  }
}
|
|
49
|
-
|
|
50
|
-
/**
|
|
51
|
-
* News search via Brave
|
|
52
|
-
*/
|
|
53
|
-
/**
 * Run a news search against the Brave Search API.
 *
 * Never rejects: failures are reported through the `error` field.
 *
 * @param {string} query - Search query.
 * @param {object} [options]
 * @param {number} [options.count=20] - Number of results requested.
 * @param {string} [options.country='FR'] - Country code sent to the API.
 * @param {string} [options.lang='fr'] - Sent as `search_lang`.
 * @param {string} [options.freshness='pm'] - Freshness window; defaults to the last month.
 * @returns {Promise<{results: Array<{title, url, domain, snippet, age, source}>, error: (string|null)}>}
 */
export async function braveNewsSearch(query, options = {}) {
  const apiKey = getApiKey();
  if (!apiKey) return { results: [], error: 'No BRAVE_API_KEY set' };

  try {
    const params = {
      q: query,
      count: options.count || 20,
      country: options.country || 'FR',
      search_lang: options.lang || 'fr',
      freshness: options.freshness || 'pm',
    };

    const resp = await axios.get(`${BRAVE_API}/news/search`, {
      headers: { 'X-Subscription-Token': apiKey, 'Accept': 'application/json' },
      params,
      timeout: 15000,
    });

    const results = (resp.data.results || []).map((r) => {
      const host = r.meta_url?.hostname || '';
      return {
        title: r.title,
        url: r.url,
        // Strip only a leading "www." so hostnames containing it elsewhere stay intact.
        domain: host.replace(/^www\./, ''),
        snippet: r.description || '',
        age: r.age || null,
        source: host,
      };
    });

    return { results, error: null };
  } catch (err) {
    return { results: [], error: err.message };
  }
}
|
|
86
|
-
|
|
87
|
-
/**
|
|
88
|
-
* Full press & mentions search for a brand/company.
|
|
89
|
-
* Combines news + web results, analyzes sentiment, categorizes.
|
|
90
|
-
*/
|
|
91
|
-
/**
 * Collect press, web and review mentions of a brand.
 *
 * Runs three searches in sequence (news, recent web mentions, review sites),
 * with a 500 ms pause between them, scores each hit with analyzeSentiment,
 * de-duplicates web/review hits by URL, then keeps only mentions whose
 * title, snippet or domain actually contains the brand name.
 *
 * @param {string} brandName - Brand or company name to look for.
 * @param {object} [options] - Forwarded to the underlying searches; its keys
 *   override the per-search defaults (freshness, count).
 * @returns {Promise<{brandName: string, checkedAt: string, mentions: object[],
 *   mentionCount: number, unfilteredCount: number, error: (string|null)}>}
 *   `error` is the first error reported by any of the three searches.
 */
export async function searchPressMentions(brandName, options = {}) {
  const mentions = [];
  const seenUrls = new Set();
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Build one mention record and remember its URL for later de-duplication.
  const addMention = (source, r, domain, category) => {
    const sentiment = analyzeSentiment(`${r.title} ${r.snippet}`);
    mentions.push({
      source,
      url: r.url,
      domain,
      title: r.title,
      snippet: r.snippet?.substring(0, 300),
      age: r.age,
      sentiment: sentiment.label,
      sentimentScore: sentiment.score,
      category: category ?? categorizeMention(r.url, r.title, r.snippet),
    });
    seenUrls.add(r.url);
  };

  // 1. News search (news hits are not de-duplicated against each other).
  const news = await braveNewsSearch(brandName, { freshness: 'pm', ...options });
  for (const r of news.results) {
    addMention('news', r, r.domain || r.source);
  }

  // 2. Web search for recent mentions.
  await pause(500);
  const web = await braveWebSearch(`"${brandName}" avis OR actualité OR news`, { freshness: 'pw', ...options });
  for (const r of web.results) {
    if (seenUrls.has(r.url)) continue;
    addMention('web', r, r.domain);
  }

  // 3. Review-specific search; keep only hits that look like review pages.
  await pause(500);
  const reviews = await braveWebSearch(`"${brandName}" avis clients trustpilot`, { count: 10, ...options });
  // Case-insensitive: titles usually capitalise "Trustpilot", "Avis", "Review"...
  const reviewPattern = /trustpilot|avis|review|capterra|g2\.com|glassdoor/i;
  for (const r of reviews.results) {
    if (seenUrls.has(r.url)) continue;
    if (reviewPattern.test(r.url + r.title)) {
      addMention('review', r, r.domain, 'review');
    }
  }

  // Relevance filter: drop results that do not actually mention the brand.
  // Matching is exact (full name, or every word of a multi-word name); there is
  // no fuzzy matching, so near-misses such as "Endrick" for "Endrix" are excluded.
  const brandLower = brandName.toLowerCase().trim();
  const brandWords = brandLower.split(/\s+/);
  const filtered = mentions.filter((m) => {
    const text = `${m.title || ''} ${m.snippet || ''} ${m.domain || ''}`.toLowerCase();
    if (text.includes(brandLower)) return true;
    return brandWords.length > 1 && brandWords.every((w) => text.includes(w));
  });

  return {
    brandName,
    checkedAt: new Date().toISOString(),
    mentions: filtered,
    mentionCount: filtered.length,
    unfilteredCount: mentions.length,
    error: news.error || web.error || reviews.error || null,
  };
}
|
|
172
|
-
|
|
173
|
-
/**
|
|
174
|
-
* Search SERP rankings for a keyword
|
|
175
|
-
*/
|
|
176
|
-
/**
 * Fetch SERP results for a keyword and number them by rank.
 *
 * @param {string} keyword - Keyword to search for.
 * @param {object} [options] - Forwarded to braveWebSearch; overrides the default count of 20.
 * @returns {Promise<Array<{position: number, url, domain, title, snippet}>>}
 *   One entry per result, `position` starting at 1.
 */
export async function searchKeywordRankings(keyword, options = {}) {
  const { results } = await braveWebSearch(keyword, { count: 20, ...options });

  const rankings = [];
  for (const [index, hit] of results.entries()) {
    const { url, domain, title, snippet } = hit;
    rankings.push({ position: index + 1, url, domain, title, snippet });
  }
  return rankings;
}
|
|
187
|
-
|
|
188
|
-
/**
|
|
189
|
-
* Social media search via Brave — filters by platform.
|
|
190
|
-
* platforms: array of 'twitter', 'reddit', 'linkedin'
|
|
191
|
-
*/
|
|
192
|
-
/**
 * Search social platforms through Brave web search, restricted by `site:` filters.
 *
 * @param {string} query - Search query.
 * @param {string[]} [platforms=['twitter','reddit','linkedin']] - Platforms to
 *   restrict to; unknown names are ignored.
 * @param {object} [options] - Forwarded to braveWebSearch; `count` defaults to 15.
 * @returns {Promise<{results: object[], byPlatform: Object<string, object[]>, error: (string|null)}>}
 *   Each result carries a `platform` field ('twitter', 'reddit', 'linkedin' or 'other').
 */
export async function searchSocial(query, platforms = ['twitter', 'reddit', 'linkedin'], options = {}) {
  const apiKey = getApiKey();
  if (!apiKey) return { results: [], byPlatform: {}, error: 'No BRAVE_API_KEY set' };

  const platformDomains = {
    twitter: ['x.com', 'twitter.com'],
    reddit: ['reddit.com'],
    linkedin: ['linkedin.com'],
  };

  const siteQuery = platforms
    .filter((p) => Object.hasOwn(platformDomains, p))
    .map((p) => platformDomains[p].map((d) => `site:${d}`).join(' OR '))
    .join(' OR ');

  // With no recognised platform, search the bare query rather than appending "()".
  const fullQuery = siteQuery ? `${query} (${siteQuery})` : query;

  // Apply the default last so an undefined/zero `count` in options cannot clobber it.
  const search = await braveWebSearch(fullQuery, { ...options, count: options.count || 15 });

  // Classify by hostname (exact domain or subdomain). A substring test on the
  // whole URL would label e.g. netflix.com or box.com as "twitter" via "x.com".
  const platformOf = (rawUrl) => {
    let host;
    try {
      host = new URL(rawUrl).hostname.toLowerCase();
    } catch {
      return 'other';
    }
    for (const [platform, domains] of Object.entries(platformDomains)) {
      if (domains.some((d) => host === d || host.endsWith(`.${d}`))) return platform;
    }
    return 'other';
  };

  const results = (search.results || []).map((r) => ({ ...r, platform: platformOf(r.url) }));

  const byPlatform = {};
  for (const r of results) {
    (byPlatform[r.platform] ??= []).push(r);
  }

  return { results, byPlatform, error: search.error };
}
|
|
228
|
-
|
|
229
|
-
/**
|
|
230
|
-
* Extract review ratings from search snippets
|
|
231
|
-
*/
|
|
232
|
-
/**
 * Extract review ratings from search result titles and snippets.
 *
 * Looks for Trustpilot, Google-review and Glassdoor patterns in the lowercased
 * "title snippet" text of each result. One result may yield several entries.
 *
 * @param {Array<{url: string, title: string, snippet: string}>} results - Search results to scan.
 * @returns {Array<{name: string, url: string, rating: (number|null), reviewCount: (string|null)}>}
 *   `rating` is parsed from patterns like "4,5 / 5" or "4.5 stars";
 *   `reviewCount` is the digit string found before "avis"/"reviews"/"évaluations".
 */
export function extractRatingsFromResults(results) {
  // The count must start with a digit. Allowing whitespace alone to match made
  // any " avis"/" reviews" produce an empty count (and a spurious entry).
  const countPattern = /(\d[\d\s,.]*)\s*(?:avis|reviews?|évaluations?)/;
  const toRating = (match) => parseFloat(match[1].replace(',', '.'));
  const platforms = [];

  for (const r of results ?? []) {
    const text = `${r.title} ${r.snippet}`.toLowerCase();

    // Trustpilot pattern
    if (/trustpilot/.test(r.url) || /trustpilot/.test(text)) {
      const ratingMatch = text.match(/(\d[.,]\d)\s*(?:\/\s*5|sur\s*5|out of 5|stars?|étoiles?)/);
      const countMatch = text.match(countPattern);
      if (ratingMatch || countMatch) {
        platforms.push({
          name: 'Trustpilot',
          url: r.url,
          rating: ratingMatch ? toRating(ratingMatch) : null,
          // Remove every whitespace and comma separator, not just the first comma.
          reviewCount: countMatch ? countMatch[1].replace(/[\s,]/g, '') : null,
        });
      }
    }

    // Google reviews pattern
    if (/google/.test(text) && /avis|review/.test(text)) {
      const ratingMatch = text.match(/(\d[.,]\d)\s*(?:\/\s*5|sur\s*5|stars?|étoiles?)/);
      const countMatch = text.match(countPattern);
      if (ratingMatch) {
        platforms.push({
          name: 'Google',
          url: r.url,
          rating: toRating(ratingMatch),
          reviewCount: countMatch ? countMatch[1].replace(/\s/g, '') : null,
        });
      }
    }

    // Glassdoor (employer reputation)
    if (/glassdoor/.test(r.url)) {
      const ratingMatch = text.match(/(\d[.,]\d)\s*(?:\/\s*5|sur\s*5|stars?)/);
      if (ratingMatch) {
        platforms.push({
          name: 'Glassdoor',
          url: r.url,
          rating: toRating(ratingMatch),
          reviewCount: null,
        });
      }
    }
  }

  return platforms;
}
|