intelwatch 1.3.2 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,486 @@
1
+ /**
2
+ * SearXNG Search Provider — drop-in replacement for brave-search.js
3
+ *
4
+ * Strategy: SearXNG (self-hosted or public instance) as primary,
5
+ * Serper (Google API, cheap key) as optional premium fallback.
6
+ * Zero-cost by default. No mandatory API key.
7
+ *
8
+ * Env vars:
9
+ * SEARXNG_URL — custom SearXNG instance URL (default: public instance)
10
+ * SERPER_API_KEY — optional Serper.dev API key for premium Google results
11
+ */
12
+ import axios from 'axios';
13
+ import { handleError, retry } from '../utils/error-handler.js';
14
+ import { analyzeSentiment, categorizeMention } from '../utils/sentiment.js';
15
+
16
+ // ── Instance management ──────────────────────────────────────────────────────
17
+
18
/** Public SearXNG instances probed when no custom instance is configured or reachable. */
const DEFAULT_PUBLIC_INSTANCES = [
  'https://search.sapti.me',
  'https://searx.be',
  'https://search.bus-hit.me',
  'https://searxng.ch',
  'https://search.mdosch.de',
];

/** Timeout passed to axios for SearXNG search requests. */
const SEARXNG_TIMEOUT = 12 * 1000;

/** Timeout passed to axios for Serper search requests. */
const SERPER_TIMEOUT = 10 * 1000;
28
+
29
/**
 * Read the user-configured SearXNG instance URL.
 *
 * @returns {string|null} Value of the SEARXNG_URL environment variable,
 *   or null when it is unset or empty.
 */
function getSearxngUrl() {
  const { SEARXNG_URL: configuredUrl } = process.env;
  return configuredUrl ? configuredUrl : null;
}
32
+
33
/**
 * Read the optional Serper API key.
 *
 * @returns {string|null} Value of the SERPER_API_KEY environment variable,
 *   or null when it is unset or empty.
 */
function getSerperKey() {
  const { SERPER_API_KEY: configuredKey } = process.env;
  return configuredKey ? configuredKey : null;
}
36
+
37
/**
 * Check whether a SearXNG instance answers a small JSON test query.
 *
 * @param {string} url - URL of the instance to probe.
 * @returns {Promise<boolean>} true only when the request completes with
 *   HTTP 200; false for any other status or when the request throws.
 */
async function probeInstance(url) {
  const requestConfig = {
    params: { q: 'test', format: 'json', pageno: 1 },
    timeout: 5000,
    // Accept every status so a non-200 answer is reported as false instead of throwing.
    validateStatus: () => true,
  };

  try {
    const { status } = await axios.get(url, requestConfig);
    return status === 200;
  } catch {
    return false;
  }
}
52
+
53
/**
 * URL of the last instance that passed a probe, or null when none is cached.
 * Shared by every search call in this module.
 */
let _cachedInstance = null;

/**
 * Resolve a reachable SearXNG instance.
 *
 * Order of attempts: the cached instance, then the SEARXNG_URL instance,
 * then up to three randomly chosen public instances probed one after the
 * other. The first instance that passes the probe is cached and returned.
 *
 * @returns {Promise<string|null>} Instance URL, or null when nothing answered.
 */
async function findWorkingInstance() {
  if (_cachedInstance) {
    return _cachedInstance;
  }

  // 1. Custom instance from the environment; on failure fall through to the public list.
  const customUrl = getSearxngUrl();
  if (customUrl && (await probeInstance(customUrl))) {
    _cachedInstance = customUrl;
    return customUrl;
  }

  // 2. Public instances: shuffle a copy and probe the first three sequentially.
  const publicCandidates = shuffleArray([...DEFAULT_PUBLIC_INSTANCES]).slice(0, 3);
  for (let index = 0; index < publicCandidates.length; index += 1) {
    const candidate = publicCandidates[index];
    const reachable = await probeInstance(candidate);
    if (reachable) {
      _cachedInstance = candidate;
      return candidate;
    }
  }

  return null;
}
83
+
84
/**
 * Shuffle an array in place (Fisher–Yates, walking from the last index down).
 *
 * @param {Array} arr - Array to shuffle; it is mutated.
 * @returns {Array} The same array instance, shuffled.
 */
function shuffleArray(arr) {
  let last = arr.length - 1;
  while (last > 0) {
    const pick = Math.floor(Math.random() * (last + 1));
    const held = arr[last];
    arr[last] = arr[pick];
    arr[pick] = held;
    last -= 1;
  }
  return arr;
}
91
+
92
+ // ── SearXNG API calls ────────────────────────────────────────────────────────
93
+
94
/**
 * Run one query against the working SearXNG instance and normalize the hits.
 *
 * @param {string} query - Search query.
 * @param {object} [options]
 * @param {string} [options.categories='general'] - SearXNG categories value.
 * @param {number} [options.count=20] - Maximum number of results returned.
 * @param {string} [options.language='fr'] - Language parameter sent to SearXNG.
 * @param {string|null} [options.timeRange=null] - 'day', 'week', 'month' or 'year'.
 * @param {number} [options.pageno=1] - Result page number.
 * @returns {Promise<{results: object[], error: string|null}>} Never rejects:
 *   failures are reported through `error` with an empty `results` array.
 */
async function searxngSearch(query, options = {}) {
  const {
    categories = 'general',
    count = 20,
    language = 'fr',
    timeRange = null, // 'day', 'week', 'month', 'year'
    pageno = 1,
  } = options;

  const instanceUrl = await findWorkingInstance();
  if (!instanceUrl) {
    return { results: [], error: 'No SearXNG instance available. Set SEARXNG_URL or check connectivity.' };
  }

  try {
    // `pageno` is sent once, as a string (the original literal listed the key twice).
    const params = {
      q: query,
      format: 'json',
      categories,
      language,
      pageno: String(pageno),
    };
    if (count) params.num = String(count);
    if (timeRange) params.time_range = timeRange;

    // Statuses below 500 resolve so they can be inspected here; 5xx responses
    // reject (presumably triggering the project's retry helper — confirm there).
    const resp = await retry(
      () => axios.get(instanceUrl, {
        params,
        timeout: SEARXNG_TIMEOUT,
        headers: { 'Accept': 'application/json' },
        validateStatus: status => status < 500,
      }),
      { maxAttempts: 2, baseDelay: 1000 }
    );

    if (resp.status === 429) {
      return { results: [], error: 'SearXNG rate limited. Try again later.' };
    }

    if (resp.status !== 200) {
      return { results: [], error: `SearXNG returned HTTP ${resp.status}` };
    }

    const data = resp.data;
    const results = (data.results || []).slice(0, count).map(r => {
      let domain = '';
      try {
        // Strip only a leading "www." label, never an interior one.
        domain = new URL(r.url).hostname.replace(/^www\./, '');
      } catch {
        // Missing or unparseable URL: keep the empty domain.
      }

      return {
        title: r.title || '',
        url: r.url || '',
        domain,
        snippet: r.content || '',
        age: r.publishedDate || null,
        engine: r.engine || null,
        category: r.category || categories,
      };
    });

    return { results, error: null };
  } catch (err) {
    // Reset cached instance on failure so next call rediscovers
    _cachedInstance = null;
    handleError(err, 'searxngSearch');
    return { results: [], error: err.message };
  }
}
167
+
168
+ // ── Serper (premium fallback) ────────────────────────────────────────────────
169
+
170
/**
 * Query the Serper.dev Google search API and normalize organic and news hits.
 *
 * @param {string} query - Search query.
 * @param {object} [options]
 * @param {number} [options.count=20] - Sent as `num`.
 * @param {string} [options.gl='fr'] - Sent as `gl`.
 * @param {string} [options.hl='fr'] - Sent as `hl`.
 * @param {string|null} [options.tbs=null] - Sent as `tbs` when set (e.g. 'qdr:m').
 * @returns {Promise<{results: object[], error: string|null}>} Organic results
 *   followed by news results. Never rejects: a missing key or a request
 *   failure is reported through `error` with an empty `results` array.
 */
async function serperSearch(query, options = {}) {
  const apiKey = getSerperKey();
  if (!apiKey) return { results: [], error: 'No SERPER_API_KEY set' };

  const { count = 20, gl = 'fr', hl = 'fr', tbs = null } = options;

  // Hostname without a leading "www." label; '' for a missing or unparseable link.
  const domainOf = link => {
    if (!link) return '';
    try {
      return new URL(link).hostname.replace(/^www\./, '');
    } catch {
      return '';
    }
  };

  try {
    const body = { q: query, num: count, gl, hl };
    if (tbs) body.tbs = tbs;

    const resp = await axios.post('https://google.serper.dev/search', body, {
      headers: { 'X-API-KEY': apiKey, 'Content-Type': 'application/json' },
      timeout: SERPER_TIMEOUT,
    });

    const organic = (resp.data.organic || []).map(r => ({
      title: r.title || '',
      url: r.link || '',
      domain: domainOf(r.link),
      snippet: r.snippet || '',
      age: r.date || null,
      position: r.position || 0,
    }));

    const news = (resp.data.news || []).map(r => ({
      title: r.title || '',
      url: r.link || '',
      domain: domainOf(r.link),
      snippet: r.snippet || '',
      age: r.date || null,
      source: r.source || '',
    }));

    return { results: [...organic, ...news], error: null };
  } catch (err) {
    handleError(err, 'serperSearch');
    return { results: [], error: err.message };
  }
}
209
+
210
+ // ── Unified search with automatic fallback ────────────────────────────────────
211
+
212
/**
 * Web search: SearXNG first, Serper as fallback.
 *
 * Serper is only queried when SearXNG returned no results and SERPER_API_KEY
 * is set. `options.timeRange` ('day' | 'week' | 'month' | 'year') is
 * translated to Serper's `tbs` date filter so the fallback honours the same
 * recency constraint; an explicit `options.tbs` still takes precedence.
 *
 * @param {string} query - Search query.
 * @param {object} [options] - Forwarded to both providers.
 * @returns {Promise<{results: object[], error: string|null}>} The first
 *   non-empty result set; otherwise the SearXNG result, whose `error` is
 *   kept for diagnostics.
 */
export async function webSearch(query, options = {}) {
  // Try SearXNG first (free)
  const searxResult = await searxngSearch(query, { categories: 'general', ...options });
  if (searxResult.results.length > 0 || !getSerperKey()) {
    return searxResult;
  }

  // Serper fallback, carrying the requested time range over as a `tbs` filter.
  const tbsByTimeRange = new Map([
    ['day', 'qdr:d'],
    ['week', 'qdr:w'],
    ['month', 'qdr:m'],
    ['year', 'qdr:y'],
  ]);
  const tbs = tbsByTimeRange.get(options.timeRange);
  const serperResult = await serperSearch(query, tbs ? { tbs, ...options } : options);
  if (serperResult.results.length > 0) {
    return serperResult;
  }

  // Both failed — return SearXNG result (with its error for diagnostics)
  return searxResult;
}
231
+
232
/**
 * News search: SearXNG `news` category first, Serper as fallback.
 *
 * The time range defaults to 'month'. On the Serper fallback it is
 * translated to the matching `tbs` date filter (previously always 'qdr:m',
 * whatever the caller asked for); an explicit `options.tbs` still wins.
 *
 * @param {string} query - Search query.
 * @param {object} [options] - Forwarded to both providers.
 * @param {string} [options.timeRange='month'] - 'day', 'week', 'month' or 'year'.
 * @returns {Promise<{results: object[], error: string|null}>} The first
 *   non-empty result set, otherwise the SearXNG result.
 */
export async function newsSearch(query, options = {}) {
  const timeRange = options.timeRange || 'month';
  const searxResult = await searxngSearch(query, {
    categories: 'news',
    timeRange,
    ...options,
  });
  if (searxResult.results.length > 0 || !getSerperKey()) {
    return searxResult;
  }

  // Serper fallback (news), using the same time window as the SearXNG attempt.
  const tbsByTimeRange = new Map([
    ['day', 'qdr:d'],
    ['week', 'qdr:w'],
    ['month', 'qdr:m'],
    ['year', 'qdr:y'],
  ]);
  const tbs = tbsByTimeRange.get(timeRange) ?? 'qdr:m';
  const serperResult = await serperSearch(query, { tbs, ...options });
  return serperResult.results.length > 0 ? serperResult : searxResult;
}
249
+
250
+ // ── High-level API (drop-in compatible with brave-search.js exports) ─────────
251
+
252
/**
 * Full press & mentions search for a brand/company.
 *
 * Runs three searches in sequence (news, recent web mentions, review sites)
 * with a 500 ms pause between them, de-duplicates web and review hits by
 * URL, attaches sentiment and category to each mention, then keeps only
 * the mentions that actually refer to the brand.
 *
 * A mention is kept when its title, snippet or domain contains the full
 * brand name, or, for multi-word brands, contains every word of the name.
 *
 * @param {string} brandName - Brand or company name to look for.
 * @param {object} [options] - Forwarded to every underlying search.
 * @returns {Promise<{brandName: string, checkedAt: string, mentions: object[],
 *   mentionCount: number, unfilteredCount: number, error: string|null}>}
 *   `error` carries the news or web search error, if any.
 */
export async function searchPressMentions(brandName, options = {}) {
  const mentions = [];
  const seenUrls = new Set(); // URLs already recorded, for O(1) de-duplication
  const pause = () => new Promise(resolve => setTimeout(resolve, 500));

  // Build one mention record and remember its URL.
  const addMention = (source, r, { domain = r.domain, category } = {}) => {
    const sentiment = analyzeSentiment(r.title + ' ' + r.snippet);
    mentions.push({
      source,
      url: r.url,
      domain,
      title: r.title,
      snippet: r.snippet?.substring(0, 300),
      age: r.age,
      sentiment: sentiment.label,
      sentimentScore: sentiment.score,
      category: category ?? categorizeMention(r.url, r.title, r.snippet),
    });
    seenUrls.add(r.url);
  };

  // 1. News search
  const news = await newsSearch(brandName, { timeRange: 'month', ...options });
  for (const r of news.results) {
    addMention('news', r, { domain: r.domain || r.source || '' });
  }

  // 2. Web search for recent mentions
  await pause();
  const web = await webSearch(`"${brandName}" avis OR actualité OR news`, { timeRange: 'week', ...options });
  for (const r of web.results) {
    if (seenUrls.has(r.url)) continue; // dedupe
    addMention('web', r);
  }

  // 3. Search for reviews specifically
  await pause();
  const reviews = await webSearch(`"${brandName}" avis clients trustpilot`, { count: 10, ...options });
  // Case-insensitive: titles usually capitalize "Avis", "Trustpilot", "Review".
  const reviewPattern = /trustpilot|avis|review|capterra|g2\.com|glassdoor/i;
  for (const r of reviews.results) {
    if (seenUrls.has(r.url)) continue;
    if (reviewPattern.test(r.url + r.title)) {
      addMention('review', r, { category: 'review' });
    }
  }

  // ── Relevance filter ──
  const brandLower = brandName.toLowerCase().trim();
  const brandWords = brandLower.split(/\s+/);
  const filtered = mentions.filter(m => {
    const text = ((m.title || '') + ' ' + (m.snippet || '') + ' ' + (m.domain || '')).toLowerCase();
    if (text.includes(brandLower)) return true;
    // Multi-word brands also match when every word appears somewhere in the text.
    return brandWords.length > 1 && brandWords.every(w => text.includes(w));
  });

  return {
    brandName,
    checkedAt: new Date().toISOString(),
    mentions: filtered,
    mentionCount: filtered.length,
    unfilteredCount: mentions.length,
    error: news.error || web.error || null,
  };
}
335
+
336
/**
 * Look up the search-result ranking for a keyword.
 *
 * @param {string} keyword - Keyword to search for.
 * @param {object} [options] - Forwarded to webSearch(); `count` defaults to 20.
 * @returns {Promise<Array<{position: number, url: string, domain: string,
 *   title: string, snippet: string}>>} One entry per result. `position` is
 *   the provider-supplied position when present, otherwise the 1-based index.
 */
export async function searchKeywordRankings(keyword, options = {}) {
  const { results } = await webSearch(keyword, { count: 20, ...options });

  const rankings = [];
  results.forEach((hit, index) => {
    const { url, domain, title, snippet } = hit;
    rankings.push({
      position: hit.position || index + 1,
      url,
      domain,
      title,
      snippet,
    });
  });
  return rankings;
}
349
+
350
/**
 * Social media search — restricts a query to the given platforms and labels
 * every result with the platform it belongs to.
 *
 * Platform detection compares the result's hostname (exact match or a
 * subdomain) rather than searching the raw URL, so hosts that merely
 * contain "x.com" (netflix.com, box.com, ...) are labelled 'other'.
 *
 * @param {string} query - Search query.
 * @param {string[]} [platforms] - Any of 'twitter', 'reddit', 'linkedin';
 *   unknown names are ignored. With no known platform the bare query is used.
 * @param {object} [options] - Forwarded to webSearch(); `count` defaults to 15.
 * @returns {Promise<{results: object[], byPlatform: object, error: string|null}>}
 *   Results with an added `platform` field, plus the same results grouped by platform.
 */
export async function searchSocial(query, platforms = ['twitter', 'reddit', 'linkedin'], options = {}) {
  const platformHosts = new Map([
    ['twitter', ['x.com', 'twitter.com']],
    ['reddit', ['reddit.com']],
    ['linkedin', ['linkedin.com']],
  ]);

  const siteQuery = platforms
    .map(p => platformHosts.get(p))
    .filter(Boolean)
    .map(hosts => hosts.map(h => `site:${h}`).join(' OR '))
    .join(' OR ');

  // Avoid a dangling "()" when no known platform was requested.
  const fullQuery = siteQuery ? `${query} (${siteQuery})` : query;
  const search = await webSearch(fullQuery, { count: options.count || 15, ...options });

  // Map a result URL to its platform by hostname; 'other' when it cannot be parsed.
  const detectPlatform = url => {
    let host;
    try {
      host = new URL(url).hostname.toLowerCase();
    } catch {
      return 'other';
    }
    for (const [platform, hosts] of platformHosts) {
      if (hosts.some(h => host === h || host.endsWith(`.${h}`))) return platform;
    }
    return 'other';
  };

  const results = (search.results || []).map(r => ({ ...r, platform: detectPlatform(r.url) }));

  const byPlatform = {};
  for (const r of results) {
    if (!byPlatform[r.platform]) byPlatform[r.platform] = [];
    byPlatform[r.platform].push(r);
  }

  return { results, byPlatform, error: search.error };
}
386
+
387
/**
 * Extract review ratings from search snippets.
 *
 * Each result's title and snippet are scanned for a "N.N / 5"-style rating
 * on Trustpilot, Google reviews and Glassdoor. One entry is emitted per
 * platform that matches a result and has a rating.
 *
 * `reviewCount` is the digits of the first "<number> avis/reviews/évaluations"
 * phrase with whitespace and commas removed, or null when no such phrase
 * exists. Glassdoor entries never carry a count.
 *
 * @param {Array<{url: string, title: string, snippet: string}>} results
 * @returns {Array<{name: string, url: string, rating: number, reviewCount: string|null}>}
 */
export function extractRatingsFromResults(results) {
  // One decimal rating followed by a "/5"-style marker, shared by all platforms.
  const ratingPattern = /(\d[.,]\d)\s*(?:\/\s*5|sur\s*5|out of 5|stars?|étoiles?)/;
  // The count must start with a digit so a bare space before "avis" cannot match.
  const countPattern = /(\d[\d\s,.]*)\s*(?:avis|reviews?|évaluations?)/;

  const readRating = text => {
    const match = text.match(ratingPattern);
    return match ? parseFloat(match[1].replace(',', '.')) : null;
  };
  const readCount = text => {
    const match = text.match(countPattern);
    return match ? match[1].replace(/[\s,]/g, '') : null;
  };

  const platforms = [];

  for (const r of results) {
    const text = `${r.title} ${r.snippet}`.toLowerCase();
    const url = String(r.url ?? '').toLowerCase();
    const rating = readRating(text);
    if (rating === null) continue; // no platform entry without a rating

    // Trustpilot pattern
    if (url.includes('trustpilot') || text.includes('trustpilot')) {
      platforms.push({ name: 'Trustpilot', url: r.url, rating, reviewCount: readCount(text) });
    }

    // Google reviews pattern
    if (text.includes('google') && /avis|review/.test(text)) {
      platforms.push({ name: 'Google', url: r.url, rating, reviewCount: readCount(text) });
    }

    // Glassdoor (employer reputation)
    if (url.includes('glassdoor')) {
      platforms.push({ name: 'Glassdoor', url: r.url, rating, reviewCount: null });
    }
  }

  return platforms;
}
440
+
441
+ // ── Backward-compatible aliases (brave-search.js API surface) ─────────────────
442
+
443
/**
 * Legacy alias kept so callers written against brave-search.js keep working.
 * Delegates to webSearch() with the arguments unchanged.
 *
 * @deprecated Use webSearch()
 * @param {string} query - Search query.
 * @param {object} [options] - Forwarded as-is.
 * @returns {Promise<{results: object[], error: string|null}>}
 */
export async function braveWebSearch(query, options = {}) {
  return webSearch(query, options);
}
447
+
448
/**
 * Legacy alias for newsSearch() that understands the old `freshness` option.
 *
 * `freshness` values 'pd', 'pw' and 'pm' become `timeRange` 'day', 'week'
 * and 'month'. Any `timeRange` already present in `options` overrides the
 * translated value, and the original options are still forwarded.
 *
 * @deprecated Use newsSearch()
 * @param {string} query - Search query.
 * @param {object} [options] - Legacy options, optionally with `freshness`.
 * @returns {Promise<{results: object[], error: string|null}>}
 */
export async function braveNewsSearch(query, options = {}) {
  const timeRangeByFreshness = new Map([
    ['pd', 'day'],
    ['pw', 'week'],
    ['pm', 'month'],
  ]);
  const translated = timeRangeByFreshness.get(options.freshness);
  const opts = translated ? { timeRange: translated } : {};
  return newsSearch(query, { ...opts, ...options });
}
456
+
457
+ // ── Instance health check (useful for CLI status commands) ───────────────────
458
+
459
/**
 * Report the health of the primary (SearXNG) and fallback (Serper) providers.
 *
 * The SEARXNG_URL instance, or else the cached one, is probed directly and
 * reported as 'ok' or 'down'. When neither exists, discovery runs and the
 * instance it finds is reported, so `instance` and `status` always agree.
 *
 * @returns {Promise<{
 *   primary: {provider: string, instance: string, status: string},
 *   fallback: {provider: string, configured: boolean, status: string}
 * }>} `primary.status` is 'ok', 'down' or 'unavailable'; `primary.instance`
 *   is 'none' when no instance is known.
 */
export async function getProviderStatus() {
  let instanceUrl = getSearxngUrl() || _cachedInstance;

  let searxStatus;
  if (instanceUrl) {
    const ok = await probeInstance(instanceUrl);
    searxStatus = ok ? 'ok' : 'down';
  } else {
    // Nothing configured or cached: discover one and report what was found.
    instanceUrl = await findWorkingInstance();
    searxStatus = instanceUrl ? 'ok' : 'unavailable';
  }

  const serperKey = getSerperKey();

  return {
    primary: { provider: 'searxng', instance: instanceUrl || 'none', status: searxStatus },
    fallback: { provider: 'serper', configured: !!serperKey, status: serperKey ? 'configured' : 'none' },
  };
}
480
+
481
/**
 * Forget the cached SearXNG instance so the next lookup probes again.
 * Intended for tests and for use after a connectivity change.
 *
 * @returns {void}
 */
export function resetInstanceCache() {
  _cachedInstance = null;
}
@@ -1,6 +1,6 @@
1
1
  import { analyzeSite, analyzeKeyPages } from '../scrapers/site-analyzer.js';
2
2
  import { scrapeNewsMentions } from '../scrapers/google-news.js';
3
- import { searchPressMentions, extractRatingsFromResults } from '../scrapers/brave-search.js';
3
+ import { searchPressMentions, extractRatingsFromResults } from '../scrapers/searxng-search.js';
4
4
  import { lookupCompany, resolveProvider } from '../providers/registry.js';
5
5
  import { diffTechStacks } from '../utils/tech-detect.js';
6
6
  import { fetch } from '../utils/fetcher.js';
@@ -23,11 +23,11 @@ export async function runCompetitorCheck(tracker) {
23
23
  let reputation = { reviews: [], avgRating: null, platforms: [] };
24
24
 
25
25
  try {
26
- // Try Brave Search API first (reliable, no rate limiting)
26
+ // Try SearXNG/Serper first (free by default; public SearXNG instances can still rate-limit)
27
27
  const braveData = await searchPressMentions(brandName);
28
28
 
29
29
  if (!braveData.error && braveData.mentions.length > 0) {
30
- // Brave API worked
30
+ // SearXNG/Serper API worked
31
31
  const allMentions = braveData.mentions;
32
32
  const pressArticles = allMentions.filter(m => m.category === 'press' || m.source === 'news');
33
33
  const forumMentions = allMentions.filter(m => m.category === 'forum' || m.category === 'social');
@@ -1,10 +1,10 @@
1
1
  import { scrapeSerp } from '../scrapers/google.js';
2
- import { searchKeywordRankings } from '../scrapers/brave-search.js';
2
+ import { searchKeywordRankings } from '../scrapers/searxng-search.js';
3
3
 
4
4
  export async function runKeywordCheck(tracker) {
5
5
  const { keyword } = tracker;
6
6
 
7
- // Try Brave Search API first
7
+ // Try SearXNG/Serper first
8
8
  let results = [];
9
9
  let error = null;
10
10
 
@@ -17,7 +17,7 @@ export async function runKeywordCheck(tracker) {
17
17
  }));
18
18
  }
19
19
  } catch (e) {
20
- // Brave failed, try Google fallback
20
+ // SearXNG/Serper failed, try Google fallback
21
21
  }
22
22
 
23
23
  if (results.length === 0) {
@@ -1,4 +1,4 @@
1
- import { braveNewsSearch, braveWebSearch, searchSocial } from '../scrapers/brave-search.js';
1
+ import { newsSearch, webSearch, searchSocial } from '../scrapers/searxng-search.js';
2
2
  import { analyzeSentiment, categorizeMention } from '../utils/sentiment.js';
3
3
 
4
4
  export async function runPersonCheck(tracker) {
@@ -8,7 +8,7 @@ export async function runPersonCheck(tracker) {
8
8
  const mentions = [];
9
9
 
10
10
  // 1. News search
11
- const news = await braveNewsSearch(personName, { freshness: 'pm' });
11
+ const news = await newsSearch(personName, { timeRange: 'month' });
12
12
  for (const r of news.results) {
13
13
  // Filter by org if provided (keep results that mention org or have no org context)
14
14
  if (org) {
@@ -31,7 +31,7 @@ export async function runPersonCheck(tracker) {
31
31
 
32
32
  // 2. Web search for recent mentions
33
33
  await new Promise(r => setTimeout(r, 500));
34
- const web = await braveWebSearch(query, { freshness: 'pw' });
34
+ const web = await webSearch(query, { timeRange: 'week' });
35
35
  for (const r of web.results) {
36
36
  if (mentions.some(m => m.url === r.url)) continue;
37
37
  const sentiment = analyzeSentiment(r.title + ' ' + r.snippet);