intelwatch 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import chalk from 'chalk';
2
2
  import Table from 'cli-table3';
3
- import { pappersGetFullDossier, pappersSearchByName } from '../scrapers/pappers.js';
3
+ import { pappersGetFullDossier, pappersSearchByName, pappersSearchSubsidiaries } from '../scrapers/pappers.js';
4
4
  import { searchPressMentions } from '../scrapers/brave-search.js';
5
5
  import { analyzeSite } from '../scrapers/site-analyzer.js';
6
6
  import { callAI, hasAIKey } from '../ai/client.js';
@@ -44,15 +44,16 @@ export async function runMA(sirenOrName, options) {
44
44
  }
45
45
 
46
46
  // ── Fetch full dossier ─────────────────────────────────────────────────────
47
- console.log(chalk.gray(' Fetching dossier from Pappers...'));
48
- const { data, error: dossierErr } = await pappersGetFullDossier(siren);
47
+ console.log(chalk.gray(' Loading company data...'));
48
+ const { data, error: dossierErr, fromCache } = await pappersGetFullDossier(siren);
49
+ if (fromCache) console.log(chalk.gray(' ✓ Loaded from cache (0 API credits)'));
49
50
 
50
51
  if (dossierErr || !data) {
51
52
  error(`Failed to fetch dossier: ${dossierErr || 'Unknown error'}`);
52
53
  process.exit(1);
53
54
  }
54
55
 
55
- const { identity, financialHistory, ubo, bodacc, dirigeants, proceduresCollectives } = data;
56
+ const { identity, financialHistory, consolidatedFinances, ubo, bodacc, dirigeants, representants, etablissements, proceduresCollectives } = data;
56
57
 
57
58
  // ── Header ─────────────────────────────────────────────────────────────────
58
59
  header(`🏢 Due Diligence Deep Profile — ${identity.name || siren}`);
@@ -170,6 +171,44 @@ export async function runMA(sirenOrName, options) {
170
171
  console.log(chalk.gray(' Aucune donnée financière disponible.'));
171
172
  }
172
173
 
174
+ // ── Consolidated finances (group level) ────────────────────────────────────
175
+ if (consolidatedFinances?.length > 0) {
176
+ section(' 💶 Finances consolidées (groupe)');
177
+ const cTable = new Table({
178
+ head: ['Année', 'CA consolidé', 'Résultat consolidé'].map(h => chalk.cyan.bold(h)),
179
+ style: { head: [], border: ['grey'] },
180
+ colAligns: ['left', 'right', 'right'],
181
+ });
182
+ for (const f of consolidatedFinances) {
183
+ cTable.push([
184
+ chalk.white(f.annee ?? '—'),
185
+ f.ca != null ? chalk.white(formatEuro(f.ca)) : chalk.gray('—'),
186
+ f.resultat != null
187
+ ? (f.resultat >= 0 ? chalk.green(formatEuro(f.resultat)) : chalk.red(formatEuro(f.resultat)))
188
+ : chalk.gray('—'),
189
+ ]);
190
+ }
191
+ console.log(cTable.toString());
192
+ }
193
+
194
+ // ── Representants ──────────────────────────────────────────────────────────
195
+ if (representants?.length > 0) {
196
+ section(` 👥 Représentants (${representants.length})`);
197
+ for (const r of representants) {
198
+ const type = r.personneMorale ? chalk.blue('[PM]') : chalk.gray('[PP]');
199
+ console.log(chalk.gray(` ${type} ${chalk.white(r.nom)} — ${r.qualite}`));
200
+ }
201
+ }
202
+
203
+ // ── Etablissements ─────────────────────────────────────────────────────────
204
+ if (etablissements?.length > 1) {
205
+ section(` 🏢 Établissements (${etablissements.length})`);
206
+ for (const e of etablissements) {
207
+ const status = e.actif ? chalk.green('●') : chalk.red('○');
208
+ console.log(chalk.gray(` ${status} ${e.siret} — ${e.type || '?'} — ${e.adresse || '?'}`));
209
+ }
210
+ }
211
+
173
212
  // ── BODACC publications ────────────────────────────────────────────────────
174
213
  if (bodacc.length > 0) {
175
214
  section(` 📰 Publications BODACC (${bodacc.length} dernières)`);
@@ -215,14 +254,221 @@ export async function runMA(sirenOrName, options) {
215
254
  }
216
255
  }
217
256
 
257
+ // ── Subsidiaries / Related entities ──────────────────────────────────────
258
+ let subsidiariesData = [];
259
+ if (identity.name) {
260
+ const brandName2 = (identity.name || '').replace(/\s*(GRP|SAS|SARL|SA|SCI|EURL|GROUP|GROUPE|HOLDING|SNC|SASU)\s*/gi, ' ').trim();
261
+ section(` 🏭 Filiales / Entités liées`);
262
+ console.log(chalk.gray(` Searching for "${brandName2}" entities...`));
263
+ try {
264
+ const { subsidiaries, fromCache: subsFromCache } = await pappersSearchSubsidiaries(identity.name, identity.siren);
265
+ if (subsFromCache) console.log(chalk.gray(' ✓ Subsidiaries loaded from cache (0 API credits)'));
266
+ subsidiariesData = subsidiaries;
267
+ if (subsidiaries.length > 0) {
268
+ const subTable = new Table({
269
+ head: ['Entité', 'Ville', 'CA', 'Résultat', 'Effectif'].map(h => chalk.cyan.bold(h)),
270
+ style: { head: [], border: ['grey'] },
271
+ colAligns: ['left', 'left', 'right', 'right', 'left'],
272
+ });
273
+ for (const s of subsidiaries) {
274
+ subTable.push([
275
+ chalk.white(s.name),
276
+ chalk.gray(s.ville || '—'),
277
+ s.ca != null ? chalk.white(formatEuro(s.ca)) : chalk.gray('—'),
278
+ s.resultat != null
279
+ ? (s.resultat >= 0 ? chalk.green(formatEuro(s.resultat)) : chalk.red(formatEuro(s.resultat)))
280
+ : chalk.gray('—'),
281
+ chalk.gray(s.effectif || '—'),
282
+ ]);
283
+ }
284
+ console.log(subTable.toString());
285
+ } else {
286
+ console.log(chalk.gray(' Aucune filiale trouvée.'));
287
+ }
288
+ } catch (e) {
289
+ warn(` Subsidiary search failed: ${e.message}`);
290
+ }
291
+ }
292
+
218
293
  // ── Press & mentions ───────────────────────────────────────────────────────
219
294
  let pressResults = [];
295
+ let brandName = '';
296
+ let companyArticles = [];
220
297
  if (identity.name) {
221
298
  section(' 📣 Presse & réputation');
222
299
  console.log(chalk.gray(` Searching mentions for "${identity.name}"...`));
223
300
  try {
224
- const press = await searchPressMentions(identity.name);
301
+ // Use short brand name (without GRP, SAS, etc.) for better search results
302
+ brandName = (identity.name || '').replace(/\s*(GRP|SAS|SARL|SA|SCI|EURL|GROUP|GROUPE|HOLDING|SNC|SASU)\s*/gi, ' ').trim() || identity.name;
303
+ const press = await searchPressMentions(brandName);
225
304
  pressResults = press.mentions || [];
305
+
306
+ // Additional M&A-focused search to catch acquisitions/deals
307
+ try {
308
+ const { braveWebSearch } = await import('../scrapers/brave-search.js');
309
+ await new Promise(r => setTimeout(r, 600));
310
+ const maSearch = await braveWebSearch(`"${brandName}" acquisition OR rachat OR "entrée au capital" OR "prise de participation"`, { count: 10 });
311
+ for (const r of (maSearch.results || [])) {
312
+ const text = ((r.title || '') + ' ' + (r.snippet || '')).toLowerCase();
313
+ if (!text.includes(brandName.toLowerCase())) continue;
314
+ if (pressResults.some(m => m.url === r.url)) continue;
315
+ const { analyzeSentiment } = await import('../utils/sentiment.js');
316
+ const sent = analyzeSentiment(r.title + ' ' + r.snippet);
317
+ pressResults.push({ source: 'ma-search', url: r.url, domain: r.domain, title: r.title, snippet: r.snippet?.substring(0, 300), sentiment: sent.label, category: 'ma' });
318
+ }
319
+ } catch (_) { /* silent */ }
320
+
321
+ // ── Deep press: company website + LinkedIn ─────────────────────────────
322
+ const { readFileSync, writeFileSync, existsSync, mkdirSync } = await import('fs');
323
+ const { join: pathJoin } = await import('path');
324
+ const { homedir } = await import('os');
325
+ const pressCache = pathJoin(homedir(), '.intelwatch', 'cache', 'press');
326
+ const pressCacheFile = pathJoin(pressCache, `${siren}.json`);
327
+ const PRESS_CACHE_TTL = 3 * 24 * 60 * 60 * 1000; // 3 days
328
+
329
+ // Try loading from cache
330
+ let cacheHit = false;
331
+ if (existsSync(pressCacheFile)) {
332
+ try {
333
+ const cached = JSON.parse(readFileSync(pressCacheFile, 'utf8'));
334
+ if (Date.now() - cached.ts < PRESS_CACHE_TTL) {
335
+ companyArticles = cached.articles || [];
336
+ cacheHit = true;
337
+ console.log(chalk.gray(` Deep scan: loaded ${companyArticles.length} articles from cache`));
338
+ }
339
+ } catch (_) { /* corrupt cache, re-scrape */ }
340
+ }
341
+
342
+ if (!cacheHit) {
343
+ // Crawl company's own website for blog/news pages
344
+ const companyDomain = (identity.website
345
+ ? identity.website.replace(/\/$/, '')
346
+ : `https://www.${brandName.toLowerCase().replace(/\s+/g, '')}.com`);
347
+ const blogPaths = ['/blog', '/actualites', '/news', '/communiques', '/presse', '/press', '/media'];
348
+ console.log(chalk.gray(` Deep scan: checking ${companyDomain} for press releases...`));
349
+
350
+ for (const blogPath of blogPaths) {
351
+ try {
352
+ const resp = await fetch(`${companyDomain}${blogPath}`, {
353
+ headers: { 'User-Agent': 'Mozilla/5.0 (compatible; intelwatch/1.1)' },
354
+ signal: AbortSignal.timeout(5000),
355
+ redirect: 'follow',
356
+ });
357
+ if (!resp.ok) continue;
358
+ const html = await resp.text();
359
+
360
+ // Collect article links (absolute + relative, on target domain only)
361
+ const links = new Set();
362
+ const absRegex = /href="(https?:\/\/[^"]*(?:blog|actualit|news|communiqu|press|article)[^"]*)"/gi;
363
+ let m;
364
+ while ((m = absRegex.exec(html)) !== null) {
365
+ try {
366
+ const u = new URL(m[1]);
367
+ if (u.hostname === new URL(companyDomain).hostname) links.add(m[1]);
368
+ } catch (_) {}
369
+ if (links.size >= 15) break;
370
+ }
371
+ const relRegex = /href="(\/(?:blog|actualit|news|communiqu|press|article)[^"]*)"/gi;
372
+ while ((m = relRegex.exec(html)) !== null) {
373
+ try { links.add(new URL(m[1], companyDomain).href); } catch (_) {}
374
+ if (links.size >= 15) break;
375
+ }
376
+
377
+ // Scrape each article (max 10)
378
+ for (const url of [...links].slice(0, 10)) {
379
+ try {
380
+ const artResp = await fetch(url, {
381
+ headers: { 'User-Agent': 'Mozilla/5.0 (compatible; intelwatch/1.1)' },
382
+ signal: AbortSignal.timeout(5000),
383
+ });
384
+ if (!artResp.ok) continue;
385
+ const artHtml = await artResp.text();
386
+ const titleMatch = artHtml.match(/<title[^>]*>([^<]+)<\/title>/i);
387
+ const title = titleMatch ? titleMatch[1].trim() : url;
388
+ const text = artHtml
389
+ .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
390
+ .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
391
+ .replace(/<[^>]+>/g, ' ')
392
+ .replace(/\s+/g, ' ')
393
+ .trim()
394
+ .substring(0, 2000);
395
+ companyArticles.push({ url, title, content: text, source: 'company-website' });
396
+ await new Promise(r => setTimeout(r, 300));
397
+ } catch (_) {}
398
+ }
399
+ if (companyArticles.length > 0) break; // found a working section
400
+ } catch (_) {}
401
+ }
402
+ console.log(chalk.gray(` Found ${companyArticles.length} company articles`));
403
+
404
+ // Company website M&A articles via Brave (more reliable than crawling)
405
+ try {
406
+ const { braveWebSearch: braveSearch3 } = await import('../scrapers/brave-search.js');
407
+ const domain = (() => { try { return new URL(companyDomain).hostname; } catch { return ''; } })();
408
+ if (domain) {
409
+ await new Promise(r => setTimeout(r, 600));
410
+ const siteSearch = await braveSearch3(
411
+ `site:${domain} acquisition OR rapprochement OR capital OR croissance OR partenariat OR intègre`,
412
+ { count: 10 },
413
+ );
414
+ for (const r of (siteSearch.results || [])) {
415
+ if (companyArticles.some(a => a.url === r.url)) continue;
416
+ // Scrape content
417
+ try {
418
+ const artResp = await fetch(r.url, {
419
+ headers: { 'User-Agent': 'Mozilla/5.0 (compatible; intelwatch/1.1)' },
420
+ signal: AbortSignal.timeout(5000),
421
+ });
422
+ if (artResp.ok) {
423
+ const artHtml = await artResp.text();
424
+ const text = artHtml.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '').replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '').replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim().substring(0, 2000);
425
+ const titleMatch = artHtml.match(/<title[^>]*>([^<]+)<\/title>/i);
426
+ companyArticles.push({ url: r.url, title: titleMatch?.[1]?.trim() || r.title, content: text, source: 'company-website' });
427
+ await new Promise(r2 => setTimeout(r2, 300));
428
+ }
429
+ } catch {}
430
+ }
431
+ console.log(chalk.gray(` + ${(siteSearch.results || []).length} site:${domain} results`));
432
+ }
433
+ } catch {}
434
+
435
+ // LinkedIn posts via Brave
436
+ try {
437
+ const { braveWebSearch: braveSearch2 } = await import('../scrapers/brave-search.js');
438
+ await new Promise(r => setTimeout(r, 600));
439
+ const linkedinSearch = await braveSearch2(
440
+ `site:linkedin.com "${brandName}" acquisition OR croissance OR chiffre OR recrutement OR partenariat`,
441
+ { count: 10 },
442
+ );
443
+ const liCount = (linkedinSearch.results || []).length;
444
+ for (const r of (linkedinSearch.results || [])) {
445
+ companyArticles.push({ url: r.url, title: r.title, content: r.snippet || '', source: 'linkedin' });
446
+ }
447
+ console.log(chalk.gray(` + ${liCount} LinkedIn results`));
448
+ } catch (_) {}
449
+
450
+ // Persist cache
451
+ try {
452
+ if (!existsSync(pressCache)) mkdirSync(pressCache, { recursive: true });
453
+ writeFileSync(pressCacheFile, JSON.stringify({ ts: Date.now(), articles: companyArticles }), 'utf8');
454
+ } catch (_) {}
455
+ }
456
+
457
+ // Add company blog articles to pressResults
458
+ for (const art of companyArticles.filter(a => a.source === 'company-website')) {
459
+ const { analyzeSentiment } = await import('../utils/sentiment.js');
460
+ const sent = analyzeSentiment(art.title + ' ' + (art.content || '').substring(0, 500));
461
+ pressResults.push({
462
+ source: 'company-blog',
463
+ url: art.url,
464
+ domain: (() => { try { return new URL(art.url).hostname; } catch (_) { return ''; } })(),
465
+ title: art.title,
466
+ snippet: (art.content || '').substring(0, 300),
467
+ sentiment: sent.label,
468
+ category: 'company',
469
+ });
470
+ }
471
+
226
472
  if (press.mentionCount > 0) {
227
473
  const bd = press.mentions.reduce((acc, m) => {
228
474
  const k = /positive/.test(m.sentiment) ? 'positive'
@@ -243,61 +489,442 @@ export async function runMA(sirenOrName, options) {
243
489
  }
244
490
  }
245
491
 
246
- // ── AI Summary ────────────────────────────────────────────────────────────
492
+ // ── Scrape content of top M&A articles for deeper analysis ────────────────
493
+ let scrapedMaContent = [];
494
+ try {
495
+ // Combine press results + raw M&A search results for scraping
496
+ const allMaSources = [
497
+ ...pressResults.filter(m => m.category === 'ma' || /acquisition|rachat|rapprochement|capital|cession|intègre|accueille|rejoint|fusionne|clôture|progression|croissance/i.test(m.title || '')),
498
+ ];
499
+ // Deduplicate by URL
500
+ const seenUrls = new Set();
501
+ const maArticles = allMaSources.filter(a => {
502
+ if (!a.url || seenUrls.has(a.url)) return false;
503
+ seenUrls.add(a.url);
504
+ // Skip non-article pages (pappers, linkedin profiles)
505
+ if (/pappers\.fr|linkedin\.com\/company|linkedin\.com\/in/i.test(a.url)) return false;
506
+ return true;
507
+ });
508
+ for (const article of maArticles.slice(0, 8)) {
509
+ try {
510
+ const response = await fetch(article.url, {
511
+ headers: { 'User-Agent': 'Mozilla/5.0 (compatible; intelwatch/1.1)' },
512
+ signal: AbortSignal.timeout(5000),
513
+ });
514
+ const html = await response.text();
515
+ const text = html.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
516
+ .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
517
+ .replace(/<[^>]+>/g, ' ')
518
+ .replace(/\s+/g, ' ')
519
+ .trim()
520
+ .substring(0, 2000);
521
+ scrapedMaContent.push({ url: article.url, title: article.title, content: text, source: 'press' });
522
+ await new Promise(r => setTimeout(r, 300));
523
+ } catch (_) { /* skip failed fetches */ }
524
+ }
525
+ } catch (_) { /* silent */ }
526
+
527
+ // Merge company website + LinkedIn articles into scraped content
528
+ for (const art of companyArticles) {
529
+ scrapedMaContent.push(art);
530
+ }
531
+
532
+ // ── Build M&A timeline IN CODE (before AI — dates are authoritative) ──────
533
+ const parentBrandForMa = (identity.name || '')
534
+ .replace(/\s*(GRP|SAS|SARL|SA|SCI|EURL|GROUP|GROUPE|HOLDING|SNC|SASU)\s*/gi, ' ')
535
+ .trim().toLowerCase().split(' ')[0];
536
+ const offBrandSubsForMa = subsidiariesData.filter(
537
+ s => !s.name?.toLowerCase().includes(parentBrandForMa)
538
+ );
539
+ const codeBuiltMaHistory = buildMaHistoryFromCode(scrapedMaContent, offBrandSubsForMa);
540
+
541
+ // ── AI Analysis ───────────────────────────────────────────────────────────
542
+ let aiAnalysis = null;
247
543
  if (options.ai) {
248
- section(' 🤖 Synthèse IA — Due Diligence');
544
+ section(' 🤖 Analyse IA — Due Diligence');
249
545
  if (!hasAIKey()) {
250
546
  warn(' No AI API key. Set OPENAI_API_KEY or ANTHROPIC_API_KEY.');
251
547
  } else {
252
- console.log(chalk.gray(' Generating AI due diligence summary...'));
548
+ console.log(chalk.gray(' Generating AI due diligence analysis (JSON)...'));
253
549
  try {
550
+ // Compute year-over-year revenue growth from consolidated (preferred) or entity finances
551
+ const finSource = consolidatedFinances?.length ? consolidatedFinances : financialHistory;
552
+ const sortedFin = [...finSource].filter(f => f.ca != null).sort((a, b) => (a.annee || 0) - (b.annee || 0));
553
+ const rawGrowthData = [];
554
+ for (let i = 1; i < sortedFin.length; i++) {
555
+ const prev = sortedFin[i - 1];
556
+ const curr = sortedFin[i];
557
+ if (prev.ca > 0) {
558
+ const pct = ((curr.ca - prev.ca) / prev.ca * 100).toFixed(1);
559
+ rawGrowthData.push({ period: `${prev.annee}→${curr.annee}`, from: formatEuro(prev.ca), to: formatEuro(curr.ca), growthPct: `${pct}%`, delta: formatEuro(curr.ca - prev.ca) });
560
+ }
561
+ }
562
+ const growthDataSource = consolidatedFinances?.length ? 'consolidated group' : 'entity only';
563
+
254
564
  const finSummary = financialHistory
255
565
  .map(f => `${f.annee}: CA=${f.ca != null ? formatEuro(f.ca) : 'N/A'}, Résultat=${f.resultat != null ? formatEuro(f.resultat) : 'N/A'}, CP=${f.capitauxPropres != null ? formatEuro(f.capitauxPropres) : 'N/A'}`)
256
566
  .join('\n') || 'Non disponible';
257
567
 
568
+ const consFinSummary = consolidatedFinances.length
569
+ ? consolidatedFinances.map(f => `${f.annee}: CA consolidé=${f.ca != null ? formatEuro(f.ca) : 'N/A'}, Résultat=${f.resultat != null ? formatEuro(f.resultat) : 'N/A'}`).join('\n')
570
+ : 'Non disponible';
571
+
258
572
  const dirStr = dirigeants
259
- .map(d => `${d.prenom || ''} ${d.nom || ''} (${d.role || '?'}): ${d.mandats.length} mandats`)
260
- .join(', ') || 'Non disponible';
573
+ .map(d => `- ${[d.prenom, d.nom].filter(Boolean).join(' ')} (${d.role || '?'}): ${d.mandats.length} mandats dans d'autres sociétés`)
574
+ .join('\n') || 'Non disponible';
575
+
576
+ const uboStr = ubo.length
577
+ ? ubo.map(b => `- ${[b.prenom, b.nom].filter(Boolean).join(' ')}: ${b.pourcentageParts ?? '?'}% parts, nationalité: ${b.nationalite || '?'}`).join('\n')
578
+ : 'Non déclaré';
579
+
580
+ // ── Refresh stale subsidiary financials via Brave/Pappers ────────────
581
+ const currentYear = new Date().getFullYear();
582
+ const staleThreshold = currentYear - 2; // CA must be from at least N-2
583
+ const staleSubs = subsidiariesData.filter(s => {
584
+ const caYear = s.annee || s.caYear;
585
+ return s.ca && caYear && caYear < staleThreshold;
586
+ });
587
+ if (staleSubs.length > 0) {
588
+ console.log(chalk.gray(` 🔄 ${staleSubs.length} subsidiaries with stale financials (< ${staleThreshold}), refreshing from Pappers...`));
589
+ const apiKey = process.env.PAPPERS_API_KEY;
590
+ for (const stale of staleSubs.slice(0, 5)) {
591
+ if (!stale.siren || !apiKey) continue;
592
+ try {
593
+ // Direct Pappers API call for fresh financials
594
+ const resp = await fetch(`https://api.pappers.fr/v1/entreprise?api_token=${apiKey}&siren=${stale.siren}`, {
595
+ headers: { 'User-Agent': 'intelwatch/1.1' },
596
+ signal: AbortSignal.timeout(8000),
597
+ });
598
+ if (!resp.ok) {
599
+ if (resp.status === 402) { console.log(chalk.yellow(` ⚠ Pappers credits exhausted, skipping refresh`)); break; }
600
+ continue;
601
+ }
602
+ const d = await resp.json();
603
+ const latestFin = (d.finances || [])[0];
604
+ if (latestFin?.annee && latestFin.annee > (stale.annee || 0)) {
605
+ const oldYear = stale.annee;
606
+ stale.ca = latestFin.chiffre_affaires ?? stale.ca;
607
+ stale.resultat = latestFin.resultat ?? stale.resultat;
608
+ stale.annee = latestFin.annee;
609
+ console.log(chalk.gray(` ✓ ${stale.name}: ${oldYear} → ${latestFin.annee}, CA ${latestFin.chiffre_affaires ? (latestFin.chiffre_affaires / 1e6).toFixed(1) + 'M€' : 'unchanged'}`));
610
+ // Update cache
611
+ const { setCache } = await import('../scrapers/pappers.js');
612
+ setCache(stale.siren, {
613
+ identity: { name: d.nom_entreprise, nafCode: d.code_naf, nafLabel: d.libelle_code_naf, ville: d.siege?.ville, effectifTexte: d.effectif, dateCreation: d.date_creation },
614
+ financialHistory: (d.finances || []).map(f => ({ ca: f.chiffre_affaires, resultat: f.resultat, annee: f.annee, ebitda: f.excedent_brut_exploitation, margeEbitda: f.taux_marge_EBITDA, dettesFinancieres: f.dettes_financieres, tresorerie: f.tresorerie, fondsPropres: f.fonds_propres, bfr: f.BFR, ratioEndettement: f.ratio_endettement, autonomieFinanciere: f.autonomie_financiere, rentabiliteFP: f.rentabilite_fonds_propres, margeNette: f.marge_nette, capaciteAutofinancement: f.capacite_autofinancement })),
615
+ _subCache: true,
616
+ });
617
+ } else {
618
+ console.log(chalk.gray(` — ${stale.name}: no newer data (latest: ${latestFin?.annee || 'none'})`));
619
+ }
620
+ await new Promise(r => setTimeout(r, 300));
621
+ } catch (_) {}
622
+ }
623
+ }
624
+
625
+ const parentBrand = (identity.name || '').replace(/\s*(GRP|SAS|SARL|SA|SCI|EURL|GROUP|GROUPE|HOLDING|SNC|SASU)\s*/gi, ' ').trim().toLowerCase().split(' ')[0];
626
+ const brandedSubs = subsidiariesData.filter(s => s.name?.toLowerCase().includes(parentBrand));
627
+ const offBrandSubs = subsidiariesData.filter(s => !s.name?.toLowerCase().includes(parentBrand));
628
+
629
+ const subsStr = subsidiariesData.length
630
+ ? `${subsidiariesData.length} subsidiaries total.\n\n` +
631
+ `BRANDED subsidiaries (organic/internal, name contains "${parentBrand}"):\n` +
632
+ (brandedSubs.length ? brandedSubs.slice(0, 10).map(s => `- ${s.name} (SIREN: ${s.siren}): CA ${formatEuro(s.ca)}${s.annee ? ' ('+s.annee+')' : ''}, Résultat ${s.resultat != null ? formatEuro(s.resultat) : 'N/A'}, ${s.ville}`).join('\n') : '(none)') +
633
+ `\n\nOFF-BRAND subsidiaries (likely ACQUIRED — each is a potential M&A deal):\n` +
634
+ (offBrandSubs.length ? offBrandSubs.slice(0, 15).map(s => `- ${s.name} (SIREN: ${s.siren}): CA ${formatEuro(s.ca)}${s.annee ? ' ('+s.annee+')' : ''}, Résultat ${s.resultat != null ? formatEuro(s.resultat) : 'N/A'}, ${s.ville}${s.dateCreation ? ', created: ' + s.dateCreation : ''}`).join('\n') : '(none)') +
635
+ `\n\nFor M&A history: each off-brand subsidiary represents a confirmed acquisition (confidence: confirmed_registry). Cross-reference with press articles and BODACC for acquisition dates.`
636
+ : 'Aucune filiale identifiée';
637
+
638
+ const bodaccStr = bodacc.length
639
+ ? bodacc.slice(0, 30).map(b => `- [${b.date || '?'}] ${b.type}: ${b.description || ''}${b.details ? ' — ' + b.details : ''}`).join('\n')
640
+ : 'Aucune publication';
261
641
 
262
- const uboStr = ubo
263
- .map(b => `${b.prenom || ''} ${b.nom || ''}: ${b.pourcentageParts ?? '?'}% parts`)
264
- .join(', ') || 'Non déclaré';
642
+ const pressStr = pressResults.length
643
+ ? pressResults.slice(0, 20).map(m => `- [${m.sentiment}] ${m.title || ''} (${m.domain || m.source || ''})${m.url ? ' — URL: ' + m.url : ''}`).join('\n')
644
+ : 'Aucune mention';
265
645
 
266
646
  const procStr = proceduresCollectives.length
267
- ? proceduresCollectives.map(p => `${p.date || '?'}: ${p.type || '?'}`).join(', ')
647
+ ? proceduresCollectives.map(p => `- [${p.date || '?'}] ${p.type || '?'}: ${p.jugement || ''}`).join('\n')
268
648
  : 'Aucune';
269
649
 
270
- const systemPrompt = 'Tu es un analyste Deep Profile expert. Rédige une synthèse de due diligence concise et professionnelle en français.';
271
- const userPrompt = `Synthèse due diligence pour ${identity.name} (SIREN: ${identity.siren})
650
+ const repStr = representants?.length
651
+ ? representants.map(r => `- ${r.personneMorale ? '[PM]' : '[PP]'} ${r.nom} — ${r.qualite}${r.siren ? ' (SIREN: ' + r.siren + ')' : ''}`).join('\n')
652
+ : 'Non disponible';
653
+
654
+ const systemPrompt = `You are an expert M&A analyst specialized in mid-market due diligence. Analyze the company data provided and return ONLY valid JSON according to the requested schema. No text before or after the JSON. No markdown blocks. Be factual, sourced, no speculation. ALL text output (summaries, strengths, weaknesses, descriptions) MUST be in English.
655
+
656
+ RÈGLES CRITIQUES :
657
+ 1. HOLDING vs GROUPE : les données "entité" (effectifs, CA) sont celles de la HOLDING (société mère). Les données "consolidées" sont celles du GROUPE ENTIER. Ne confonds JAMAIS les deux. Si la holding a 5 salariés mais le groupe consolide 60M€ de CA, c'est un GRAND groupe. Base ton analyse sur les chiffres consolidés quand disponibles.
658
+ 2. CONCURRENTS : identifie des concurrents dont le CA FRANCE est dans la fourchette 0.5x à 2x du CA consolidé de la cible. Par exemple si la cible fait 62M€, les concurrents doivent être entre 30M€ et 125M€ de CA EN FRANCE (pas mondial). JAMAIS citer un CA mondial/global — toujours le CA France. JAMAIS les Big 4 (KPMG, Deloitte, EY, PwC). JAMAIS Mazars (>1B€ mondial), Fiducial (>2B€ mondial) sauf si leur CA France est comparable. Pour un cabinet comptable mid-market français à 62M€, pense plutôt : In Extenso (~60-70M€ France), Baker Tilly (~60M€ France), RSM (~55M€ France), Grant Thornton (~60-80M€ France), Crowe (~40M€ France).
659
+ 3. CROISEMENT PRESSE : si un article de presse mentionne une acquisition, une entrée au capital (ex: fonds PE), un rachat, un partenariat — INCLUS-LE dans groupStructure et maHistory avec l'URL source. La presse révèle souvent des opérations avant le registre.
660
+ 4. REPRÉSENTANTS : les personnes morales (PM) au capital sont souvent des fonds PE, des holdings familiales ou des véhicules d'investissement. Identifie-les et intègre-les dans la structure du groupe. Si tu reconnais un fonds PE connu (BPI France, IK Partners, Ardian, etc.), mentionne-le explicitement.
661
+ 5. SCORING : évalue la santé financière sur le CA CONSOLIDÉ (pas holding). Échelle 0-100 : croissance CA consolidé, rentabilité consolidée, stabilité, diversification géographique/sectorielle, gouvernance.`;
662
+
663
+ const userPrompt = `Analyse de due diligence pour ${identity.name} (SIREN: ${identity.siren})
272
664
 
273
- **Identité**
274
- - Forme: ${identity.formeJuridique || '?'}
275
- - Création: ${identity.dateCreation || '?'}
276
- - Effectifs: ${identity.effectifs || '?'}
277
- - NAF: ${identity.nafCode} ${identity.nafLabel}
278
- - Capital: ${identity.capital != null ? formatEuro(identity.capital) : '?'}
665
+ === IDENTITÉ ===
666
+ Forme: ${identity.formeJuridique || '?'}, NAF: ${identity.nafCode || '?'} — ${identity.nafLabel || '?'}
667
+ Création: ${identity.dateCreation || '?'}, Effectifs: ${identity.effectifs || '?'}
668
+ Capital: ${identity.capital != null ? formatEuro(identity.capital) : '?'}
669
+ Effectif holding: ${identity.effectifTexte || identity.effectifs || '?'} (ATTENTION: c'est la holding, pas le groupe)
670
+ Adresse: ${[identity.adresse, identity.codePostal, identity.ville].filter(Boolean).join(' ') || '?'}
671
+ Objet social: ${identity.objetSocial || 'Non disponible'}
672
+ Nombre de filiales identifiées: ${subsidiariesData.length || 0}
279
673
 
280
- **Dirigeants**
674
+ === DIRIGEANTS ===
281
675
  ${dirStr}
282
676
 
283
- **UBO (${ubo.length})**
677
+ === REPRÉSENTANTS / ACTIONNAIRES ===
678
+ ${repStr}
679
+
680
+ === BÉNÉFICIAIRES EFFECTIFS (UBO) ===
284
681
  ${uboStr}
285
682
 
286
- **Historique financier**
683
+ === FINANCES (entité) ===
287
684
  ${finSummary}
288
685
 
289
- **Procédures collectives**
686
+ === FINANCES CONSOLIDÉES (groupe) ===
687
+ ${consFinSummary}
688
+
689
+ === FILIALES / ENTITÉS LIÉES ===
690
+ ${subsStr}
691
+
692
+ === PUBLICATIONS BODACC ===
693
+ ${bodaccStr}
694
+
695
+ === PROCÉDURES COLLECTIVES ===
290
696
  ${procStr}
291
697
 
292
- **Publications BODACC récentes**
293
- ${bodacc.slice(0, 5).map(b => `${b.date || '?'}: ${b.type || '?'}`).join(', ') || 'Aucune'}
698
+ === PRESSE (${pressResults.length} mentions) ===
699
+ ${pressStr}
700
+
701
+ === SCRAPED M&A ARTICLES (press sources) ===
702
+ ${scrapedMaContent.filter(a => a.source !== 'company-website' && a.source !== 'linkedin').length
703
+ ? scrapedMaContent.filter(a => a.source !== 'company-website' && a.source !== 'linkedin').map(a => `--- ${a.title} (${a.url}) ---\n${a.content}`).join('\n\n')
704
+ : 'None scraped'}
705
+
706
+ === COMPANY WEBSITE ARTICLES (from target's own blog/news) ===
707
+ ${scrapedMaContent.filter(a => a.source === 'company-website').length
708
+ ? scrapedMaContent.filter(a => a.source === 'company-website').map(a => `--- ${a.title} (${a.url}) ---\n${a.content}`).join('\n\n')
709
+ : 'None found'}
710
+
711
+ === LINKEDIN MENTIONS ===
712
+ ${scrapedMaContent.filter(a => a.source === 'linkedin').length
713
+ ? scrapedMaContent.filter(a => a.source === 'linkedin').map(a => `--- ${a.title} (${a.url}) ---\n${a.content}`).join('\n\n')
714
+ : 'None found'}
715
+
716
+ === PRE-BUILT M&A TIMELINE (use these entries, add descriptions only) ===
717
+ IMPORTANT: The following entries have AUTHORITATIVE dates extracted from press articles and registry data.
718
+ Your job is ONLY to add a 2-3 sentence description to each entry. Do NOT change dates, types, or targets. Do NOT add or remove entries. Copy all entries exactly into the maHistory array.
719
+
720
+ ${codeBuiltMaHistory.length
721
+ ? codeBuiltMaHistory.map((e, i) =>
722
+ `[${i+1}] date:${e.date} | type:${e.type} | target:${e.target} | confidence:${e.confidence}${e.sourceUrl ? ' | source:'+e.sourceUrl : ''}`
723
+ ).join('\n')
724
+ : 'No pre-built entries (use best effort from articles)'}
725
+
726
+ === CROISSANCE REVENUE ===
727
+ Source: ${growthDataSource}
728
+ ${rawGrowthData.length ? rawGrowthData.map(g => `${g.period}: ${g.from} → ${g.to} (${g.growthPct})`).join('\n') : 'Données insuffisantes pour calculer la croissance'}
729
+
730
+ Retourne ce JSON exact (remplace les valeurs par l'analyse réelle) :
731
+ {
732
+ "executiveSummary": "Write 4-6 detailed paragraphs (at least 300 words total) covering: company profile and history, governance and ownership structure, financial performance and trends (use consolidated figures), group structure and key subsidiaries, market positioning and competitive landscape. Be specific with numbers, names, and dates.",
733
+ "groupStructure": {
734
+ "description": "narrative description of ownership structure",
735
+ "shareholders": [
736
+ {"entity": "Shareholder/Fund name", "role": "Private Equity Fund|Co-investor|Holding", "stake": "majority|minority|XX%", "confidence": "confirmed_registry|confirmed_press", "sourceUrl": null}
737
+ ],
738
+ "target": {"entity": "TARGET COMPANY NAME", "role": "Target Company", "revenue": "62M€ (2024)"},
739
+ "subsidiaries": [
740
+ {"entity": "Key subsidiary name", "revenue": "XX M€ (YYYY)"}
741
+ ]
742
+ // shareholders = ONLY entities ABOVE the target (PE funds, investors, holdings). Ordered by investment weight (largest first).
743
+ // target = the company being analyzed.
744
+ // subsidiaries = top 7 subsidiaries by revenue, mixing BOTH branded AND off-brand. Include acquired entities like Exelmans, Greece 133, Alcyon. Example: ENDRIX LYO 18.8M€, GREECE 133 12.6M€, GE EXELMANS ADVISORY 9.1M€, ENDRIX IDF 8.5M€...
745
+ },
746
+ "strengths": [
747
+ {"text": "2-3 sentences describing the strength with specific numbers, dates, or facts. Not generic.", "confidence": "confirmed_registry|confirmed_press", "sourceUrl": null}
748
+ ],
749
+ "weaknesses": [
750
+ {"text": "2-3 sentences describing the weakness with specific evidence. Not generic.", "confidence": "confirmed_registry|confirmed_press", "sourceUrl": null}
751
+ ],
752
+ "competitors": [
753
+ // MINIMUM 5 competitors. Include direct competitors in same NAF sector with comparable consolidated revenue.
754
+ {"name": "competitor name", "reason": "why they are a direct competitor (2-3 sentences)", "estimatedRevenue": "estimated revenue range", "summary": "3-4 sentences describing this competitor: their size, market position, key differentiators vs the target company, and recent strategic moves"}
755
+ ],
756
+ "maHistory": [
757
+ {"date": "YYYY-MM or YYYY", "type": "acquisition|cession|fusion|restructuration|capital_increase|creation", "target": "name of acquired/merged entity", "description": "2-3 sentences: what happened, estimated deal size if known, strategic rationale", "confidence": "confirmed_registry|confirmed_press|unconfirmed", "sourceUrl": "URL or null"}
758
+ ],
759
+ "riskAssessment": {
760
+ "overall": "low|medium|high|critical",
761
+ "flags": [
762
+ {"severity": "low|medium|high|critical", "text": "risque identifié avec détail", "confidence": "confirmed_registry", "sourceUrl": null}
763
+ ]
764
+ },
765
+ "healthScore": {
766
+ "score": 75,
767
+ "breakdown": {
768
+ "growth": {"score": 80, "comment": "explication courte"},
769
+ "profitability": {"score": 70, "comment": "explication courte"},
770
+ "stability": {"score": 75, "comment": "explication courte"},
771
+ "diversification": {"score": 60, "comment": "explication courte"},
772
+ "governance": {"score": 50, "comment": "explication courte"}
773
+ }
774
+ },
775
+ "growthAnalysis": {
776
+ "consolidatedGrowth": [
777
+ {"period": "2023→2024", "fromRevenue": "58.2M€", "toRevenue": "62.0M€", "growthPct": "6.5%", "organic": "~3%", "external": "~3.5%", "comment": "short description of what drove growth this period"}
778
+ ],
779
+ "growthQuality": "mixed",
780
+ "aiComment": "Write 2-3 sentences analyzing growth quality: what drove it (organic expansion vs acquisitions), sustainability, and outlook. Reference specific subsidiaries or deals if applicable."
781
+ },
782
+ "forwardLooking": {
783
+ "announcedRevenue": null,
784
+ "announcedHeadcount": null,
785
+ "announcedAcquisitions": [],
786
+ "projectedGrowth": null,
787
+ "aiComment": "Write 2-3 sentences comparing announced/projected figures vs last deposited data. If no forward data found in press, explain what the growth trajectory suggests for next fiscal year based on historical trends."
788
+ }
789
+ }
790
+
791
+ Règles: confidence="confirmed_registry" si la donnée vient des données Pappers fournies, "confirmed_press" + sourceUrl si d'un article de presse listé ci-dessus, "unconfirmed" sinon.
792
+
793
+ OBLIGATOIRE :
794
+ - Minimum 3 forces et 3 faiblesses
795
+ - Minimum 5 concurrents de taille comparable (CA consolidé similaire, même code NAF ${identity.nafCode || ''})
796
+ - Le score de santé doit être basé sur les finances CONSOLIDÉES si disponibles
797
+ - Ne mentionne JAMAIS que la holding a peu d'employés comme faiblesse — c'est normal pour une holding, les employés sont dans les filiales
798
+ - maHistory: The PRE-BUILT M&A TIMELINE above contains ALL entries with AUTHORITATIVE dates and types.
799
+ RULES:
800
+ 1) Copy ALL entries from PRE-BUILT M&A TIMELINE exactly (same date, type, target, confidence, sourceUrl).
801
+ 2) For each entry, write a 2-3 sentence description explaining: what happened, the strategic rationale, estimated deal context if known.
802
+ 3) Do NOT invent dates. Do NOT add entries not in the pre-built list. Do NOT remove entries.
803
+ 4) If pre-built list is empty, use best effort from articles (MINIMUM 5 entries).
804
+ Each entry: date (YYYY or YYYY-MM), type, target, description (2-3 sentences), confidence, sourceUrl.
805
+ - growthAnalysis.consolidatedGrowth: use the "CROISSANCE REVENUE" data provided. For organic vs external split: External growth = revenue attributable to OFF-BRAND subsidiaries acquired during the period (use their dateCreation, BODACC dates, or press article dates to determine when they joined the group). Organic growth = total growth minus external growth. Reference specific acquired subsidiaries by name in the comment field. If exact split cannot be determined, estimate based on the number and relative size of off-brand subsidiaries vs total group revenue.
806
+ - growthAnalysis.growthQuality: "organic-led" if >70% organic, "acquisition-led" if >70% external, "mixed" otherwise
807
+ - growthAnalysis.aiComment: list specific off-brand subsidiaries that contributed to external growth, with their estimated CA and acquisition period if known. Cross-reference press articles for revenue announcements or growth claims — if press mentions specific revenue figures (e.g. "100 millions", "105M€"), use them as data points and reference the article source by domain name.
808
+ - forwardLooking: ALWAYS populate ALL fields in this section. This is MANDATORY.
809
+ - announcedRevenue: Scan ALL scraped articles (company website + press + LinkedIn) for ANY revenue figure for a FUTURE or RECENT year not yet in the registry. Look for: "objectif de X millions", "CA de X", "chiffre d'affaires de X", "100 millions", "X M€", revenue targets. If found: {"amount": "100M€", "year": 2025, "confidence": "confirmed_press", "sourceUrl": "https://article-url"}. If NOT found: project from CAGR: {"amount": "66M€", "year": 2025, "confidence": "projected", "sourceUrl": null}
810
+ - announcedAcquisitions: list ALL acquisitions mentioned in press/company articles that are announced, in progress, or recently completed. Include Zalis if mentioned.
811
+ - projectedGrowth: ALWAYS fill this as a SHORT STRING like "+12% CAGR → ~70M€ projected 2025". NOT an object, just a string.
812
+ - aiComment: 3-4 sentences. Compare deposited (62M€ 2024) vs announced/projected. Be specific. If multiple revenue targets exist (e.g. 100M€ and 300M€), explain both.
813
+ - aiComment: 3-4 sentences comparing deposited vs announced/projected, discussing growth sustainability and outlook`;
814
+
815
+ const raw = await callAI(systemPrompt, userPrompt, { maxTokens: 3500 });
816
+ aiAnalysis = extractAIJSON(raw);
817
+
818
+ // Merge: take descriptions from AI, keep dates/types/targets from code-built array
819
+ if (aiAnalysis && codeBuiltMaHistory.length > 0) {
820
+ const aiMa = aiAnalysis.maHistory || [];
821
+ for (const codeEntry of codeBuiltMaHistory) {
822
+ const targetKey = (codeEntry.target || '').toLowerCase().split(' ')[0];
823
+ const aiMatch = aiMa.find(a => {
824
+ const aTarget = (a.target || '').toLowerCase();
825
+ return aTarget.includes(targetKey) || targetKey.includes(aTarget.split(' ')[0]);
826
+ });
827
+ if (aiMatch?.description && !codeEntry.description) {
828
+ codeEntry.description = aiMatch.description;
829
+ }
830
+ }
831
+ aiAnalysis.maHistory = codeBuiltMaHistory;
832
+ }
833
+
834
+ if (aiAnalysis) {
835
+ // Display executive summary
836
+ if (aiAnalysis.executiveSummary) {
837
+ console.log('\n' + chalk.white(aiAnalysis.executiveSummary) + '\n');
838
+ }
839
+
840
+ // Display strengths
841
+ if (aiAnalysis.strengths?.length) {
842
+ console.log(chalk.green.bold(' 💪 Forces :'));
843
+ for (const s of aiAnalysis.strengths.slice(0, 4)) {
844
+ console.log(chalk.green(` + ${s.text || s}`));
845
+ }
846
+ }
294
847
 
295
- Rédige une synthèse en 5 points : 1) Profil, 2) Gouvernance & actionnariat, 3) Situation financière, 4) Risques identifiés, 5) Points d'attention pour l'acquéreur. Sois factuel, max 400 mots.`;
848
+ // Display weaknesses
849
+ if (aiAnalysis.weaknesses?.length) {
850
+ console.log(chalk.red.bold(' ⚠️ Faiblesses :'));
851
+ for (const w of aiAnalysis.weaknesses.slice(0, 4)) {
852
+ console.log(chalk.red(` - ${w.text || w}`));
853
+ }
854
+ }
296
855
 
297
- const summary = await callAI(systemPrompt, userPrompt, { maxTokens: 600 });
298
- console.log('\n' + chalk.white(summary) + '\n');
856
+ // Display risk level
857
+ if (aiAnalysis.riskAssessment) {
858
+ const riskColor = { low: chalk.green, medium: chalk.yellow, high: chalk.red, critical: chalk.red.bold }[aiAnalysis.riskAssessment.overall] || chalk.gray;
859
+ console.log('\n ' + riskColor(`🎯 Risque global : ${(aiAnalysis.riskAssessment.overall || '?').toUpperCase()}`));
860
+ for (const f of (aiAnalysis.riskAssessment.flags || []).slice(0, 3)) {
861
+ const sevColor = { low: chalk.gray, medium: chalk.yellow, high: chalk.red, critical: chalk.red.bold }[f.severity] || chalk.gray;
862
+ console.log(sevColor(` [${f.severity || '?'}] ${f.text || ''}`));
863
+ }
864
+ }
865
+
866
+ // Display health score
867
+ if (aiAnalysis.healthScore) {
868
+ const hs = aiAnalysis.healthScore;
869
+ const scoreColor = hs.score >= 70 ? chalk.green : hs.score >= 50 ? chalk.yellow : chalk.red;
870
+ console.log('\n ' + scoreColor(`📊 Score santé financière : ${hs.score}/100`));
871
+ if (hs.breakdown) {
872
+ for (const [key, val] of Object.entries(hs.breakdown)) {
873
+ const c = val.score >= 70 ? chalk.green : val.score >= 50 ? chalk.yellow : chalk.red;
874
+ const label = { growth: 'Croissance', profitability: 'Rentabilité', stability: 'Stabilité', diversification: 'Diversification', governance: 'Gouvernance' }[key] || key;
875
+ console.log(c(` ${label}: ${val.score}/100 — ${val.comment || ''}`));
876
+ }
877
+ }
878
+ }
879
+
880
+ // Display competitors
881
+ if (aiAnalysis.competitors?.length) {
882
+ console.log(chalk.cyan.bold('\n 🏁 Concurrents identifiés :'));
883
+ for (const c of aiAnalysis.competitors) {
884
+ console.log(chalk.cyan(` • ${c.name}${c.estimatedRevenue ? ' — ' + c.estimatedRevenue : ''}`));
885
+ }
886
+ }
887
+
888
+ // Display growth analysis
889
+ if (aiAnalysis.growthAnalysis) {
890
+ const ga = aiAnalysis.growthAnalysis;
891
+ console.log(chalk.magenta.bold('\n 📈 Growth Analysis :'));
892
+ if (ga.consolidatedGrowth?.length) {
893
+ for (const g of ga.consolidatedGrowth) {
894
+ console.log(chalk.magenta(` ${g.period}: ${g.fromRevenue} → ${g.toRevenue} (${g.growthPct}) | Organic: ${g.organic || 'N/A'} | External: ${g.external || 'N/A'}`));
895
+ }
896
+ }
897
+ console.log(chalk.magenta(` Quality: ${ga.growthQuality || '?'}`));
898
+ }
899
+
900
+ // Display forward-looking indicators
901
+ if (aiAnalysis.forwardLooking) {
902
+ const fl = aiAnalysis.forwardLooking;
903
+ const hasData = fl.announcedRevenue || fl.announcedHeadcount || fl.announcedAcquisitions?.length;
904
+ if (hasData) {
905
+ console.log(chalk.yellow.bold('\n 🔮 Forward-Looking :'));
906
+ if (fl.announcedRevenue) {
907
+ console.log(chalk.yellow(` Revenue: ${fl.announcedRevenue.amount} (${fl.announcedRevenue.year}) [${fl.announcedRevenue.confidence}]`));
908
+ }
909
+ if (fl.projectedGrowth) {
910
+ const pgStr = typeof fl.projectedGrowth === 'object' ? JSON.stringify(fl.projectedGrowth) : fl.projectedGrowth;
911
+ console.log(chalk.yellow(` Projected growth: ${pgStr}`));
912
+ }
913
+ if (fl.announcedAcquisitions?.length) {
914
+ for (const acq of fl.announcedAcquisitions) {
915
+ console.log(chalk.yellow(` Acquisition: ${acq.target} (${acq.status})`));
916
+ }
917
+ }
918
+ }
919
+ }
920
+
921
+ console.log('');
922
+ } else {
923
+ // Fallback: display raw text
924
+ console.log('\n' + chalk.white(raw) + '\n');
925
+ }
299
926
  } catch (e) {
300
- warn(` AI summary failed: ${e.message}`);
927
+ warn(` AI analysis failed: ${e.message}`);
301
928
  }
302
929
  }
303
930
  }
@@ -318,12 +945,123 @@ Rédige une synthèse en 5 points : 1) Profil, 2) Gouvernance & actionnariat, 3)
318
945
  const pressMentions = [];
319
946
  if (pressResults?.length) {
320
947
  pressResults.forEach(m => {
321
- pressMentions.push({ title: m.title || '', source: m.source || '', sentiment: m.sentiment || 'neutral' });
948
+ pressMentions.push({ title: m.title || '', source: m.domain || m.source || '', url: m.url || '', sentiment: m.sentiment || 'neutral' });
322
949
  });
323
950
  }
324
951
 
325
952
  const pdfData = {
326
- aiSummary: null, // filled below if AI was used
953
+ aiSummary: aiAnalysis?.executiveSummary || null,
954
+ groupStructure: (() => {
955
+ const gs = aiAnalysis?.groupStructure || {};
956
+ // Override subsidiaries with real data — top 7 by CA, mixing branded + off-brand
957
+ if (subsidiariesData?.length) {
958
+ gs.subsidiaries = subsidiariesData
959
+ .filter(s => s.ca && s.ca > 0)
960
+ .sort((a, b) => (b.ca || 0) - (a.ca || 0))
961
+ .slice(0, 7)
962
+ .map(s => ({ entity: s.name, revenue: `${(s.ca / 1e6).toFixed(1)} M€${s.annee ? ' (' + s.annee + ')' : ''}` }));
963
+ }
964
+ return gs;
965
+ })(),
966
+ aiCompetitors: aiAnalysis?.competitors || [],
967
+ maHistory: aiAnalysis?.maHistory || [],
968
+ riskAssessment: aiAnalysis?.riskAssessment || null,
969
+ healthScore: aiAnalysis?.healthScore || null,
970
+ growthAnalysis: (() => {
971
+ const ga = aiAnalysis?.growthAnalysis || {};
972
+ // Build all YoY rows from consolidated finances (code-built, not AI)
973
+ if (consolidatedFinances?.length >= 2) {
974
+ const sorted = [...consolidatedFinances].filter(f => f.ca && f.annee).sort((a, b) => a.annee - b.annee);
975
+ const rows = [];
976
+ for (let i = 1; i < sorted.length; i++) {
977
+ const prev = sorted[i - 1];
978
+ const curr = sorted[i];
979
+ if (!prev.ca || !curr.ca) continue;
980
+ const totalPct = ((curr.ca - prev.ca) / prev.ca * 100).toFixed(1);
981
+ const fmtM = (n) => (n / 1e6).toFixed(1) + 'M€';
982
+ rows.push({
983
+ period: `${prev.annee} → ${curr.annee}`,
984
+ fromRevenue: fmtM(prev.ca),
985
+ toRevenue: fmtM(curr.ca),
986
+ growthPct: (totalPct >= 0 ? '+' : '') + totalPct + '%',
987
+ organic: '—',
988
+ external: '—',
989
+ comment: null,
990
+ });
991
+ }
992
+ // Merge AI organic/external estimates for matching periods if available
993
+ for (const aiRow of (ga.consolidatedGrowth || [])) {
994
+ const match = rows.find(r => r.period === aiRow.period || r.period.includes(aiRow.period?.split('→')[0]?.trim()));
995
+ if (match) {
996
+ if (aiRow.organic) match.organic = aiRow.organic;
997
+ if (aiRow.external) match.external = aiRow.external;
998
+ if (aiRow.comment) match.comment = aiRow.comment;
999
+ }
1000
+ }
1001
+ ga.consolidatedGrowth = rows;
1002
+ }
1003
+ return ga;
1004
+ })(),
1005
+ forwardLooking: (() => {
1006
+ const fl = aiAnalysis?.forwardLooking || {};
1007
+ // Always scan articles for best revenue target (AI often picks wrong figure)
1008
+ {
1009
+ // Search articles for revenue targets — prioritize highest target
1010
+ let bestTarget = null;
1011
+ for (const art of (scrapedMaContent || [])) {
1012
+ const text = (art.content || '');
1013
+ // Match "300 millions d'euros à horizon 2030", "100 millions", "vise X millions", etc.
1014
+ const revPatterns = [
1015
+ /(\d{2,4})\s*millions?\s*d.euros/gi,
1016
+ /(\d{2,4})\s*millions?\s*€/gi,
1017
+ /(\d{2,4})\s*m€/gi,
1018
+ /chiffre\s*d.affaires\s*de\s*(\d{2,4})\s*million/gi,
1019
+ /vise?\s*(?:un\s*)?(?:ca|chiffre)\s*.*?(\d{2,4})\s*million/gi,
1020
+ ];
1021
+ for (const p of revPatterns) {
1022
+ let m;
1023
+ while ((m = p.exec(text)) !== null) {
1024
+ const amount = parseInt(m[1]);
1025
+ if (amount < 10 || amount > 5000) continue;
1026
+ // Look for year near this match
1027
+ const ctx = text.substring(Math.max(0, m.index - 80), Math.min(text.length, m.index + m[0].length + 80));
1028
+ const yearM = ctx.match(/(?:horizon|ici|objectif|ambition|d.ici)\s*(\d{4})/i) || ctx.match(/(20[2-3]\d)/);
1029
+ const year = yearM ? parseInt(yearM[1]) : 2030;
1030
+ if (!bestTarget || amount > bestTarget.amount) {
1031
+ bestTarget = { amount, year, url: art.url };
1032
+ }
1033
+ }
1034
+ }
1035
+ }
1036
+ if (bestTarget) {
1037
+ // Use code-built if higher amount or AI didn't populate
1038
+ const aiAmount = parseInt((fl.announcedRevenue?.amount || '0').replace(/[^\d]/g, '')) || 0;
1039
+ console.log(chalk.gray(` 📊 FLI code-built: ${bestTarget.amount}M€ (${bestTarget.year}) vs AI: ${aiAmount}M€`));
1040
+ if (bestTarget.amount > aiAmount) {
1041
+ fl.announcedRevenue = {
1042
+ amount: bestTarget.amount + 'M€',
1043
+ year: bestTarget.year,
1044
+ confidence: 'confirmed_press',
1045
+ sourceUrl: bestTarget.url,
1046
+ };
1047
+ }
1048
+ }
1049
+ }
1050
+ // Ensure projectedGrowth is a string
1051
+ if (fl.projectedGrowth && typeof fl.projectedGrowth === 'object') {
1052
+ fl.projectedGrowth = JSON.stringify(fl.projectedGrowth);
1053
+ }
1054
+ if (!fl.projectedGrowth && consolidatedFinances?.length >= 2) {
1055
+ const last = consolidatedFinances[0];
1056
+ const prev = consolidatedFinances[1];
1057
+ if (last.ca && prev.ca) {
1058
+ const growth = ((last.ca - prev.ca) / prev.ca * 100).toFixed(1);
1059
+ const projected = (last.ca * (1 + parseFloat(growth) / 100) / 1e6).toFixed(1);
1060
+ fl.projectedGrowth = `+${growth}% → ~${projected}M€ projected ${(last.annee || 2024) + 1}`;
1061
+ }
1062
+ }
1063
+ return fl;
1064
+ })(),
327
1065
  competitors: [{
328
1066
  name: identity.name || siren,
329
1067
  url: identity.website || 'N/A',
@@ -331,23 +1069,121 @@ Rédige une synthèse en 5 points : 1) Profil, 2) Gouvernance & actionnariat, 3)
331
1069
  social: {},
332
1070
  pappers: {
333
1071
  siren: identity.siren,
1072
+ siret: identity.siret,
334
1073
  forme: identity.formeJuridique,
335
1074
  creation: identity.dateCreation,
336
1075
  naf: identity.nafCode ? identity.nafCode + ' — ' + identity.nafLabel : null,
1076
+ capital: identity.capital != null ? fmtEuro(identity.capital) : 'N/A',
337
1077
  ca: financialHistory?.[0]?.ca != null ? fmtEuro(financialHistory[0].ca) : 'N/A',
338
1078
  effectifs: identity.effectifs || 'N/A',
339
- dirigeants: dirigeants?.map(d => d.nom || d.denomination || '?').slice(0, 5) || [],
1079
+ adresse: [identity.adresse, identity.codePostal, identity.ville].filter(Boolean).join(' '),
1080
+ dirigeants: dirigeants?.map(d => {
1081
+ const name = d.nom || d.denomination || '?';
1082
+ const role = d.qualite || '';
1083
+ return role ? `${name} (${role})` : name;
1084
+ }).slice(0, 10) || [],
340
1085
  },
1086
+ // Consolidated finances (group) — include raw KPI fields for charts/tables
1087
+ consolidatedFinances: (consolidatedFinances || []).map(f => ({
1088
+ year: f.annee,
1089
+ annee: f.annee,
1090
+ revenue: f.ca != null ? fmtEuro(f.ca) : '—',
1091
+ netIncome: f.resultat != null ? fmtEuro(f.resultat) : '—',
1092
+ // Raw KPI fields
1093
+ ca: f.ca,
1094
+ resultat: f.resultat,
1095
+ ebitda: f.ebitda,
1096
+ margeEbitda: f.margeEbitda,
1097
+ dettesFinancieres: f.dettesFinancieres,
1098
+ tresorerie: f.tresorerie,
1099
+ fondsPropres: f.fondsPropres ?? f.capitauxPropres,
1100
+ bfr: f.bfr,
1101
+ ratioEndettement: f.ratioEndettement,
1102
+ autonomieFinanciere: f.autonomieFinanciere,
1103
+ rentabiliteFP: f.rentabiliteFP,
1104
+ margeNette: f.margeNette,
1105
+ capaciteAutofinancement: f.capaciteAutofinancement,
1106
+ })),
1107
+ // Representants
1108
+ representants: (representants || []).slice(0, 15).map(r => ({
1109
+ name: r.nom,
1110
+ role: r.qualite,
1111
+ type: r.personneMorale ? 'Corporate' : 'Individual',
1112
+ siren: r.siren,
1113
+ })),
1114
+ // Etablissements
1115
+ etablissements: (etablissements || []).map(e => ({
1116
+ siret: e.siret,
1117
+ type: e.type,
1118
+ address: e.adresse,
1119
+ active: e.actif,
1120
+ })),
1121
+ // Extra identity
1122
+ objetSocial: identity.objetSocial,
1123
+ tvaIntra: identity.tvaIntra,
1124
+ rcs: identity.rcs,
1125
+ conventionCollective: identity.conventionCollective,
1126
+ // Financial history for table — include raw KPI fields for charts/tables
1127
+ financialHistory: (financialHistory || []).map(f => ({
1128
+ year: f.annee,
1129
+ annee: f.annee,
1130
+ revenue: f.ca != null ? fmtEuro(f.ca) : '—',
1131
+ netIncome: f.resultat != null ? fmtEuro(f.resultat) : '—',
1132
+ equity: f.capitauxPropres != null ? fmtEuro(f.capitauxPropres) : '—',
1133
+ employees: f.effectif || '—',
1134
+ // Raw KPI fields
1135
+ ca: f.ca,
1136
+ resultat: f.resultat,
1137
+ ebitda: f.ebitda,
1138
+ margeEbitda: f.margeEbitda,
1139
+ dettesFinancieres: f.dettesFinancieres,
1140
+ tresorerie: f.tresorerie,
1141
+ fondsPropres: f.fondsPropres ?? f.capitauxPropres,
1142
+ bfr: f.bfr,
1143
+ ratioEndettement: f.ratioEndettement,
1144
+ autonomieFinanciere: f.autonomieFinanciere,
1145
+ rentabiliteFP: f.rentabiliteFP,
1146
+ margeNette: f.margeNette,
1147
+ capaciteAutofinancement: f.capaciteAutofinancement,
1148
+ })),
1149
+ // UBO
1150
+ ubo: (ubo || []).map(u => ({
1151
+ name: [u.prenom, u.nom].filter(Boolean).join(' ') || u.denomination || '?',
1152
+ share: u.pourcentage ? `${u.pourcentage}%` : 'N/A',
1153
+ nationality: u.nationalite || '',
1154
+ })),
1155
+ // BODACC publications
1156
+ bodacc: (bodacc || []).slice(0, 15).map(b => ({
1157
+ date: b.date || '—',
1158
+ type: b.type || '—',
1159
+ description: b.description || '',
1160
+ })),
1161
+ // Procédures collectives
1162
+ procedures: (proceduresCollectives || []).map(p => ({
1163
+ type: p.type || '—',
1164
+ date: p.date || '—',
1165
+ description: p.description || '',
1166
+ })),
341
1167
  press: pressMentions.length ? {
342
1168
  total: pressMentions.length,
343
1169
  positive: pressMentions.filter(m => m.sentiment === 'positive').length,
344
1170
  neutral: pressMentions.filter(m => m.sentiment === 'neutral').length,
345
1171
  negative: pressMentions.filter(m => m.sentiment === 'negative').length,
346
- mentions: pressMentions.slice(0, 15),
1172
+ mentions: pressMentions.slice(0, 20),
347
1173
  } : undefined,
348
- strengths: [],
349
- weaknesses: [],
350
- summary: `${identity.name || siren} — ${identity.formeJuridique || ''}, ${identity.nafLabel || ''}. Created ${identity.dateCreation || '?'}. ${financialHistory?.length ? `Financial history: ${financialHistory.length} years available.` : 'No financial data available.'}`,
1174
+ // Subsidiaries
1175
+ subsidiaries: subsidiariesData.filter(s => s.ca != null).map(s => ({
1176
+ name: s.name,
1177
+ ville: s.ville,
1178
+ revenue: s.ca != null ? fmtEuro(s.ca) : '—',
1179
+ netIncome: s.resultat != null ? fmtEuro(s.resultat) : '—',
1180
+ employees: s.effectif || '—',
1181
+ year: s.annee || '—',
1182
+ status: s.status || '—',
1183
+ })),
1184
+ strengths: aiAnalysis?.strengths || [],
1185
+ weaknesses: aiAnalysis?.weaknesses || [],
1186
+ summary: `${identity.name || siren} — ${identity.formeJuridique || ''}, ${identity.nafLabel || ''}. Created ${identity.dateCreation || '?'}. ${financialHistory?.length ? `Financial history: ${financialHistory.length} years available.` : 'No financial data available.'} ${subsidiariesData.length ? `Group of ${subsidiariesData.length} entities.` : ''}`,
351
1187
  }]
352
1188
  };
353
1189
 
@@ -379,6 +1215,21 @@ Rédige une synthèse en 5 points : 1) Profil, 2) Gouvernance & actionnariat, 3)
379
1215
 
380
1216
  // ── Helpers ───────────────────────────────────────────────────────────────────
381
1217
 
1218
/**
 * Best-effort extraction of a JSON object (or array) from raw AI output.
 *
 * Strategies, tried in order:
 *   1. parse the whole (trimmed) text;
 *   2. parse the text with a leading/trailing markdown code fence removed;
 *   3. parse the first brace-balanced `{...}` block found in the text;
 *   4. parse the span from the first `{` to the last `}` (legacy fallback).
 *
 * Only objects and arrays are accepted: a reply that parses to a bare scalar
 * (e.g. `42`, `"ok"`, `true`) is not a usable analysis, so it yields `null`
 * and callers can fall back to displaying the raw text.
 *
 * @param {string} text - Raw model output.
 * @returns {object|Array|null} The parsed value, or `null` when nothing parseable was found.
 */
function extractAIJSON(text) {
  // Non-string input used to throw on `.replace`; treat it as "nothing to parse".
  if (typeof text !== 'string') return null;
  const trimmed = text.trim();
  if (!trimmed) return null;

  // Parse a candidate, accepting only objects/arrays. `undefined` means "keep trying".
  const parseStructured = (candidate) => {
    try {
      const parsed = JSON.parse(candidate);
      return parsed !== null && typeof parsed === 'object' ? parsed : undefined;
    } catch {
      return undefined;
    }
  };

  // 1) Direct parse
  const direct = parseStructured(trimmed);
  if (direct !== undefined) return direct;

  // 2) Strip markdown code fences
  const unfenced = trimmed.replace(/^```(?:json)?\s*/i, '').replace(/\s*```\s*$/, '').trim();
  const fromFence = parseStructured(unfenced);
  if (fromFence !== undefined) return fromFence;

  // 3) First brace-balanced block. Unlike a greedy first-`{`-to-last-`}` match,
  //    this survives trailing prose that itself contains braces. Braces inside
  //    JSON strings are ignored.
  const start = trimmed.indexOf('{');
  if (start === -1) return null;

  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < trimmed.length; i++) {
    const ch = trimmed[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) {
        const balanced = parseStructured(trimmed.slice(start, i + 1));
        if (balanced !== undefined) return balanced;
        break;
      }
    }
  }

  // 4) Legacy fallback: greedy span from the first `{` to the last `}`
  const end = trimmed.lastIndexOf('}');
  if (end > start) {
    const greedy = parseStructured(trimmed.slice(start, end + 1));
    if (greedy !== undefined) return greedy;
  }
  return null;
}
1232
+
382
1233
  function printRow(label, value, coloredValue) {
383
1234
  const padded = label.padEnd(16);
384
1235
  const display = coloredValue ?? (value != null ? chalk.white(value) : chalk.gray('—'));
@@ -398,3 +1249,86 @@ function formatEuro(n) {
398
1249
  if (abs >= 1_000) return `${sign}${formatNum(Math.round(abs / 1_000))} K€`;
399
1250
  return `${sign}${formatNum(abs)} €`;
400
1251
  }
1252
+
1253
/**
 * Build M&A history IN CODE from scraped articles + off-brand subsidiaries.
 * Returns entries with authoritative dates — AI only adds descriptions.
 *
 * Two text patterns are mined from each article (matching is done on the
 * lower-cased content, so press-derived targets are lower-case):
 *   1. "<operation> de/du/avec/auprès de <entity> en [<month>] <year>"
 *   2. "<day> <month> <year> <snippet>" where the snippet names a known entity.
 * Off-brand subsidiaries not already covered by a press entry are then added
 * as registry-confirmed acquisitions dated by their `dateCreation`.
 *
 * Month and operation lookups are accent-insensitive: the patterns accept both
 * "février"/"fevrier", "levée"/"levee", etc., so the lookup tables must too.
 *
 * @param {Array<{content?: string, url?: string}>|null|undefined} scrapedMaContent
 *   Scraped articles; null/undefined is treated as empty.
 * @param {Array<{name?: string, dateCreation?: string}>|null|undefined} offBrandSubs
 *   Off-brand subsidiaries; null/undefined is treated as empty.
 * @param {string[]} [knownEntities]
 *   Entity names looked for by pattern 2 (matched case-insensitively).
 *   Defaults to the list that was previously hard-coded.
 * @returns {Array<{date: string, type: string, target: string, sourceUrl: (string|null|undefined), confidence: string, description: null}>}
 *   Entries sorted chronologically by their date string.
 */
function buildMaHistoryFromCode(
  scrapedMaContent,
  offBrandSubs,
  knownEntities = ['ik partners', 'bpifrance', 'bpi france', 'zalis', 'exelmans', 'alcyon', 'bc conseil', 'mageia'],
) {
  // Remove diacritics so accented and unaccented spellings share one lookup key.
  const stripAccents = (s) => s.normalize('NFD').replace(/[\u0300-\u036f]/g, '');

  // Keys are stored WITHOUT accents; always look up through stripAccents().
  const MONTH_MAP = {
    janvier: '01', fevrier: '02', mars: '03', avril: '04',
    mai: '05', juin: '06', juillet: '07', aout: '08',
    septembre: '09', octobre: '10', novembre: '11', decembre: '12',
  };
  // Ordered: the first keyword contained in the matched operation wins.
  const OP_TYPES = [
    ['integration', 'merger'], ['acquisition', 'acquisition'], ['rachat', 'acquisition'],
    ['rapprochement', 'merger'], ['entree au capital', 'capital_increase'],
    ['levee', 'fundraising'], ['fusion', 'merger'], ['cession', 'cession'],
  ];

  // Pattern 1: "OPTYPE de/du/avec/auprès de X en [MOIS] YYYY".
  // "du cabinet " must precede "du " in the alternation, otherwise it can never
  // match and targets keep a spurious "cabinet " prefix.
  const OPERATION_PATTERN = /(intégration|acquisition|rachat|rapprochement|entr[eé]e au capital|lev[eé]e|fusion)\s+(?:de |d'|du cabinet |du |avec |aupr[eè]s de )?([a-zéèêëàâçîïôùûüœæ0-9\s&'.,-]{2,35}?)\s+en\s+(?:(janvier|f[eé]vrier|mars|avril|mai|juin|juillet|ao[uû]t|septembre|octobre|novembre|d[eé]cembre)\s+)?(20\d{2})/gi;
  // Pattern 2: "DD MOIS YYYY [description containing known entities]"
  const DATED_PATTERN = /(\d{1,2})\s+(janvier|f[eé]vrier|mars|avril|mai|juin|juillet|ao[uû]t|septembre|octobre|novembre|d[eé]cembre)\s+(20\d{2})\s+([^.\n]{10,80})/gi;

  const entityNames = (knownEntities ?? []).map((e) => String(e).toLowerCase());
  const entries = [];
  const seen = new Set();

  for (const art of scrapedMaContent ?? []) {
    const text = (art?.content || '').toLowerCase();
    const sourceUrl = art?.url;

    // matchAll works on a clone of the regex, so the shared /g patterns carry
    // no lastIndex state from one article to the next.
    for (const m of text.matchAll(OPERATION_PATTERN)) {
      const opRaw = stripAccents(m[1].toLowerCase().trim());
      const entity = m[2].trim().replace(/[,.]$/, '');
      const monthRaw = m[3];
      const year = m[4];
      const month = monthRaw ? MONTH_MAP[stripAccents(monthRaw.toLowerCase())] : null;
      const date = month ? `${year}-${month}` : year;
      const opType = OP_TYPES.find(([keyword]) => opRaw.includes(keyword))?.[1] ?? 'acquisition';
      const key = `${entity.toLowerCase().substring(0, 15)}|${date}`;
      if (seen.has(key) || entity.length < 2) continue;
      seen.add(key);
      entries.push({ date, type: opType, target: entity, sourceUrl, confidence: 'confirmed_press', description: null });
    }

    for (const m of text.matchAll(DATED_PATTERN)) {
      const snippet = m[4].trim();
      const foundEntity = entityNames.find((e) => snippet.includes(e));
      if (!foundEntity) continue;
      const month = MONTH_MAP[stripAccents(m[2].toLowerCase())];
      if (!month) continue; // defensive: never emit "YYYY-undefined"
      const date = `${m[3]}-${month}`;
      const typeGuess = /lev[eé]e|capital|fonds/i.test(snippet) ? 'capital_increase'
        : /rapprochement|intègre|rejoins/i.test(snippet) ? 'merger' : 'acquisition';
      const key = `${foundEntity}|${date}`;
      if (seen.has(key)) continue;
      seen.add(key);
      entries.push({ date, type: typeGuess, target: foundEntity, sourceUrl, confidence: 'confirmed_press', description: null });
    }
  }

  // Add off-brand subsidiaries not already matched (confirmed_registry)
  for (const sub of offBrandSubs ?? []) {
    const subName = (sub?.name || '').toLowerCase();
    const subWords = subName.split(' ').filter((w) => w.length > 2);
    // Covered when any significant word of one name appears in the other.
    const alreadyCovered = entries.some((e) => {
      const entTarget = (e.target || '').toLowerCase();
      return subWords.some((w) => entTarget.includes(w)) ||
        entTarget.split(' ').some((w) => w.length > 2 && subName.includes(w));
    });
    if (alreadyCovered || !sub?.dateCreation) continue;
    const date = String(sub.dateCreation).substring(0, 7); // YYYY-MM
    const key = `${subName.substring(0, 15)}|registry`;
    if (seen.has(key)) continue;
    seen.add(key);
    entries.push({ date, type: 'acquisition', target: sub.name, sourceUrl: null, confidence: 'confirmed_registry', description: null });
  }

  // Sort chronologically
  entries.sort((a, b) => (a.date || '').localeCompare(b.date || ''));
  return entries;
}