intelwatch 1.2.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,75 @@
1
+ /**
2
+ * Pappers Provider — France only.
3
+ *
4
+ * Wraps the existing src/scrapers/pappers.js into the provider interface.
5
+ * This is a thin adapter, not a rewrite.
6
+ */
7
+
8
+ import {
9
+ hasPappersKey,
10
+ pappersSearchByName,
11
+ pappersGetFullDossier,
12
+ pappersGetBySiren,
13
+ pappersLookup,
14
+ pappersSearchSubsidiaries,
15
+ } from '../scrapers/pappers.js';
16
+
17
const pappersProvider = {
  name: 'pappers',
  country: 'FR',
  description: 'Pappers.fr — French company registry (SIREN, financials, BODACC, M&A)',

  /**
   * Availability probe: true when PAPPERS_API_KEY is set.
   * Delegates to hasPappersKey() from the scraper module.
   */
  isAvailable() {
    const keyPresent = hasPappersKey();
    return keyPresent;
  },

  /**
   * Name-based company search, forwarded unchanged to pappersSearchByName.
   * @param {string} query
   * @param {{ count?: number }} options
   * @returns {Promise<{ results: Array, error: string|null }>}
   */
  async search(query, options = {}) {
    const found = pappersSearchByName(query, options);
    return found;
  },

  /**
   * Fetch a company profile by SIREN.
   * Preview mode uses the basic SIREN lookup (identity + last year);
   * otherwise the full dossier (financials history, BODACC, subsidiaries, etc.).
   * @param {string} siren
   * @param {{ preview?: boolean }} options
   * @returns {Promise<{ data: object|null, error: string|null, fromCache?: boolean }>}
   */
  async getProfile(siren, options = {}) {
    const fetchProfile = options.preview ? pappersGetBySiren : pappersGetFullDossier;
    return fetchProfile(siren);
  },

  /**
   * List the subsidiaries of a parent company.
   * Only the name and SIREN are forwarded to pappersSearchSubsidiaries;
   * `options` is accepted for interface parity but not passed on.
   * @param {string} parentName
   * @param {string} parentSiren
   * @param {object} options
   * @returns {Promise<{ subsidiaries: Array, fromCache?: boolean }>}
   */
  async getSubsidiaries(parentName, parentSiren, options = {}) {
    const found = pappersSearchSubsidiaries(parentName, parentSiren);
    return found;
  },

  /**
   * Quick name → basic company info lookup, used by the competitor tracker.
   * @param {string} companyName
   * @returns {Promise<object|null>}
   */
  async lookup(companyName) {
    const info = pappersLookup(companyName);
    return info;
  },
};
74
+
75
+ export default pappersProvider;
@@ -0,0 +1,531 @@
1
+ /**
2
+ * Company Data Provider Registry.
3
+ *
4
+ * Routes company lookups to the right provider based on TLD / country.
5
+ * Each provider implements the same interface (see BaseProvider).
6
+ *
7
+ * Routing:
8
+ * SIREN/SIRET → Pappers (direct, numeric 9-14 digits)
9
+ * .fr → Pappers (France, full data)
10
+ * .co.uk, .uk → International fallback chain (no dedicated UK provider yet; OpenCorporates is the last resort)
11
+ * .com, .io, … → International provider (Apollo / Clearbit / OpenCorporates)
12
+ * ↳ if country = France → handoff to Pappers for deep data
13
+ * fallback → OpenCorporates
14
+ *
15
+ * License gate:
16
+ * Free tier: --preview only (identity + last year financials)
17
+ * Pro tier: full profile, subsidiaries, M&A, financials history
18
+ */
19
+
20
+ import { isPro, requirePro, getLimits, gatePro } from '../license.js';
21
+
22
// Providers for which every API call requires a Pro license.
const PRO_ONLY_PROVIDERS = new Set(['pappers', 'apollo', 'clearbit']);

// ── TLD → Country mapping ────────────────────────────────────────────────────

// Generic TLDs carry no country signal and map to the pseudo-code 'INTL'.
const TLD_COUNTRY_MAP = Object.fromEntries([
  // Europe
  ['.fr', 'FR'],
  ['.co.uk', 'GB'],
  ['.uk', 'GB'],
  ['.de', 'DE'],
  ['.es', 'ES'],
  ['.it', 'IT'],
  ['.nl', 'NL'],
  ['.be', 'BE'],
  ['.ch', 'CH'],
  ['.pt', 'PT'],
  ['.at', 'AT'],
  ['.ie', 'IE'],
  ['.se', 'SE'],
  ['.no', 'NO'],
  ['.dk', 'DK'],
  ['.fi', 'FI'],
  ['.pl', 'PL'],
  // Generic / international
  ['.com', 'INTL'],
  ['.org', 'INTL'],
  ['.net', 'INTL'],
  ['.io', 'INTL'],
  ['.co', 'INTL'],
  ['.ai', 'INTL'],
  // Rest of the world
  ['.us', 'US'],
  ['.ca', 'CA'],
  ['.au', 'AU'],
  ['.nz', 'NZ'],
  ['.jp', 'JP'],
  ['.cn', 'CN'],
  ['.in', 'IN'],
  ['.br', 'BR'],
]);

// ── Country → Provider mapping ───────────────────────────────────────────────

// Countries with a dedicated provider. Every country not listed here goes
// through INTL_FALLBACK_CHAIN (extensible: add GB: 'companieshouse', etc.).
const PROVIDER_MAP = { FR: 'pappers' };

// Fallback chain for international domains (tried in order, first available wins)
const INTL_FALLBACK_CHAIN = ['apollo', 'clearbit', 'opencorporates'];
70
+
71
// ── SIREN/SIRET Detection ────────────────────────────────────────────────────

// Exactly 9 digits (SIREN), optionally followed by 5 more digits (14-digit SIRET).
const SIREN_SIRET_RE = /^\d{9}(\d{5})?$/;

/**
 * Check if a query is a SIREN (9 digits) or SIRET (14 digits).
 * Surrounding whitespace is ignored. Numeric input is accepted and compared
 * by its decimal string form; any other non-string value yields false
 * instead of throwing.
 * @param {string|number|null|undefined} query
 * @returns {boolean}
 */
export function isSirenOrSiret(query) {
  // Identifiers may arrive as numbers (e.g. parsed CLI args or JSON);
  // calling .trim() on them directly would throw a TypeError.
  if (typeof query !== 'string' && typeof query !== 'number') return false;
  return SIREN_SIRET_RE.test(String(query).trim());
}
83
+
84
// ── France Country Detection (for handoff) ───────────────────────────────────

// Lower-cased spellings that are treated as "France".
const FRANCE_VARIANTS = new Set([
  'france', 'fr', 'france (metropolitan)', 'france métropolitaine',
  'france, metropolitan', 'république française',
]);

/**
 * Detect if a country value indicates France.
 * Comparison ignores case, surrounding whitespace and Unicode composition
 * form (accents written as combining marks still match).
 * Non-string values return false rather than throwing, because the value
 * usually comes straight from a third-party provider payload.
 * @param {string|null|undefined} country
 * @returns {boolean}
 */
export function isFrenchCountry(country) {
  if (typeof country !== 'string') return false;
  const normalized = country.normalize('NFC').trim().toLowerCase();
  return FRANCE_VARIANTS.has(normalized);
}
100
+
101
/**
 * Pull a country value out of an international provider profile.
 * Lookup order: `country`, `headquarters.country`, `geo.country`; the first
 * truthy value is returned as-is. As a last resort, a string `location`
 * containing the word "france" (any case) yields the literal 'France'.
 * @param {object} profile — the data object from a provider response
 * @returns {string|null} null when the profile is missing or no field matches
 */
function extractCountryFromProfile(profile) {
  if (!profile) {
    return null;
  }

  const explicitCountry =
    profile.country || profile.headquarters?.country || profile.geo?.country;
  if (explicitCountry) {
    return explicitCountry;
  }

  const { location } = profile;
  const mentionsFrance = typeof location === 'string' && /\bfrance\b/i.test(location);
  return mentionsFrance ? 'France' : null;
}
117
+
118
/**
 * Pick the company name from an international provider profile.
 * Tries `name`, then `legalName`, then `companyName`; the first truthy value wins.
 * @param {object} profile
 * @returns {string|null} null when the profile is missing or has none of the fields
 */
function extractCompanyName(profile) {
  if (!profile) {
    return null;
  }
  for (const field of ['name', 'legalName', 'companyName']) {
    const candidate = profile[field];
    if (candidate) {
      return candidate;
    }
  }
  return null;
}
127
+
128
/**
 * Combine an international provider profile with a Pappers profile.
 *
 * - No Pappers data: the international data is returned tagged `_handoff: 'pappers_failed'`.
 * - No international data: the Pappers data is returned with `source: 'pappers'`.
 * - Both present: start from the international data, let every Pappers value
 *   that is not null/undefined/'' overwrite it, then restore the international
 *   enrichment fields (techStack, social links, …) wherever Pappers has only
 *   a falsy value or an empty array.
 *
 * @param {object} intlData — from Apollo/Clearbit/OpenCorporates
 * @param {object} pappersData — from Pappers
 * @returns {object} a new object; neither input is mutated
 */
export function mergeWithPappers(intlData, pappersData) {
  if (!pappersData) {
    return { ...intlData, _handoff: 'pappers_failed' };
  }
  if (!intlData) {
    return { ...pappersData, source: 'pappers' };
  }

  const combined = { ...intlData };

  // Pappers wins whenever it actually carries a value.
  Object.keys(pappersData).forEach((key) => {
    const value = pappersData[key];
    if (value == null || value === '') return;
    combined[key] = value;
  });

  // Enrichment fields that only international providers populate.
  const enrichmentFields = ['techStack', 'tags', 'linkedin', 'twitter', 'facebook', 'logo', 'estimatedRevenue', 'raised', 'subIndustry'];
  const lacksValue = (candidate) => !candidate || (Array.isArray(candidate) && candidate.length === 0);
  enrichmentFields
    .filter((field) => intlData[field] && lacksValue(pappersData[field]))
    .forEach((field) => {
      combined[field] = intlData[field];
    });

  combined.source = 'pappers+' + (intlData.source || 'international');
  combined._handoff = 'france_detected';

  return combined;
}
162
+
163
/**
 * Attempt France handoff: when an international profile points to France,
 * look the company up on Pappers by name and merge the deeper French data in.
 *
 * `handoff` is true only when Pappers actually returned profile data that was
 * merged. In every other France-detected case the original data is returned
 * with a `_handoff` marker explaining why no enrichment happened:
 *   'pappers_unavailable' — provider not registered or not available
 *   'no_company_name'     — the profile has no usable name to search with
 *   'pappers_no_match'    — the search returned no result carrying a SIREN
 *   'pappers_failed'      — the profile call returned no data
 *   'pappers_error'       — the search or profile call threw
 * When France is not detected the data is returned untouched, without marker.
 *
 * @param {object} intlProfileData — the data field from international provider response
 * @param {object} options — { preview }
 * @returns {Promise<{ data: object, handoff: boolean }>}
 */
async function attemptFranceHandoff(intlProfileData, options = {}) {
  // Original data plus the reason the handoff was skipped.
  const skipped = (reason) => ({
    data: { ...intlProfileData, _handoff: reason },
    handoff: false,
  });

  const country = extractCountryFromProfile(intlProfileData);
  if (!isFrenchCountry(country)) {
    return { data: intlProfileData, handoff: false };
  }

  // French company detected on international TLD — get deep data from Pappers
  const pappersProvider = providers['pappers'];
  if (!pappersProvider || !pappersProvider.isAvailable()) {
    return skipped('pappers_unavailable');
  }

  const companyName = extractCompanyName(intlProfileData);
  if (!companyName) {
    return skipped('no_company_name');
  }

  try {
    // Search Pappers by company name to find the SIREN
    const searchResult = await pappersProvider.search(companyName, { count: 1 });
    const siren = searchResult?.results?.[0]?.siren;
    if (!siren) {
      return skipped('pappers_no_match');
    }

    const pappersProfile = await pappersProvider.getProfile(siren, {
      preview: Boolean(options.preview),
    });

    // A response without data means nothing was merged, so this must not be
    // reported as a successful handoff (the caller would otherwise label the
    // result as Pappers-enriched).
    const pappersData = pappersProfile?.data;
    if (!pappersData) {
      return skipped('pappers_failed');
    }

    return { data: mergeWithPappers(intlProfileData, pappersData), handoff: true };
  } catch {
    // Enrichment is best-effort: a Pappers failure must not break the
    // international result that was already obtained.
    return skipped('pappers_error');
  }
}
223
+
224
// ── Registry ─────────────────────────────────────────────────────────────────

// Registered providers, keyed by provider name.
const providers = {};

/**
 * Add a provider to the registry, replacing any provider already stored
 * under the same name.
 * @param {string} name — e.g. 'pappers', 'opencorporates', 'clearbit'
 * @param {object} provider — must implement { search, getProfile, getSubsidiaries, isAvailable }
 */
export function registerProvider(name, provider) {
  Object.assign(providers, { [name]: provider });
}
236
+
237
/**
 * Detect country code from a domain or URL, based on the hostname's TLD.
 *
 * Accepts bare domains ("example.fr"), domains with a path or port, and full
 * URLs with any scheme. Matching is case-insensitive and tolerates a trailing
 * dot (fully-qualified form). Missing, empty or non-string input yields 'INTL'.
 *
 * @param {string} domainOrUrl
 * @returns {string} ISO country code or 'INTL'
 */
export function detectCountry(domainOrUrl) {
  if (typeof domainOrUrl !== 'string') return 'INTL';

  const input = domainOrUrl.trim();
  if (input === '') return 'INTL';

  // Only treat the input as a URL when it really starts with "scheme://".
  // A plain prefix test on "http" misreads hosts such as "http-example.fr"
  // and misses upper-case schemes.
  const hasScheme = /^[a-z][a-z\d+.-]*:\/\//i.test(input);

  let hostname;
  try {
    hostname = new URL(hasScheme ? input : `https://${input}`).hostname;
  } catch {
    // Not parseable as a URL: fall back to matching against the raw text.
    hostname = input;
  }
  hostname = hostname.toLowerCase().replace(/\.$/, '');

  // Match longest TLD first (e.g. .co.uk before .uk)
  const tldsLongestFirst = Object.keys(TLD_COUNTRY_MAP).sort((a, b) => b.length - a.length);
  const matchedTld = tldsLongestFirst.find((tld) => hostname.endsWith(tld));
  return matchedTld ? TLD_COUNTRY_MAP[matchedTld] : 'INTL';
}
259
+
260
/**
 * Choose the provider responsible for a domain.
 *
 * 1. If the detected country has a dedicated provider in PROVIDER_MAP and
 *    that provider is registered, use it (its availability is not checked here).
 * 2. Otherwise walk INTL_FALLBACK_CHAIN and take the first registered
 *    provider that reports itself available.
 * 3. Otherwise fall back to 'opencorporates'; `provider` is null when it is
 *    not registered.
 *
 * @param {string} domainOrUrl
 * @returns {{ provider: object|null, providerName: string, country: string }}
 */
export function resolveProvider(domainOrUrl) {
  const country = detectCountry(domainOrUrl);

  const dedicatedName = PROVIDER_MAP[country];
  if (dedicatedName && providers[dedicatedName]) {
    return { provider: providers[dedicatedName], providerName: dedicatedName, country };
  }

  // International: first registered and available provider in the chain wins.
  const chainHit = INTL_FALLBACK_CHAIN.find(
    (candidate) => providers[candidate] && providers[candidate].isAvailable(),
  );
  if (chainHit !== undefined) {
    return { provider: providers[chainHit], providerName: chainHit, country };
  }

  // Last resort: opencorporates, whether or not it is registered.
  return {
    provider: providers['opencorporates'] || null,
    providerName: 'opencorporates',
    country,
  };
}
285
+
286
/**
 * High-level: search for a company across the right provider.
 *
 * Routing:
 *  - A SIREN/SIRET query goes straight to Pappers (Pro license required);
 *    the result carries `_routing: 'siren_direct'`.
 *  - Anything else goes to the provider resolved from `domainOrUrl`.
 * No France handoff is performed for searches.
 *
 * Failures are reported in-band: `results` is empty and `error` is set
 * (`licenseRequired: true` is added when the cause is a missing Pro license).
 * Exceptions thrown by the provider's `search` are not caught here.
 *
 * @param {string} query — company name or identifier
 * @param {string} domainOrUrl — domain to determine country
 * @param {object} options — { count, preview }, forwarded to the provider
 * @returns {Promise<object>} the provider's search response plus `provider` and `country`
 */
export async function searchCompany(query, domainOrUrl, options = {}) {
  // Shared shape for every in-band failure.
  const failure = (provider, country, error, extra = {}) => ({
    results: [],
    provider,
    country,
    error,
    ...extra,
  });
  const licenseFailure = (provider, country) =>
    failure(provider, country, `Business Data (${provider}) requires an Intelwatch Pro license.`, {
      licenseRequired: true,
    });

  // ── SIREN/SIRET direct routing → Pappers immediately ──
  if (isSirenOrSiret(query)) {
    const providerName = 'pappers';
    const country = 'FR';

    if (!isPro()) {
      return licenseFailure(providerName, country);
    }

    const pappersP = providers['pappers'];
    if (!pappersP || !pappersP.isAvailable()) {
      return failure(providerName, country, `${providerName} API key not configured.`);
    }

    // isSirenOrSiret ignores surrounding whitespace, so forward the same
    // normalized identifier that was validated rather than the raw input.
    const results = await pappersP.search(String(query).trim(), options);
    return { ...results, provider: providerName, country, _routing: 'siren_direct' };
  }

  const { provider, providerName, country } = resolveProvider(domainOrUrl);

  // License gate: enrichment providers are Pro-only
  if (PRO_ONLY_PROVIDERS.has(providerName) && !isPro()) {
    return licenseFailure(providerName, country);
  }

  if (!provider) {
    return failure(
      providerName,
      country,
      `No provider configured for ${providerName}. Set up ${providerName} credentials.`,
    );
  }

  if (!provider.isAvailable()) {
    return failure(providerName, country, `${providerName} API key not configured.`);
  }

  const results = await provider.search(query, options);
  return { ...results, provider: providerName, country };
}
357
+
358
/**
 * High-level: get a company profile (license-gated).
 *
 * Routing:
 *  - A SIREN/SIRET identifier goes straight to Pappers (Pro license
 *    required); the result carries `_routing: 'siren_direct'`.
 *  - Otherwise the provider is resolved from `domainOrUrl`. When that
 *    provider returns data and the domain's country is not 'FR', a France
 *    handoff is attempted: on success the result is reported as
 *    `provider: 'pappers+<name>'`, `country: 'FR'`,
 *    `_routing: 'france_handoff'`; when a handoff was skipped with a reason,
 *    the data is returned annotated with that `_handoff` reason.
 *
 * Every response carries `provider`, `country`, `tier` ('pro' | 'free') and
 * `isPreview` (preview requested, or no Pro license). Failures are reported
 * in-band with `data: null` and `error` (`licenseRequired: true` is added
 * when the cause is a missing Pro license). Exceptions thrown by a
 * provider's `getProfile` are not caught here.
 *
 * @param {string} identifier — SIREN, domain, company number, etc.
 * @param {string} domainOrUrl — domain to determine country
 * @param {object} options — { preview }
 * @returns {Promise<object>}
 */
export async function getCompanyProfile(identifier, domainOrUrl, options = {}) {
  const pro = isPro();
  const tier = pro ? 'pro' : 'free';
  const isPreview = options.preview || !pro;

  // Shared shape for every in-band failure.
  const failure = (provider, country, error, extra = {}) => ({
    data: null,
    provider,
    country,
    tier,
    isPreview,
    error,
    ...extra,
  });
  const licenseFailure = (provider, country) =>
    failure(provider, country, `Business Data (${provider}) requires an Intelwatch Pro license.`, {
      isPreview: true,
      licenseRequired: true,
    });

  // ── SIREN/SIRET direct routing → Pappers immediately ──
  if (isSirenOrSiret(identifier)) {
    const providerName = 'pappers';
    const country = 'FR';

    if (!pro) {
      return licenseFailure(providerName, country);
    }

    const pappersP = providers['pappers'];
    if (!pappersP || !pappersP.isAvailable()) {
      return failure(providerName, country, `${providerName} API key not configured.`);
    }

    // isSirenOrSiret ignores surrounding whitespace, so forward the same
    // normalized identifier that was validated rather than the raw input.
    // NOTE(review): a 14-digit SIRET is forwarded as-is to getProfile —
    // confirm the Pappers provider accepts SIRET as well as SIREN.
    const profile = await pappersP.getProfile(String(identifier).trim(), {
      ...options,
      preview: isPreview,
    });
    return {
      ...profile,
      provider: providerName,
      country,
      tier,
      isPreview,
      _routing: 'siren_direct',
    };
  }

  const { provider, providerName, country } = resolveProvider(domainOrUrl);

  // License gate: enrichment providers are Pro-only
  if (PRO_ONLY_PROVIDERS.has(providerName) && !pro) {
    return licenseFailure(providerName, country);
  }

  if (!provider) {
    return failure(providerName, country, `No provider configured for ${providerName}.`);
  }

  if (!provider.isAvailable()) {
    return failure(providerName, country, `${providerName} API key not configured.`);
  }

  const profile = await provider.getProfile(identifier, { ...options, preview: isPreview });

  // ── France Handoff: if international provider detects France, enrich with Pappers ──
  if (profile?.data && country !== 'FR') {
    const { data: enrichedData, handoff } = await attemptFranceHandoff(profile.data, {
      preview: isPreview,
    });

    if (handoff) {
      return {
        ...profile,
        data: enrichedData,
        provider: `pappers+${providerName}`,
        country: 'FR',
        tier,
        isPreview,
        _routing: 'france_handoff',
      };
    }

    // Handoff skipped with a reason (pappers_unavailable, etc.): keep the
    // annotation on the data but report the original provider and country.
    if (enrichedData?._handoff) {
      return {
        ...profile,
        data: enrichedData,
        provider: providerName,
        country,
        tier,
        isPreview,
      };
    }
  }

  return {
    ...profile,
    provider: providerName,
    country,
    tier,
    isPreview,
  };
}
486
+
487
/**
 * High-level: get subsidiaries (Pro only).
 *
 * Calls requirePro('Subsidiary analysis') first, then resolves the provider
 * from `domainOrUrl`. Problems with the provider are reported in-band with
 * an empty `subsidiaries` array and an `error` message:
 *  - the resolved provider is missing or has no `getSubsidiaries` method;
 *  - the provider reports itself unavailable (same check and message as the
 *    other high-level entry points, so no call is made without credentials).
 *
 * @param {string} parentName — name of the parent company
 * @param {string} parentId — provider-specific parent identifier (e.g. SIREN)
 * @param {string} domainOrUrl — domain to determine country
 * @param {object} options — forwarded to the provider
 * @returns {Promise<object>} the provider's response plus `provider` and `country`
 */
export async function getSubsidiaries(parentName, parentId, domainOrUrl, options = {}) {
  requirePro('Subsidiary analysis');
  const { provider, providerName, country } = resolveProvider(domainOrUrl);

  if (!provider?.getSubsidiaries) {
    return {
      subsidiaries: [],
      provider: providerName,
      country,
      error: `${providerName} does not support subsidiary lookup.`,
    };
  }

  if (!provider.isAvailable()) {
    return {
      subsidiaries: [],
      provider: providerName,
      country,
      error: `${providerName} API key not configured.`,
    };
  }

  const response = await provider.getSubsidiaries(parentName, parentId, options);
  return { ...response, provider: providerName, country };
}
500
+
501
/**
 * High-level: quick lookup for the competitor tracker (company name → basic info).
 *
 * Best-effort by design: resolves to null when the resolved provider is
 * missing, has no `lookup` method, is unavailable, or when the lookup itself
 * throws or rejects.
 *
 * @param {string} companyName
 * @param {string} domainOrUrl — domain to determine country
 * @returns {Promise<object|null>}
 */
export async function lookupCompany(companyName, domainOrUrl) {
  const { provider } = resolveProvider(domainOrUrl);

  const canLookup = Boolean(provider?.lookup) && provider.isAvailable();
  if (!canLookup) {
    return null;
  }

  let info = null;
  try {
    info = await provider.lookup(companyName);
  } catch {
    // Swallowed on purpose: callers treat a failed lookup as "no info".
    info = null;
  }
  return info;
}
519
+
520
/**
 * Describe every registered provider.
 * @returns {Array<{ name: string, available: boolean, countries: string[] }>}
 *   one entry per registered provider, in registration order; `countries`
 *   lists the PROVIDER_MAP country codes routed to that provider.
 */
export function listProviders() {
  const summaries = [];

  for (const [name, provider] of Object.entries(providers)) {
    const available = provider.isAvailable();

    const countries = [];
    for (const [countryCode, mappedName] of Object.entries(PROVIDER_MAP)) {
      if (mappedName === name) {
        countries.push(countryCode);
      }
    }

    summaries.push({ name, available, countries });
  }

  return summaries;
}