@coldiq/mcp 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/executor.d.ts +6 -0
- package/dist/executor.d.ts.map +1 -1
- package/dist/executor.js +18 -1
- package/dist/executor.js.map +1 -1
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +25 -8
- package/dist/registry.js.map +1 -1
- package/dist/tools/find-emails.d.ts.map +1 -1
- package/dist/tools/find-emails.js +46 -1
- package/dist/tools/find-emails.js.map +1 -1
- package/dist/utils/provider-display.d.ts +6 -0
- package/dist/utils/provider-display.d.ts.map +1 -0
- package/dist/utils/provider-display.js +140 -0
- package/dist/utils/provider-display.js.map +1 -0
- package/dist/utils/selection-insight.d.ts +20 -0
- package/dist/utils/selection-insight.d.ts.map +1 -0
- package/dist/utils/selection-insight.js +295 -0
- package/dist/utils/selection-insight.js.map +1 -0
- package/package.json +1 -1
- package/src/executor.ts +23 -1
- package/src/registry.ts +30 -8
- package/src/tools/find-emails.ts +43 -1
- package/src/utils/provider-display.ts +150 -0
- package/src/utils/selection-insight.ts +347 -0
- package/tests/executor.test.ts +77 -0
- package/tests/registry-find-signals.test.ts +15 -3
- package/tests/registry-search-jobs.test.ts +21 -0
- package/tests/tools/find-emails.test.ts +7 -7
- package/tests/utils/provider-display.test.ts +40 -0
- package/tests/utils/selection-insight.test.ts +114 -0
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
import type { Capability } from '../registry.js'
|
|
2
|
+
import { providerDisplayName } from './provider-display.js'
|
|
3
|
+
|
|
4
|
+
// ---------------------------------------------------------------------------
|
|
5
|
+
// Selection insight — a short, human-readable, DATA-QUALITY-framed reason for
|
|
6
|
+
// why ColdIQ routed a request to a given provider. Surfaced in `_meta` so the
|
|
7
|
+
// chat layer can show the "we picked the best tool" intelligence.
|
|
8
|
+
//
|
|
9
|
+
// Rules baked in here (see CLAUDE.md + the plan):
|
|
10
|
+
// • Data-quality reasons ONLY — coverage / specialization / freshness / region.
|
|
11
|
+
// NEVER price, never "tried first", never waterfall position.
|
|
12
|
+
// • Honest about the two ways a provider wins:
|
|
13
|
+
// - capability routing (others gated out) → confident "Routed to X for …"
|
|
14
|
+
// - fallthrough (others returned nothing) → softened "X returned the match …"
|
|
15
|
+
// • Never fabricate a strength: when a provider has no curated edge we emit a
|
|
16
|
+
// plain, true line ("X matched this …") instead of inventing specialization.
|
|
17
|
+
// • Display names only (no raw slugs, no "Apify") — via providerDisplayName.
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
/**
 * Capabilities that can carry a selection insight: every registry `Capability`
 * plus the literal `'find_emails'`, which is added explicitly here.
 * NOTE(review): presumably `'find_emails'` (the bulk email tool) is not part of
 * the registry's `Capability` union — confirm against `../registry.js`.
 */
export type InsightCapability = Capability | 'find_emails'
|
|
21
|
+
|
|
22
|
+
/** Explanation of a provider choice, as produced by `buildSelectionInsight`. */
export interface SelectionInsight {
|
|
23
|
+
/** Human-readable sentence describing why (or how) the provider was used. */
insight: string
|
|
24
|
+
/** Labels of the input dimensions detected for the request, most distinctive first. */
signals: string[]
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/**
 * How the winning provider came to be selected. `buildSelectionInsight` checks
 * `pinnedByUser` first; `wasFallback` only changes the wording when the
 * provider has a curated strength for the capability.
 */
export interface InsightContext {
|
|
28
|
+
/** True when the chosen provider won only because higher-ranked providers
|
|
29
|
+
* returned no rows (not because it was the best-suited for the inputs). */
|
|
30
|
+
wasFallback: boolean
|
|
31
|
+
/** True when the caller pinned this provider via `use_providers`. */
|
|
32
|
+
pinnedByUser: boolean
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
// Curated data-quality strengths, keyed by capability → providerId.
|
|
37
|
+
// Each value is a noun phrase that slots into "… for its <strength>". Phrasing
|
|
38
|
+
// is descriptive (deep / strong / broad / specialized), NOT superlative, so it
|
|
39
|
+
// stays defensible on the fallthrough path too.
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
/**
 * Email-finding strengths, keyed by providerId. The single-lookup
 * (`find_email`) and bulk (`find_emails`) capabilities use the same providers,
 * so both point at this one table instead of keeping two copies that could
 * drift apart.
 */
const EMAIL_FINDER_STRENGTHS: Record<string, string> = {
  prospeo: 'strong work-email coverage',
  fullenrich: 'strong international email coverage, especially across Europe',
  findymail: 'high-accuracy verified work emails',
  icypeas: 'broad email-finding coverage',
  'limadata-work-email': 'name-and-domain work-email lookup',
  blitzapi: 'LinkedIn-URL–based email finding',
  'limadata-work-email-linkedin': 'LinkedIn-URL–based work-email lookup',
  linkupapi: 'LinkedIn-sourced email finding',
}

/**
 * Curated strength phrases, keyed by capability and then by providerId.
 * Each value is a noun phrase that completes "… for its <strength>".
 * The map is `Partial`: a capability (or a provider within one) without an
 * entry has no curated strength, and callers must fall back to a plain line.
 */
const STRENGTHS: Partial<Record<InsightCapability, Record<string, string>>> = {
  search_companies: {
    companyenrich: 'broad firmographic coverage with full-text company matching',
    apollo: 'an extensive global company database',
    fullenrich: 'rich firmographic data with strong international coverage',
    pdl: 'a large, structured global company dataset',
    signalbase: 'fast firmographic search across keywords and industries',
    blitzapi: 'LinkedIn-sourced company data',
    limadata: 'LinkedIn-based company discovery',
    predictleads: 'discovery scoped tightly by location and company size',
    theirstack: 'deep technographic and buying-intent coverage',
    sumble: 'technology- and keyword-driven company discovery',
    'limadata-prospect-filter': 'LinkedIn headcount-bucketed prospecting',
    'limadata-prospect-url': 'discovery straight from a LinkedIn Sales Navigator search',
    'linkupapi-search': 'live LinkedIn company search',
    'linkupapi-fundraising': 'a dedicated index of recently-funded companies',
    'linkupapi-hiring': 'a dedicated index of actively-hiring companies',
    'prospeo-search-company': 'keyword, industry and geo company search',
    'ai-ark-companies': 'multi-filter company discovery',
  },
  find_people: {
    leadsfactory: 'company-scoped contact discovery by persona',
    apollo: 'an extensive global contact database',
    pdl: 'a large, structured global people dataset',
    companyenrich: 'domain-scoped employee lookups',
    'linkupapi-search-profiles': 'live LinkedIn profile search',
    'sumble-people-find': 'org-scoped people search by job function',
    'prospeo-search-person': 'title- and company-based people search',
    'ai-ark-people': 'multi-filter people discovery',
    'fullenrich-people-search': 'domain-scoped people search',
    'findymail-search-employees': 'domain-scoped employee discovery',
  },
  // Single and bulk email lookups share one strengths table.
  find_email: EMAIL_FINDER_STRENGTHS,
  find_emails: EMAIL_FINDER_STRENGTHS,
  verify_email: {
    findymail: 'reliable deliverability verification',
    icypeas: 'broad email verification',
    instantly: 'deliverability-focused verification',
    'linkupapi-validate': 'LinkedIn-aware email validation',
  },
  find_phone: {
    findymail: 'verified mobile-number coverage',
    limadata: 'phone lookup by LinkedIn URL or name + company',
    'ai-ark': 'phone lookup across multiple identifiers',
  },
  enrich_company: {
    companyenrich: 'broad firmographic enrichment from a domain',
    apollo: 'deep firmographic enrichment',
    pdl: 'structured firmographic data',
    findymail: 'domain-based company enrichment',
    wiza: 'company enrichment from name or domain',
    limadata: 'LinkedIn-based company enrichment',
    prospeo: 'company enrichment',
    companyenrich_props: 'company enrichment from name or LinkedIn',
    blitzapi: 'LinkedIn-URL company enrichment',
    icypeas: 'LinkedIn-URL company enrichment',
    builtwith: 'technology-stack detection for a domain',
    openmart: 'local-business firmographics',
    'linkupapi-by-domain': 'live LinkedIn company data',
    'linkupapi-by-url': 'live LinkedIn company data',
  },
  enrich_person: {
    'linkupapi-profile-enrich': 'live LinkedIn profile enrichment',
    'linkupapi-email-reverse': 'reverse-email lookup from LinkedIn data',
    'pdl-person-enrich': 'structured person enrichment',
    'apollo-people-match': 'deep person enrichment',
    'blitzapi-reverse-email': 'reverse-email profile lookup',
    'findymail-business-profile': 'LinkedIn profile enrichment',
    'findymail-reverse-email': 'reverse-email lookup',
    'icypeas-scrape-profile': 'LinkedIn profile scraping',
    'icypeas-url-search-profile': 'profile lookup by name and company',
    'ai-ark-reverse-lookup': 'reverse lookup from email or phone',
    'icypeas-reverse-email-lookup': 'reverse-email lookup',
    'pdl-person-identify': 'person identity resolution',
  },
  search_web: {
    serper: 'fast Google search results',
    exa: 'neural, meaning-based web search',
    limadata: 'general web search',
    jina: 'web search and page reading',
  },
  search_jobs: {
    career_site_jobs: 'jobs sourced directly from company career sites',
    linkedin_jobs_api: 'LinkedIn job listings',
    'theirstack-jobs': 'jobs enriched with company and tech-stack data',
  },
  search_ads: {
    google_ads: 'Google ad-transparency data',
    linkedin_ad_library: 'LinkedIn Ad Library coverage',
    meta_ads: 'Meta (Facebook/Instagram) ad-library coverage',
    twitter_ads: 'X ad coverage',
    reddit_ads: 'Reddit ad coverage',
  },
  search_places: {
    openmart: 'rich local-business data across the US, CA, AU, PR and NZ',
    google_maps: 'broad global places coverage from Google Maps',
  },
  get_place_reviews: {
    google_maps_reviews: 'Google Maps review data',
  },
  find_influencers: {
    influencers_similar: 'lookalike creator discovery from a seed handle',
    influencers_discovery: 'creator discovery by topic and audience',
  },
  search_reddit: {
    reddit: 'Reddit post and comment search',
  },
  find_signals: {
    'signalbase-funding': 'real-time funding-round signals',
    'signalbase-acquisition': 'acquisition signals',
    'signalbase-hiring': 'hiring signals',
    'signalbase-job-change': 'job-change signals',
    'theirstack-hiring': 'hiring signals from job-posting data',
    'theirstack-intent-discovery': 'buying-intent discovery',
    'theirstack-buying-intents': 'buying-intent signals from tech and job data',
    'predictleads-financing': 'financing-event signals',
    'predictleads-news': 'company-news signals',
    'predictleads-startup-posts': 'startup-announcement signals',
  },
  fetch_page_content: {
    'exa-contents': 'clean page-content extraction',
  },
}
|
|
183
|
+
|
|
184
|
+
// ---------------------------------------------------------------------------
|
|
185
|
+
// Per-capability noun used in the plain fallback line ("X matched this <noun>").
|
|
186
|
+
// ---------------------------------------------------------------------------
|
|
187
|
+
|
|
188
|
+
/**
 * Human-readable noun for each capability, interpolated into the plain
 * insight line ("<provider> matched this <noun>.") when a provider has no
 * curated strength. Typed as a full `Record`, so every `InsightCapability`
 * needs an entry here.
 */
const CAPABILITY_NOUN: Record<InsightCapability, string> = {
|
|
189
|
+
search_companies: 'company search',
|
|
190
|
+
find_people: 'people search',
|
|
191
|
+
find_email: 'email lookup',
|
|
192
|
+
find_emails: 'email lookup',
|
|
193
|
+
verify_email: 'email verification',
|
|
194
|
+
find_phone: 'phone lookup',
|
|
195
|
+
enrich_company: 'company enrichment',
|
|
196
|
+
enrich_person: 'person enrichment',
|
|
197
|
+
search_web: 'web search',
|
|
198
|
+
search_jobs: 'job search',
|
|
199
|
+
search_ads: 'ad search',
|
|
200
|
+
search_places: 'places search',
|
|
201
|
+
get_place_reviews: 'reviews lookup',
|
|
202
|
+
find_influencers: 'influencer search',
|
|
203
|
+
search_reddit: 'Reddit search',
|
|
204
|
+
search_seo: 'SEO lookup',
|
|
205
|
+
find_signals: 'signal search',
|
|
206
|
+
fetch_page_content: 'page fetch',
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
// ---------------------------------------------------------------------------
|
|
210
|
+
// Signal detection — which input dimensions drove routing, in human terms.
|
|
211
|
+
// Ordered most-distinctive-first so signals[0] is the routing-relevant one.
|
|
212
|
+
// ---------------------------------------------------------------------------
|
|
213
|
+
|
|
214
|
+
/** True when `v` is an array containing at least one element. */
function arr(v: unknown): boolean {
  return Array.isArray(v) && v.length > 0
}

/** True when `v` is a number; `0` counts as a supplied value. */
function num(v: unknown): boolean {
  return typeof v === 'number'
}

/** True when `v` is a non-empty string. */
function str(v: unknown): boolean {
  return typeof v === 'string' && v.length > 0
}

/**
 * List the input dimensions present on a request, as human-readable labels.
 *
 * Labels are emitted in a fixed per-capability order (most distinctive first),
 * so `result[0]` is the one callers quote in the insight sentence. Capabilities
 * without a curated vocabulary yield an empty list.
 *
 * @param capability Capability the request was made for.
 * @param input Raw tool input; every field is treated as untrusted `unknown`.
 * @returns De-duplicated labels, possibly empty. Never throws on malformed input.
 */
function detectSignals(capability: InsightCapability, input: Record<string, unknown>): string[] {
  const out: string[] = []
  // Append `label` once, and only when its condition holds.
  const push = (cond: boolean, label: string) => {
    if (cond && !out.includes(label)) out.push(label)
  }

  switch (capability) {
    case 'search_companies':
      push(arr(input.technologies), 'tech-stack filter')
      push(
        arr(input.funding_stages) ||
          num(input.min_funding_amount) || num(input.max_funding_amount) ||
          num(input.min_funding_year) || num(input.max_funding_year),
        'funding filter',
      )
      push(num(input.min_revenue) || num(input.max_revenue), 'revenue filter')
      push(arr(input.exclude_domains) || arr(input.exclude_industries) || arr(input.exclude_countries), 'exclusion list')
      push(num(input.min_workforce_growth_pct), 'workforce-growth filter')
      push(input.is_hiring === true, 'hiring signal')
      push(num(input.min_founded_year) || num(input.max_founded_year), 'founding-year filter')
      push(num(input.min_employees) || num(input.max_employees), 'company-size filter')
      push(str(input.linkedin_search_url), 'LinkedIn Sales Navigator URL')
      push(arr(input.industries), 'industry filter')
      push(arr(input.keywords), 'keyword search')
      push(arr(input.countries) || arr(input.locations), 'geo filter')
      break
    case 'find_people':
      push(arr(input.company_domains), 'company-domain filter')
      push(arr(input.company_linkedin_urls), 'company LinkedIn filter')
      push(arr(input.job_titles), 'job-title filter')
      push(arr(input.seniorities), 'seniority filter')
      push(arr(input.keywords), 'keyword search')
      push(arr(input.locations), 'geo filter')
      break
    case 'find_email':
      push(str(input.linkedin_url), 'LinkedIn URL')
      push(str(input.domain) || str(input.company_name), 'name + company')
      break
    case 'find_emails': {
      // `people` is caller-supplied: tolerate a non-array value and skip
      // null / primitive entries instead of throwing while reading a field.
      const people: unknown[] = Array.isArray(input.people) ? input.people : []
      const anyPersonHas = (field: string): boolean =>
        people.some(
          (p) => typeof p === 'object' && p !== null && str((p as Record<string, unknown>)[field]),
        )
      push(anyPersonHas('linkedin_url'), 'LinkedIn URLs')
      push(anyPersonHas('domain'), 'name + domain')
      break
    }
    case 'enrich_company':
      push(str(input.domain), 'company domain')
      push(str(input.linkedin_url), 'LinkedIn URL')
      push(str(input.name), 'company name')
      break
    case 'enrich_person':
      push(str(input.email), 'email')
      push(str(input.linkedin_url), 'LinkedIn URL')
      push(str(input.first_name) && str(input.last_name), 'name + company')
      break
    case 'find_signals':
      // The label embeds the requested signal type verbatim.
      if (str(input.signal_type)) out.push(`${String(input.signal_type)} signals`)
      push(arr(input.companies) || arr(input.domains), 'company filter')
      break
    case 'search_jobs':
      push(arr(input.technologies), 'tech-stack filter')
      push(arr(input.company_domains), 'company-domain filter')
      push(arr(input.job_titles) || arr(input.keywords), 'role filter')
      push(arr(input.locations), 'geo filter')
      break
    case 'search_ads':
      push(arr(input.search_urls), 'LinkedIn Ad Library URL')
      push(arr(input.domains) || arr(input.advertiser_ids), 'advertiser filter')
      push(str(input.query), 'keyword search')
      break
    default:
      // Other capabilities: no curated signal vocabulary — routing is single-source
      // or non-discriminating, so leave signals empty and rely on the base strength.
      break
  }
  return out
}
|
|
300
|
+
|
|
301
|
+
// ---------------------------------------------------------------------------
|
|
302
|
+
// Builder
|
|
303
|
+
// ---------------------------------------------------------------------------
|
|
304
|
+
|
|
305
|
+
/**
 * Build a data-quality-framed reason for why `providerId` was selected for
 * `capability` given `input`.
 *
 * Wording is chosen in this order:
 *  1. `ctx.pinnedByUser` → "Used X as requested." (the choice was the caller's).
 *  2. No curated strength for the provider → plain "X matched …" line.
 *  3. `ctx.wasFallback` → softened "X returned the match here, with …".
 *  4. Otherwise → confident "Routed to X for its …".
 *
 * @param capability Capability the request was made for.
 * @param providerId Id of the provider that produced the result.
 * @param input Raw tool input, used only to detect signals.
 * @param ctx How the provider came to be selected.
 * @returns The insight sentence plus the detected signals. Every path in the
 *   current implementation returns a value; `undefined` stays in the return
 *   type so callers keep handling an absent insight.
 */
export function buildSelectionInsight(
  capability: InsightCapability,
  providerId: string,
  input: Record<string, unknown>,
  ctx: InsightContext,
): SelectionInsight | undefined {
  const name = providerDisplayName(providerId)
  const signals = detectSignals(capability, input)

  // User pinned this provider — it wasn't our choice, so don't claim it was.
  if (ctx.pinnedByUser) {
    return { insight: `Used ${name} as requested.`, signals }
  }

  // Own-property lookup: `providerId` is a free-form string, and a plain
  // bracket access would also resolve inherited keys such as "constructor".
  const curated = STRENGTHS[capability]
  const strength =
    curated !== undefined && Object.prototype.hasOwnProperty.call(curated, providerId)
      ? curated[providerId]
      : undefined
  const noun = CAPABILITY_NOUN[capability] ?? 'request'
  const primary = signals[0]

  // No curated edge for this provider — emit a plain, true line, never invent one.
  if (!strength) {
    const insight = primary
      ? `${name} matched your ${primary} for this ${noun}.`
      : `${name} matched this ${noun}.`
    return { insight, signals }
  }

  // Fallthrough: higher-ranked providers returned nothing. Soften — state the
  // coverage fact without implying we judged it the single best fit upfront.
  if (ctx.wasFallback) {
    return { insight: `${name} returned the match here, with ${strength}.`, signals }
  }

  // Capability routing: chosen because it is best-suited to these inputs.
  const insight = primary
    ? `Routed to ${name} for its ${strength}, matched to your ${primary}.`
    : `Routed to ${name} for its ${strength}.`
  return { insight, signals }
}
|
package/tests/executor.test.ts
CHANGED
|
@@ -954,3 +954,80 @@ describe('executor upstream_error propagation', () => {
|
|
|
954
954
|
}
|
|
955
955
|
})
|
|
956
956
|
})
|
|
957
|
+
|
|
958
|
+
// ---------------------------------------------------------------------------
|
|
959
|
+
// Selection insight + branded provider name in _meta
|
|
960
|
+
// ---------------------------------------------------------------------------
|
|
961
|
+
|
|
962
|
+
describe('executor selection insight _meta', () => {
  const savedFetch = globalThis.fetch

  // Build a fetch stub that answers every call with `payload()` as JSON, HTTP 200.
  const stubFetch = (payload: () => unknown) =>
    vi.fn(async () => new Response(JSON.stringify(payload()), { status: 200 })) as typeof fetch

  beforeEach(() => {
    initClient('http://test-api.local', 'test-key-123')
  })

  afterEach(() => {
    // Undo the per-test fetch stub.
    globalThis.fetch = savedFetch
  })

  it('attaches branded provider_name + confident insight when first applicable provider wins', async () => {
    stubProviders([
      makeProvider({ id: 'theirstack', hasResult: (d) => (d as Record<string, unknown>).ok === true }),
    ])
    globalThis.fetch = stubFetch(() => ({ ok: true }))

    const result = await executeWithFallback('search_companies', { technologies: ['Salesforce'] })

    expect('data' in result).toBe(true)
    if (!('data' in result)) return
    const meta = result._meta
    expect(meta.provider).toBe('theirstack')
    expect(meta.provider_name).toBe('TheirStack')
    expect(meta.selection_insight).toContain('Routed to TheirStack')
    expect(meta.selection_insight).toContain('tech-stack filter')
    expect(meta.selection_signals).toContain('tech-stack filter')
  })

  it('softens the insight when the winner is reached only after a provider fails', async () => {
    stubProviders([
      makeProvider({ id: 'companyenrich', hasResult: () => false }),
      makeProvider({ id: 'fullenrich', hasResult: (d) => (d as Record<string, unknown>).ok === true }),
    ])
    // First upstream call reports `ok: false`, every later one `ok: true`.
    let calls = 0
    globalThis.fetch = stubFetch(() => {
      calls += 1
      return { ok: calls > 1 }
    })

    const result = await executeWithFallback('search_companies', { keywords: ['fintech'] })

    expect('data' in result).toBe(true)
    if (!('data' in result)) return
    const meta = result._meta
    expect(meta.provider_name).toBe('FullEnrich')
    expect(meta.selection_insight).toContain('returned the match here')
    expect(meta.selection_insight!.startsWith('Routed to')).toBe(false)
  })

  it('says "as requested" when the provider was pinned by the caller', async () => {
    stubProviders([
      makeProvider({ id: 'theirstack', hasResult: (d) => (d as Record<string, unknown>).ok === true }),
    ])
    globalThis.fetch = stubFetch(() => ({ ok: true }))

    const result = await executeWithFallback(
      'search_companies',
      { technologies: ['Salesforce'] },
      { providers: ['theirstack'] },
    )

    expect('data' in result).toBe(true)
    if (!('data' in result)) return
    expect(result._meta.selection_insight).toBe('Used TheirStack as requested.')
  })
})
|
|
@@ -317,14 +317,26 @@ describe('theirstack-hiring', () => {
|
|
|
317
317
|
expect(p().priority).toBeLessThan(get('signalbase-hiring').priority)
|
|
318
318
|
})
|
|
319
319
|
|
|
320
|
-
it('postFilter strips heavy job descriptions', () => {
|
|
320
|
+
it('postFilter strips heavy job descriptions and slims company_object', () => {
|
|
321
321
|
const filtered = p().postFilter!(
|
|
322
|
-
{
|
|
322
|
+
{
|
|
323
|
+
data: [{
|
|
324
|
+
title: 'AE', description: 'long...', description_html: '<p>long</p>',
|
|
325
|
+
company: 'HubSpot',
|
|
326
|
+
company_object: {
|
|
327
|
+
name: 'HubSpot', domain: 'hubspot.com',
|
|
328
|
+
technology_slugs: Array(1600).fill('x'), long_description: 'y'.repeat(3000),
|
|
329
|
+
},
|
|
330
|
+
}],
|
|
331
|
+
},
|
|
323
332
|
{ signal_type: 'hiring', companies: ['HubSpot'] },
|
|
324
|
-
) as { data: Array<Record<string, unknown>> }
|
|
333
|
+
) as { data: Array<Record<string, Record<string, unknown>>> }
|
|
325
334
|
expect(filtered.data[0].description).toBeUndefined()
|
|
326
335
|
expect(filtered.data[0].description_html).toBeUndefined()
|
|
327
336
|
expect(filtered.data[0].title).toBe('AE')
|
|
337
|
+
expect(filtered.data[0].company_object.name).toBe('HubSpot')
|
|
338
|
+
expect(filtered.data[0].company_object.technology_slugs).toBeUndefined()
|
|
339
|
+
expect(filtered.data[0].company_object.long_description).toBeUndefined()
|
|
328
340
|
})
|
|
329
341
|
})
|
|
330
342
|
|
|
@@ -81,6 +81,27 @@ describe('theirstack-jobs', () => {
|
|
|
81
81
|
expect(out.data[0].company).toBe('ColdIQ')
|
|
82
82
|
})
|
|
83
83
|
|
|
84
|
+
it('postFilter slims the heavy company_object while keeping company identity', () => {
|
|
85
|
+
const resp = { data: [{
|
|
86
|
+
id: 1, title: 'Engineer',
|
|
87
|
+
company_object: {
|
|
88
|
+
name: 'HubSpot', domain: 'hubspot.com', industry: 'Software', employee_count: 12000,
|
|
89
|
+
technology_slugs: Array(1600).fill('x'), technology_names: Array(1600).fill('x'),
|
|
90
|
+
company_keywords: ['a', 'b'], company_tags: ['a'], keyword_slugs: ['a'],
|
|
91
|
+
long_description: 'y'.repeat(3000),
|
|
92
|
+
},
|
|
93
|
+
}] }
|
|
94
|
+
const out = p().postFilter!(resp, { title_keywords: ['engineer'] }) as { data: Array<Record<string, Record<string, unknown>>> }
|
|
95
|
+
const co = out.data[0].company_object
|
|
96
|
+
expect(co.name).toBe('HubSpot')
|
|
97
|
+
expect(co.domain).toBe('hubspot.com')
|
|
98
|
+
expect(co.industry).toBe('Software')
|
|
99
|
+
expect(co.employee_count).toBe(12000)
|
|
100
|
+
for (const k of ['technology_slugs', 'technology_names', 'company_keywords', 'company_tags', 'keyword_slugs', 'long_description']) {
|
|
101
|
+
expect(co[k]).toBeUndefined()
|
|
102
|
+
}
|
|
103
|
+
})
|
|
104
|
+
|
|
84
105
|
it('postFilter keeps descriptions when include_description is true', () => {
|
|
85
106
|
const resp = { data: [{ id: 1, title: 'Engineer', description: 'full text' }] }
|
|
86
107
|
const out = p().postFilter!(resp, { title_keywords: ['engineer'], include_description: true }) as { data: Array<Record<string, unknown>> }
|
|
@@ -43,8 +43,8 @@ describe('find_emails handler (bulk)', () => {
|
|
|
43
43
|
expect(parsed.data.found).toBe(2)
|
|
44
44
|
expect(parsed.data.total).toBe(2)
|
|
45
45
|
expect(parsed.data.results).toEqual([
|
|
46
|
-
{ id: 'p1', email: 'alice@example.com', provider: 'prospeo' },
|
|
47
|
-
{ id: 'p2', email: 'bob@example.com', provider: 'prospeo' },
|
|
46
|
+
{ id: 'p1', email: 'alice@example.com', provider: 'prospeo', provider_name: 'Prospeo' },
|
|
47
|
+
{ id: 'p2', email: 'bob@example.com', provider: 'prospeo', provider_name: 'Prospeo' },
|
|
48
48
|
])
|
|
49
49
|
})
|
|
50
50
|
|
|
@@ -77,7 +77,7 @@ describe('find_emails handler (bulk)', () => {
|
|
|
77
77
|
|
|
78
78
|
expect(fmCalled).toBe(true)
|
|
79
79
|
const parsed = JSON.parse(result.content[0].text)
|
|
80
|
-
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'alice@example.com', provider: 'findymail' })
|
|
80
|
+
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'alice@example.com', provider: 'findymail', provider_name: 'Findymail' })
|
|
81
81
|
})
|
|
82
82
|
|
|
83
83
|
it('falls back to IcyPeas when FindyMail also misses', async () => {
|
|
@@ -103,7 +103,7 @@ describe('find_emails handler (bulk)', () => {
|
|
|
103
103
|
})
|
|
104
104
|
|
|
105
105
|
const parsed = JSON.parse(result.content[0].text)
|
|
106
|
-
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'alice@example.com', provider: 'icypeas' })
|
|
106
|
+
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'alice@example.com', provider: 'icypeas', provider_name: 'Icypeas' })
|
|
107
107
|
})
|
|
108
108
|
|
|
109
109
|
it('returns email:null when all providers miss for a person', async () => {
|
|
@@ -117,7 +117,7 @@ describe('find_emails handler (bulk)', () => {
|
|
|
117
117
|
|
|
118
118
|
const parsed = JSON.parse(result.content[0].text)
|
|
119
119
|
expect(parsed.data.found).toBe(0)
|
|
120
|
-
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: null, provider: null })
|
|
120
|
+
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: null, provider: null, provider_name: null })
|
|
121
121
|
})
|
|
122
122
|
|
|
123
123
|
it('handles mixed results: some from prospeo, some from fallback, some missed', async () => {
|
|
@@ -241,7 +241,7 @@ describe('find_emails handler (bulk)', () => {
|
|
|
241
241
|
const body = feCreateBody as { data: Array<Record<string, unknown>> }
|
|
242
242
|
expect(body.data[0].custom_id).toBe('p1')
|
|
243
243
|
const parsed = JSON.parse(result.content[0].text)
|
|
244
|
-
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'alice@example.com', provider: 'fullenrich' })
|
|
244
|
+
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'alice@example.com', provider: 'fullenrich', provider_name: 'FullEnrich' })
|
|
245
245
|
})
|
|
246
246
|
|
|
247
247
|
it('parallel branches: faster FindyMail wins, slower FullEnrich does not overwrite', async () => {
|
|
@@ -284,7 +284,7 @@ describe('find_emails handler (bulk)', () => {
|
|
|
284
284
|
|
|
285
285
|
const parsed = JSON.parse(result.content[0].text)
|
|
286
286
|
// FindyMail won the race — its email + provider must persist
|
|
287
|
-
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'fm@example.com', provider: 'findymail' })
|
|
287
|
+
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'fm@example.com', provider: 'findymail', provider_name: 'Findymail' })
|
|
288
288
|
})
|
|
289
289
|
|
|
290
290
|
it('gracefully handles prospeo bulk failure and still tries fallbacks', async () => {
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { providerDisplayName } from '../../src/utils/provider-display.js'
|
|
3
|
+
|
|
4
|
+
describe('providerDisplayName', () => {
  // Assert each [provider id, expected display name] pair in order.
  const expectNames = (pairs: ReadonlyArray<readonly [string, string]>) => {
    for (const [id, expected] of pairs) {
      expect(providerDisplayName(id)).toBe(expected)
    }
  }

  it('maps known vendor ids to branded names', () => {
    expectNames([
      ['fullenrich', 'FullEnrich'],
      ['theirstack', 'TheirStack'],
      ['pdl', 'People Data Labs'],
    ])
  })

  it('collapses vendor variants to one brand', () => {
    expectNames([
      ['limadata-work-email', 'LimaData'],
      ['limadata-prospect-url', 'LimaData'],
      ['signalbase-funding', 'SignalBase'],
      ['linkupapi-by-domain', 'LinkupAPI'],
    ])
  })

  it('brands Apify-backed scrapers as their data source, never raw slug', () => {
    expectNames([
      ['reddit_ads', 'Reddit Ads'],
      ['linkedin_ad_library', 'LinkedIn Ad Library'],
      ['google_maps', 'Google Maps'],
    ])
  })

  it('title-cases unmapped ids as a defensive fallback', () => {
    expectNames([
      ['some-new-vendor', 'Some New Vendor'],
      ['good', 'Good'],
    ])
  })

  it('never returns a raw slug or the word "Apify"', () => {
    const sampleIds = [
      'limadata-work-email', 'reddit_ads', 'fullenrich', 'ai-ark-people',
      'linkupapi-validate', 'theirstack-buying-intents', 'completely_unknown_slug',
    ]
    sampleIds
      .map((id) => providerDisplayName(id))
      .forEach((displayName) => {
        expect(displayName).not.toContain('_')
        expect(displayName.toLowerCase()).not.toContain('apify')
      })
  })
})
|