@coldiq/mcp 0.3.4 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/executor.d.ts +6 -0
- package/dist/executor.d.ts.map +1 -1
- package/dist/executor.js +18 -1
- package/dist/executor.js.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/tools/find-emails.d.ts.map +1 -1
- package/dist/tools/find-emails.js +46 -1
- package/dist/tools/find-emails.js.map +1 -1
- package/dist/tools/list-data-sources.d.ts +20 -0
- package/dist/tools/list-data-sources.d.ts.map +1 -0
- package/dist/tools/list-data-sources.js +86 -0
- package/dist/tools/list-data-sources.js.map +1 -0
- package/dist/utils/provider-display.d.ts +6 -0
- package/dist/utils/provider-display.d.ts.map +1 -0
- package/dist/utils/provider-display.js +140 -0
- package/dist/utils/provider-display.js.map +1 -0
- package/dist/utils/selection-insight.d.ts +26 -0
- package/dist/utils/selection-insight.d.ts.map +1 -0
- package/dist/utils/selection-insight.js +303 -0
- package/dist/utils/selection-insight.js.map +1 -0
- package/package.json +1 -1
- package/src/executor.ts +23 -1
- package/src/index.ts +8 -0
- package/src/tools/find-emails.ts +43 -1
- package/src/tools/list-data-sources.ts +110 -0
- package/src/utils/provider-display.ts +150 -0
- package/src/utils/selection-insight.ts +359 -0
- package/tests/executor.test.ts +77 -0
- package/tests/tools/find-emails.test.ts +7 -7
- package/tests/tools/list-data-sources.test.ts +86 -0
- package/tests/utils/provider-display.test.ts +40 -0
- package/tests/utils/selection-insight.test.ts +114 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// Provider display names
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
//
|
|
5
|
+
// Internal provider IDs (registry.ts + the find_emails waterfall) are a mix of
|
|
6
|
+
// clean vendor names ("apollo"), already-branded slugs ("reddit_ads"), and ugly
|
|
7
|
+
// internal slugs ("limadata-work-email"). User-facing surfaces — the selection
|
|
8
|
+
// insight and `_meta.provider_name` — must show a clean branded label instead.
|
|
9
|
+
//
|
|
10
|
+
// Naming the real data product (FullEnrich, TheirStack, …) is intentional: it IS
|
|
11
|
+
// the ColdIQ "we picked the best tool" intelligence on display. The ONLY thing
|
|
12
|
+
// that must never surface is the Apify backend — and since no ID contains the
|
|
13
|
+
// word "Apify", that constraint is satisfied by mapping the Apify-backed scrapers
|
|
14
|
+
// to their branded data-source name (e.g. "Reddit Ads").
|
|
15
|
+
// ---------------------------------------------------------------------------
|
|
16
|
+
|
|
17
|
+
// Branded label → every internal provider ID that should render as that label.
// Kept in the same order as the flat table it expands into, so the resulting
// object has the same keys, values and insertion order.
const DISPLAY_NAME_GROUPS: Array<[string, string[]]> = [
  // --- B2B data vendors (shared across capabilities) -----------------------
  ['CompanyEnrich', ['companyenrich', 'companyenrich_props']],
  ['Apollo', ['apollo', 'apollo-people-match']],
  ['FullEnrich', ['fullenrich', 'fullenrich-people-search']],
  ['People Data Labs', ['pdl', 'pdl-person-enrich', 'pdl-person-identify']],
  [
    'SignalBase',
    [
      'signalbase',
      'signalbase-funding',
      'signalbase-acquisition',
      'signalbase-hiring',
      'signalbase-job-change',
    ],
  ],
  ['BlitzAPI', ['blitzapi', 'blitzapi-reverse-email']],
  [
    'LimaData',
    [
      'limadata',
      'limadata-prospect-filter',
      'limadata-prospect-url',
      'limadata-work-email',
      'limadata-work-email-linkedin',
    ],
  ],
  [
    'PredictLeads',
    ['predictleads', 'predictleads-financing', 'predictleads-news', 'predictleads-startup-posts'],
  ],
  [
    'TheirStack',
    [
      'theirstack',
      'theirstack-jobs',
      'theirstack-hiring',
      'theirstack-intent-discovery',
      'theirstack-buying-intents',
    ],
  ],
  ['Sumble', ['sumble', 'sumble-people-find']],
  ['Prospeo', ['prospeo', 'prospeo-search-company', 'prospeo-search-person']],
  ['AI-ARK', ['ai-ark', 'ai-ark-companies', 'ai-ark-people', 'ai-ark-reverse-lookup']],
  ['LeadsFactory', ['leadsfactory']],
  [
    'Findymail',
    [
      'findymail',
      'findymail-search-employees',
      'findymail-business-profile',
      'findymail-reverse-email',
    ],
  ],
  [
    'Icypeas',
    [
      'icypeas',
      'icypeas-scrape-profile',
      'icypeas-url-search-profile',
      'icypeas-reverse-email-lookup',
    ],
  ],
  ['Wiza', ['wiza']],
  ['BuiltWith', ['builtwith']],
  ['Openmart', ['openmart']],
  ['Instantly', ['instantly']],

  // --- LinkupAPI (LinkedIn data) -------------------------------------------
  [
    'LinkupAPI',
    [
      'linkupapi',
      'linkupapi-search',
      'linkupapi-fundraising',
      'linkupapi-hiring',
      'linkupapi-search-profiles',
      'linkupapi-by-domain',
      'linkupapi-by-url',
      'linkupapi-profile-enrich',
      'linkupapi-email-reverse',
      'linkupapi-validate',
    ],
  ],

  // --- search_web ----------------------------------------------------------
  ['Serper', ['serper']],
  ['Exa', ['exa', 'exa-contents']],
  ['Jina', ['jina']],

  // --- search_jobs ---------------------------------------------------------
  ['Career Site Jobs', ['career_site_jobs']],
  ['LinkedIn Jobs', ['linkedin_jobs_api']],

  // --- search_ads ----------------------------------------------------------
  ['Google Ads', ['google_ads']],
  ['LinkedIn Ad Library', ['linkedin_ad_library']],
  ['Meta Ads', ['meta_ads']],
  ['X Ads', ['twitter_ads']],
  ['Reddit Ads', ['reddit_ads']],

  // --- search_places / reviews ---------------------------------------------
  ['Google Maps', ['google_maps', 'google_maps_reviews']],

  // --- find_influencers ----------------------------------------------------
  ['Influencer Discovery', ['influencers_similar', 'influencers_discovery']],

  // --- search_reddit -------------------------------------------------------
  ['Reddit', ['reddit']],

  // --- search_seo (functional sub-actions) ---------------------------------
  ['Keyword Search Volume', ['kw_search_volume']],
  ['Keyword Trends', ['kw_trends']],
  ['Google SERP', ['serp_google']],
  ['Bing SERP', ['serp_bing']],
  ['YouTube SERP', ['serp_youtube']],
  ['Backlink Summary', ['bl_summary']],
  ['Backlinks', ['bl_backlinks']],
  ['Referring Domains', ['bl_referring']],
  ['Domain Technologies', ['domain_tech']],
  ['Domain WHOIS', ['domain_whois']],
  ['Rank Overview', ['labs_rank_overview']],
  ['Ranked Keywords', ['labs_ranked_kw']],
  ['Competitor Domains', ['labs_competitors']],
  ['Keyword Ideas', ['labs_kw_ideas']],
  ['Lighthouse Audit', ['page_lighthouse']],
  ['Page Content', ['page_content']],
]

/** Internal provider ID → branded, user-facing display name. */
const DISPLAY_NAMES: Record<string, string> = Object.fromEntries(
  DISPLAY_NAME_GROUPS.flatMap(([label, ids]) =>
    ids.map((id): [string, string] => [id, label]),
  ),
)
|
|
130
|
+
|
|
131
|
+
/**
 * Title-case an unmapped provider ID as a defensive fallback so a newly added
 * provider still renders acceptably (and never leaks a raw slug). E.g.
 * "some-new-vendor" → "Some New Vendor".
 *
 * Tokens are separated by hyphens, underscores or whitespace; empty tokens are
 * dropped. A token equal to "apify" (any casing) is also dropped, so the
 * fallback cannot surface the backend name for an unmapped ID such as
 * "apify-reddit-ads" (→ "Reddit Ads").
 *
 * @param id - Internal provider ID / slug.
 * @returns The space-joined, capitalised tokens; an empty string when no
 *   token survives (e.g. `''` or `'apify'`).
 */
function titleCaseId(id: string): string {
  return id
    .split(/[-_\s]+/)
    .filter((word) => word.length > 0 && word.toLowerCase() !== 'apify')
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(' ')
}
|
|
143
|
+
|
|
144
|
+
/**
 * Map an internal provider ID to a clean, branded display name.
 *
 * Looks the ID up in `DISPLAY_NAMES`; when it is not mapped, falls back to a
 * title-cased form of the ID produced by `titleCaseId`.
 *
 * @param id - Internal provider ID (registry ID or waterfall slug).
 * @returns The mapped label, or the title-cased fallback.
 */
export function providerDisplayName(id: string): string {
  // Own-property check: a bare `DISPLAY_NAMES[id]` also resolves members
  // inherited from Object.prototype ("constructor", "toString", …), which are
  // not nullish and would be returned as-is instead of a string label.
  const mapped = Object.prototype.hasOwnProperty.call(DISPLAY_NAMES, id)
    ? DISPLAY_NAMES[id]
    : undefined
  return mapped ?? titleCaseId(id)
}
|
|
@@ -0,0 +1,359 @@
|
|
|
1
|
+
import type { Capability } from '../registry.js'
|
|
2
|
+
import { providerDisplayName } from './provider-display.js'
|
|
3
|
+
|
|
4
|
+
// ---------------------------------------------------------------------------
|
|
5
|
+
// Selection insight — a short, human-readable, DATA-QUALITY-framed reason for
|
|
6
|
+
// why ColdIQ routed a request to a given provider. Surfaced in `_meta` so the
|
|
7
|
+
// chat layer can show the "we picked the best tool" intelligence.
|
|
8
|
+
//
|
|
9
|
+
// Rules baked in here (see CLAUDE.md + the plan):
|
|
10
|
+
// • Data-quality reasons ONLY — coverage / specialization / freshness / region.
|
|
11
|
+
// NEVER price, never "tried first", never waterfall position.
|
|
12
|
+
// • Honest about the two ways a provider wins:
|
|
13
|
+
// - capability routing (others gated out) → confident "Routed to X for …"
|
|
14
|
+
// - fallthrough (others returned nothing) → softened "X returned the match …"
|
|
15
|
+
// • Never fabricate a strength: when a provider has no curated edge we emit a
|
|
16
|
+
// plain, true line ("X matched this …") instead of inventing specialization.
|
|
17
|
+
// • Display names only (no raw slugs, no "Apify") — via providerDisplayName.
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
/** Anything an insight can be built for: every registry `Capability`, plus `'find_emails'`. */
export type InsightCapability = Capability | 'find_emails'
|
|
21
|
+
|
|
22
|
+
/** Result of `buildSelectionInsight`: the sentence to show plus the input signals behind it. */
export interface SelectionInsight {
  /** Human-readable, single-sentence reason naming the provider by its display name. */
  insight: string
  /** Labels of the input dimensions detected for this request (may be empty). */
  signals: string[]
}
|
|
26
|
+
|
|
27
|
+
/** How the provider came to be selected; decides which sentence template is used. */
export interface InsightContext {
  /** True when the chosen provider won only because higher-ranked providers
   *  returned no rows (not because it was the best-suited for the inputs). */
  wasFallback: boolean
  /** True when the caller pinned this provider via `use_providers`. */
  pinnedByUser: boolean
}
|
|
34
|
+
|
|
35
|
+
// ---------------------------------------------------------------------------
|
|
36
|
+
// Curated data-quality strengths, keyed by capability → providerId.
|
|
37
|
+
// Each value is a noun phrase that slots into "… for its <strength>". Phrasing
|
|
38
|
+
// is descriptive (deep / strong / broad / specialized), NOT superlative, so it
|
|
39
|
+
// stays defensible on the fallthrough path too.
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
|
|
42
|
+
/**
 * Email-finding strengths. `find_email` and `find_emails` use exactly the same
 * wording, so both capabilities point at this one table instead of keeping two
 * copies that have to be edited in lockstep. Treat it as read-only.
 */
const EMAIL_FINDER_STRENGTHS: Record<string, string> = {
  prospeo: 'strong work-email coverage',
  fullenrich: 'strong international email coverage, especially across Europe',
  findymail: 'high-accuracy verified work emails',
  icypeas: 'broad email-finding coverage',
  'limadata-work-email': 'name-and-domain work-email lookup',
  blitzapi: 'LinkedIn-URL–based email finding',
  'limadata-work-email-linkedin': 'LinkedIn-URL–based work-email lookup',
  linkupapi: 'LinkedIn-sourced email finding',
}

/**
 * capability → providerId → noun phrase describing that provider's
 * data-quality strength. Capabilities or providers without an entry simply
 * have no curated strength.
 */
const STRENGTHS: Partial<Record<InsightCapability, Record<string, string>>> = {
  search_companies: {
    companyenrich: 'broad firmographic coverage with full-text company matching',
    apollo: 'an extensive global company database',
    fullenrich: 'rich firmographic data with strong international coverage',
    pdl: 'a large, structured global company dataset',
    signalbase: 'fast firmographic search across keywords and industries',
    blitzapi: 'LinkedIn-sourced company data',
    limadata: 'LinkedIn-based company discovery',
    predictleads: 'discovery scoped tightly by location and company size',
    theirstack: 'deep technographic and buying-intent coverage',
    sumble: 'technology- and keyword-driven company discovery',
    'limadata-prospect-filter': 'LinkedIn headcount-bucketed prospecting',
    'limadata-prospect-url': 'discovery straight from a LinkedIn Sales Navigator search',
    'linkupapi-search': 'live LinkedIn company search',
    'linkupapi-fundraising': 'a dedicated index of recently-funded companies',
    'linkupapi-hiring': 'a dedicated index of actively-hiring companies',
    'prospeo-search-company': 'keyword, industry and geo company search',
    'ai-ark-companies': 'multi-filter company discovery',
  },
  find_people: {
    leadsfactory: 'company-scoped contact discovery by persona',
    apollo: 'an extensive global contact database',
    pdl: 'a large, structured global people dataset',
    companyenrich: 'domain-scoped employee lookups',
    'linkupapi-search-profiles': 'live LinkedIn profile search',
    'sumble-people-find': 'org-scoped people search by job function',
    'prospeo-search-person': 'title- and company-based people search',
    'ai-ark-people': 'multi-filter people discovery',
    'fullenrich-people-search': 'domain-scoped people search',
    'findymail-search-employees': 'domain-scoped employee discovery',
  },
  find_email: EMAIL_FINDER_STRENGTHS,
  find_emails: EMAIL_FINDER_STRENGTHS,
  verify_email: {
    findymail: 'reliable deliverability verification',
    icypeas: 'broad email verification',
    instantly: 'deliverability-focused verification',
    'linkupapi-validate': 'LinkedIn-aware email validation',
  },
  find_phone: {
    findymail: 'verified mobile-number coverage',
    limadata: 'phone lookup by LinkedIn URL or name + company',
    'ai-ark': 'phone lookup across multiple identifiers',
  },
  enrich_company: {
    companyenrich: 'broad firmographic enrichment from a domain',
    apollo: 'deep firmographic enrichment',
    pdl: 'structured firmographic data',
    findymail: 'domain-based company enrichment',
    wiza: 'company enrichment from name or domain',
    limadata: 'LinkedIn-based company enrichment',
    prospeo: 'company enrichment',
    companyenrich_props: 'company enrichment from name or LinkedIn',
    blitzapi: 'LinkedIn-URL company enrichment',
    icypeas: 'LinkedIn-URL company enrichment',
    builtwith: 'technology-stack detection for a domain',
    openmart: 'local-business firmographics',
    'linkupapi-by-domain': 'live LinkedIn company data',
    'linkupapi-by-url': 'live LinkedIn company data',
  },
  enrich_person: {
    'linkupapi-profile-enrich': 'live LinkedIn profile enrichment',
    'linkupapi-email-reverse': 'reverse-email lookup from LinkedIn data',
    'pdl-person-enrich': 'structured person enrichment',
    'apollo-people-match': 'deep person enrichment',
    'blitzapi-reverse-email': 'reverse-email profile lookup',
    'findymail-business-profile': 'LinkedIn profile enrichment',
    'findymail-reverse-email': 'reverse-email lookup',
    'icypeas-scrape-profile': 'LinkedIn profile scraping',
    'icypeas-url-search-profile': 'profile lookup by name and company',
    'ai-ark-reverse-lookup': 'reverse lookup from email or phone',
    'icypeas-reverse-email-lookup': 'reverse-email lookup',
    'pdl-person-identify': 'person identity resolution',
  },
  search_web: {
    serper: 'fast Google search results',
    exa: 'neural, meaning-based web search',
    limadata: 'general web search',
    jina: 'web search and page reading',
  },
  search_jobs: {
    career_site_jobs: 'jobs sourced directly from company career sites',
    linkedin_jobs_api: 'LinkedIn job listings',
    'theirstack-jobs': 'jobs enriched with company and tech-stack data',
  },
  search_ads: {
    google_ads: 'Google ad-transparency data',
    linkedin_ad_library: 'LinkedIn Ad Library coverage',
    meta_ads: 'Meta (Facebook/Instagram) ad-library coverage',
    twitter_ads: 'X ad coverage',
    reddit_ads: 'Reddit ad coverage',
  },
  search_places: {
    openmart: 'rich local-business data across the US, CA, AU, PR and NZ',
    google_maps: 'broad global places coverage from Google Maps',
  },
  get_place_reviews: {
    google_maps_reviews: 'Google Maps review data',
  },
  find_influencers: {
    influencers_similar: 'lookalike creator discovery from a seed handle',
    influencers_discovery: 'creator discovery by topic and audience',
  },
  search_reddit: {
    reddit: 'Reddit post and comment search',
  },
  find_signals: {
    'signalbase-funding': 'real-time funding-round signals',
    'signalbase-acquisition': 'acquisition signals',
    'signalbase-hiring': 'hiring signals',
    'signalbase-job-change': 'job-change signals',
    'theirstack-hiring': 'hiring signals from job-posting data',
    'theirstack-intent-discovery': 'buying-intent discovery',
    'theirstack-buying-intents': 'buying-intent signals from tech and job data',
    'predictleads-financing': 'financing-event signals',
    'predictleads-news': 'company-news signals',
    'predictleads-startup-posts': 'startup-announcement signals',
  },
  fetch_page_content: {
    'exa-contents': 'clean page-content extraction',
  },
}
|
|
183
|
+
|
|
184
|
+
/**
 * Read the curated data-quality strength for a provider within a capability,
 * if one exists. Exposed for the `list_data_sources` catalog so the strengths
 * stay defined in one place.
 *
 * @param capability - Capability whose strength table is consulted.
 * @param providerId - Internal provider ID to look up within that table.
 * @returns The curated noun phrase, or `undefined` when the capability has no
 *   table or the provider has no entry in it.
 */
export function getProviderStrength(
  capability: InsightCapability,
  providerId: string,
): string | undefined {
  // Both levels are plain object literals, so a bare index would also resolve
  // inherited Object.prototype members (e.g. "constructor") and hand back a
  // non-string. Only own entries count as curated strengths.
  const hasOwn = (table: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(table, key)

  if (!hasOwn(STRENGTHS, capability)) return undefined
  const byProvider = STRENGTHS[capability]
  if (byProvider === undefined || !hasOwn(byProvider, providerId)) return undefined
  return byProvider[providerId]
}
|
|
195
|
+
|
|
196
|
+
// ---------------------------------------------------------------------------
|
|
197
|
+
// Per-capability noun used in the plain fallback line ("X matched this <noun>").
|
|
198
|
+
// ---------------------------------------------------------------------------
|
|
199
|
+
|
|
200
|
+
/**
 * Short noun phrase naming each capability, interpolated into insight
 * sentences as "this <noun>". Typed as a full `Record`, so the compiler
 * requires an entry for every member of `InsightCapability`.
 */
const CAPABILITY_NOUN: Record<InsightCapability, string> = {
  search_companies: 'company search',
  find_people: 'people search',
  // The single and bulk email finders intentionally share one noun.
  find_email: 'email lookup',
  find_emails: 'email lookup',
  verify_email: 'email verification',
  find_phone: 'phone lookup',
  enrich_company: 'company enrichment',
  enrich_person: 'person enrichment',
  search_web: 'web search',
  search_jobs: 'job search',
  search_ads: 'ad search',
  search_places: 'places search',
  get_place_reviews: 'reviews lookup',
  find_influencers: 'influencer search',
  search_reddit: 'Reddit search',
  search_seo: 'SEO lookup',
  find_signals: 'signal search',
  fetch_page_content: 'page fetch',
}
|
|
220
|
+
|
|
221
|
+
// ---------------------------------------------------------------------------
|
|
222
|
+
// Signal detection — which input dimensions drove routing, in human terms.
|
|
223
|
+
// Ordered most-distinctive-first so signals[0] is the routing-relevant one.
|
|
224
|
+
// ---------------------------------------------------------------------------
|
|
225
|
+
|
|
226
|
+
/** True when `v` is an array holding at least one element. */
function arr(v: unknown): boolean {
  if (!Array.isArray(v)) return false
  return v.length !== 0
}
|
|
229
|
+
/**
 * True when `v` is a usable numeric filter value: a finite number.
 *
 * `NaN` and `±Infinity` are rejected even though `typeof` reports them as
 * "number" — neither expresses an actual bound, so they must not be reported
 * as a filter the caller supplied.
 */
function num(v: unknown): boolean {
  return typeof v === 'number' && Number.isFinite(v)
}
|
|
232
|
+
/** True when `v` is a string with at least one character. */
function str(v: unknown): boolean {
  return typeof v === 'string' ? v !== '' : false
}
|
|
235
|
+
|
|
236
|
+
/**
 * Translate the request input into human-readable labels for the input
 * dimensions that were supplied (e.g. "tech-stack filter", "geo filter").
 *
 * Labels are emitted in the order the checks run — most distinctive first —
 * and each label appears at most once. Capabilities without a case below
 * produce an empty list.
 *
 * @param capability - Capability the request was made for; selects the checks.
 * @param input - Raw request input; every value is inspected defensively.
 * @returns The detected signal labels, possibly empty.
 */
function detectSignals(capability: InsightCapability, input: Record<string, unknown>): string[] {
  const out: string[] = []
  // Append `label` once, and only when its condition holds.
  const push = (cond: boolean, label: string): void => {
    if (cond && !out.includes(label)) out.push(label)
  }

  switch (capability) {
    case 'search_companies':
      push(arr(input.technologies), 'tech-stack filter')
      push(
        arr(input.funding_stages) ||
          num(input.min_funding_amount) || num(input.max_funding_amount) ||
          num(input.min_funding_year) || num(input.max_funding_year),
        'funding filter',
      )
      push(num(input.min_revenue) || num(input.max_revenue), 'revenue filter')
      push(arr(input.exclude_domains) || arr(input.exclude_industries) || arr(input.exclude_countries), 'exclusion list')
      push(num(input.min_workforce_growth_pct), 'workforce-growth filter')
      push(input.is_hiring === true, 'hiring signal')
      push(num(input.min_founded_year) || num(input.max_founded_year), 'founding-year filter')
      push(num(input.min_employees) || num(input.max_employees), 'company-size filter')
      push(str(input.linkedin_search_url), 'LinkedIn Sales Navigator URL')
      push(arr(input.industries), 'industry filter')
      push(arr(input.keywords), 'keyword search')
      push(arr(input.countries) || arr(input.locations), 'geo filter')
      break
    case 'find_people':
      push(arr(input.company_domains), 'company-domain filter')
      push(arr(input.company_linkedin_urls), 'company LinkedIn filter')
      push(arr(input.job_titles), 'job-title filter')
      push(arr(input.seniorities), 'seniority filter')
      push(arr(input.keywords), 'keyword search')
      push(arr(input.locations), 'geo filter')
      break
    case 'find_email':
      push(str(input.linkedin_url), 'LinkedIn URL')
      push(str(input.domain) || str(input.company_name), 'name + company')
      break
    case 'find_emails': {
      // `input` values are `unknown`: `people` may be absent, not an array, or
      // hold null / non-object entries. Building an insight must never throw
      // on such input, so anything that is not a plain entry is skipped.
      const people: unknown[] = Array.isArray(input.people) ? input.people : []
      const anyPersonHas = (field: string): boolean =>
        people.some(
          (p) => typeof p === 'object' && p !== null && str((p as Record<string, unknown>)[field]),
        )
      push(anyPersonHas('linkedin_url'), 'LinkedIn URLs')
      push(anyPersonHas('domain'), 'name + domain')
      break
    }
    case 'enrich_company':
      push(str(input.domain), 'company domain')
      push(str(input.linkedin_url), 'LinkedIn URL')
      push(str(input.name), 'company name')
      break
    case 'enrich_person':
      push(str(input.email), 'email')
      push(str(input.linkedin_url), 'LinkedIn URL')
      // NOTE(review): the label says "name + company" but only the two name
      // fields are checked here — confirm whether a company field should be
      // required as well.
      push(str(input.first_name) && str(input.last_name), 'name + company')
      break
    case 'find_signals':
      // The signal type is echoed verbatim into the label.
      if (str(input.signal_type)) out.push(`${String(input.signal_type)} signals`)
      push(arr(input.companies) || arr(input.domains), 'company filter')
      break
    case 'search_jobs':
      push(arr(input.technologies), 'tech-stack filter')
      push(arr(input.company_domains), 'company-domain filter')
      push(arr(input.job_titles) || arr(input.keywords), 'role filter')
      push(arr(input.locations), 'geo filter')
      break
    case 'search_ads':
      push(arr(input.search_urls), 'LinkedIn Ad Library URL')
      push(arr(input.domains) || arr(input.advertiser_ids), 'advertiser filter')
      push(str(input.query), 'keyword search')
      break
    default:
      // Other capabilities: no curated signal vocabulary — routing is single-source
      // or non-discriminating, so leave signals empty and rely on the base strength.
      break
  }
  return out
}
|
|
312
|
+
|
|
313
|
+
// ---------------------------------------------------------------------------
|
|
314
|
+
// Builder
|
|
315
|
+
// ---------------------------------------------------------------------------
|
|
316
|
+
|
|
317
|
+
/**
 * Build a data-quality-framed reason for why `providerId` was selected for
 * `capability` given `input`.
 *
 * The sentence depends on how the provider won:
 *  - pinned by the caller      → "Used X as requested."
 *  - no curated strength       → "X matched [your <signal> for] this <noun>."
 *  - fallthrough               → "X returned the match here, with <strength>."
 *  - capability routing        → "Routed to X for its <strength>[, matched to your <signal>]."
 *
 * @param capability - Capability the request was made for.
 * @param providerId - Internal ID of the provider that produced the result.
 * @param input - Raw request input, used only to detect signals.
 * @param ctx - Whether the provider was pinned and/or reached by fallthrough.
 * @returns The insight and detected signals, or `undefined` when the provider
 *   has no displayable name and there is therefore nothing safe to say.
 */
export function buildSelectionInsight(
  capability: InsightCapability,
  providerId: string,
  input: Record<string, unknown>,
  ctx: InsightContext,
): SelectionInsight | undefined {
  const name = providerDisplayName(providerId)
  // Every template names the provider; with an empty label the sentence would
  // read "Used  as requested." — better to say nothing at all.
  if (name.trim().length === 0) return undefined

  const signals = detectSignals(capability, input)

  // User pinned this provider — it wasn't our choice, so don't claim it was.
  if (ctx.pinnedByUser) {
    return { insight: `Used ${name} as requested.`, signals }
  }

  // Go through the shared accessor so the catalog and the insight agree, and
  // accept only a real non-empty string: anything else must never be
  // interpolated into user-facing text.
  const curated = getProviderStrength(capability, providerId)
  const strength = typeof curated === 'string' && curated.length > 0 ? curated : undefined
  const noun = CAPABILITY_NOUN[capability] ?? 'request'
  const primary = signals[0]

  // No curated edge for this provider — emit a plain, true line, never invent one.
  if (strength === undefined) {
    const insight = primary
      ? `${name} matched your ${primary} for this ${noun}.`
      : `${name} matched this ${noun}.`
    return { insight, signals }
  }

  // Fallthrough: higher-ranked providers returned nothing. Soften — state the
  // coverage fact without implying we judged it the single best fit upfront.
  if (ctx.wasFallback) {
    return { insight: `${name} returned the match here, with ${strength}.`, signals }
  }

  // Capability routing: chosen because it is best-suited to these inputs.
  const insight = primary
    ? `Routed to ${name} for its ${strength}, matched to your ${primary}.`
    : `Routed to ${name} for its ${strength}.`
  return { insight, signals }
}
|
package/tests/executor.test.ts
CHANGED
|
@@ -954,3 +954,80 @@ describe('executor upstream_error propagation', () => {
|
|
|
954
954
|
}
|
|
955
955
|
})
|
|
956
956
|
})
|
|
957
|
+
|
|
958
|
+
// ---------------------------------------------------------------------------
|
|
959
|
+
// Selection insight + branded provider name in _meta
|
|
960
|
+
// ---------------------------------------------------------------------------
|
|
961
|
+
|
|
962
|
+
describe('executor selection insight _meta', () => {
  // Each test replaces the global fetch; keep the real one to restore afterwards.
  const originalFetch = globalThis.fetch

  beforeEach(() => {
    initClient('http://test-api.local', 'test-key-123')
  })

  afterEach(() => {
    globalThis.fetch = originalFetch
  })

  it('attaches branded provider_name + confident insight when first applicable provider wins', async () => {
    stubProviders([
      makeProvider({ id: 'theirstack', hasResult: (d) => (d as Record<string, unknown>).ok === true }),
    ])
    globalThis.fetch = vi.fn(async () =>
      new Response(JSON.stringify({ ok: true }), { status: 200 }),
    ) as typeof fetch

    const result = await executeWithFallback('search_companies', { technologies: ['Salesforce'] })

    expect('data' in result).toBe(true)
    if ('data' in result) {
      expect(result._meta.provider).toBe('theirstack')
      expect(result._meta.provider_name).toBe('TheirStack')
      expect(result._meta.selection_insight).toContain('Routed to TheirStack')
      expect(result._meta.selection_insight).toContain('tech-stack filter')
      expect(result._meta.selection_signals).toContain('tech-stack filter')
    }
  })

  it('softens the insight when the winner is reached only after a provider fails', async () => {
    stubProviders([
      makeProvider({ id: 'companyenrich', hasResult: () => false }),
      makeProvider({ id: 'fullenrich', hasResult: (d) => (d as Record<string, unknown>).ok === true }),
    ])
    // First response carries ok:false, every later one ok:true.
    let n = 0
    globalThis.fetch = vi.fn(async () => {
      n++
      return new Response(JSON.stringify({ ok: n > 1 }), { status: 200 })
    }) as typeof fetch

    const result = await executeWithFallback('search_companies', { keywords: ['fintech'] })

    expect('data' in result).toBe(true)
    if ('data' in result) {
      expect(result._meta.provider_name).toBe('FullEnrich')
      expect(result._meta.selection_insight).toContain('returned the match here')
      // Matcher instead of `selection_insight!.startsWith(...)`: a missing
      // insight then fails as an assertion rather than throwing a TypeError.
      expect(result._meta.selection_insight).not.toMatch(/^Routed to/)
    }
  })

  it('says "as requested" when the provider was pinned by the caller', async () => {
    stubProviders([
      makeProvider({ id: 'theirstack', hasResult: (d) => (d as Record<string, unknown>).ok === true }),
    ])
    globalThis.fetch = vi.fn(async () =>
      new Response(JSON.stringify({ ok: true }), { status: 200 }),
    ) as typeof fetch

    const result = await executeWithFallback(
      'search_companies',
      { technologies: ['Salesforce'] },
      { providers: ['theirstack'] },
    )

    expect('data' in result).toBe(true)
    if ('data' in result) {
      expect(result._meta.selection_insight).toBe('Used TheirStack as requested.')
    }
  })
})
|
|
@@ -43,8 +43,8 @@ describe('find_emails handler (bulk)', () => {
|
|
|
43
43
|
expect(parsed.data.found).toBe(2)
|
|
44
44
|
expect(parsed.data.total).toBe(2)
|
|
45
45
|
expect(parsed.data.results).toEqual([
|
|
46
|
-
{ id: 'p1', email: 'alice@example.com', provider: 'prospeo' },
|
|
47
|
-
{ id: 'p2', email: 'bob@example.com', provider: 'prospeo' },
|
|
46
|
+
{ id: 'p1', email: 'alice@example.com', provider: 'prospeo', provider_name: 'Prospeo' },
|
|
47
|
+
{ id: 'p2', email: 'bob@example.com', provider: 'prospeo', provider_name: 'Prospeo' },
|
|
48
48
|
])
|
|
49
49
|
})
|
|
50
50
|
|
|
@@ -77,7 +77,7 @@ describe('find_emails handler (bulk)', () => {
|
|
|
77
77
|
|
|
78
78
|
expect(fmCalled).toBe(true)
|
|
79
79
|
const parsed = JSON.parse(result.content[0].text)
|
|
80
|
-
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'alice@example.com', provider: 'findymail' })
|
|
80
|
+
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'alice@example.com', provider: 'findymail', provider_name: 'Findymail' })
|
|
81
81
|
})
|
|
82
82
|
|
|
83
83
|
it('falls back to IcyPeas when FindyMail also misses', async () => {
|
|
@@ -103,7 +103,7 @@ describe('find_emails handler (bulk)', () => {
|
|
|
103
103
|
})
|
|
104
104
|
|
|
105
105
|
const parsed = JSON.parse(result.content[0].text)
|
|
106
|
-
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'alice@example.com', provider: 'icypeas' })
|
|
106
|
+
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'alice@example.com', provider: 'icypeas', provider_name: 'Icypeas' })
|
|
107
107
|
})
|
|
108
108
|
|
|
109
109
|
it('returns email:null when all providers miss for a person', async () => {
|
|
@@ -117,7 +117,7 @@ describe('find_emails handler (bulk)', () => {
|
|
|
117
117
|
|
|
118
118
|
const parsed = JSON.parse(result.content[0].text)
|
|
119
119
|
expect(parsed.data.found).toBe(0)
|
|
120
|
-
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: null, provider: null })
|
|
120
|
+
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: null, provider: null, provider_name: null })
|
|
121
121
|
})
|
|
122
122
|
|
|
123
123
|
it('handles mixed results: some from prospeo, some from fallback, some missed', async () => {
|
|
@@ -241,7 +241,7 @@ describe('find_emails handler (bulk)', () => {
|
|
|
241
241
|
const body = feCreateBody as { data: Array<Record<string, unknown>> }
|
|
242
242
|
expect(body.data[0].custom_id).toBe('p1')
|
|
243
243
|
const parsed = JSON.parse(result.content[0].text)
|
|
244
|
-
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'alice@example.com', provider: 'fullenrich' })
|
|
244
|
+
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'alice@example.com', provider: 'fullenrich', provider_name: 'FullEnrich' })
|
|
245
245
|
})
|
|
246
246
|
|
|
247
247
|
it('parallel branches: faster FindyMail wins, slower FullEnrich does not overwrite', async () => {
|
|
@@ -284,7 +284,7 @@ describe('find_emails handler (bulk)', () => {
|
|
|
284
284
|
|
|
285
285
|
const parsed = JSON.parse(result.content[0].text)
|
|
286
286
|
// FindyMail won the race — its email + provider must persist
|
|
287
|
-
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'fm@example.com', provider: 'findymail' })
|
|
287
|
+
expect(parsed.data.results[0]).toEqual({ id: 'p1', email: 'fm@example.com', provider: 'findymail', provider_name: 'Findymail' })
|
|
288
288
|
})
|
|
289
289
|
|
|
290
290
|
it('gracefully handles prospeo bulk failure and still tries fallbacks', async () => {
|