@coldiq/mcp 0.1.18 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client.d.ts +2 -0
- package/dist/client.d.ts.map +1 -1
- package/dist/client.js +7 -1
- package/dist/client.js.map +1 -1
- package/dist/executor.d.ts +11 -0
- package/dist/executor.d.ts.map +1 -1
- package/dist/executor.js +72 -11
- package/dist/executor.js.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/registry.d.ts +1 -0
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +57 -8
- package/dist/registry.js.map +1 -1
- package/dist/tools/find-emails.d.ts +2 -7
- package/dist/tools/find-emails.d.ts.map +1 -1
- package/dist/tools/find-emails.js +193 -67
- package/dist/tools/find-emails.js.map +1 -1
- package/dist/tools/find-people.d.ts +3 -2
- package/dist/tools/find-people.d.ts.map +1 -1
- package/dist/tools/find-people.js +65 -7
- package/dist/tools/find-people.js.map +1 -1
- package/dist/tools/get-credit-balance.d.ts +17 -0
- package/dist/tools/get-credit-balance.d.ts.map +1 -0
- package/dist/tools/get-credit-balance.js +20 -0
- package/dist/tools/get-credit-balance.js.map +1 -0
- package/dist/utils/compact-people.d.ts +24 -0
- package/dist/utils/compact-people.d.ts.map +1 -0
- package/dist/utils/compact-people.js +306 -0
- package/dist/utils/compact-people.js.map +1 -0
- package/dist/utils/provider-resolver.d.ts.map +1 -1
- package/dist/utils/provider-resolver.js +15 -1
- package/dist/utils/provider-resolver.js.map +1 -1
- package/package.json +1 -1
- package/src/client.ts +9 -1
- package/src/executor.ts +89 -17
- package/src/index.ts +8 -0
- package/src/registry.ts +67 -8
- package/src/tools/find-emails.ts +251 -80
- package/src/tools/find-people.ts +70 -7
- package/src/tools/get-credit-balance.ts +24 -0
- package/src/utils/compact-people.ts +318 -0
- package/src/utils/provider-resolver.ts +15 -1
- package/tests/executor.test.ts +165 -0
- package/tests/live/fullenrich-upstream-probe.ts +55 -0
- package/tests/live/pdl-upstream-probe.ts +83 -0
- package/tests/registry-find-people.test.ts +198 -7
- package/tests/registry-search-companies.test.ts +46 -7
- package/tests/tools/find-emails.test.ts +267 -1
- package/tests/tools/find-people.test.ts +269 -5
- package/tests/tools/get-credit-balance.test.ts +56 -0
- package/tests/utils/compact-people.test.ts +462 -0
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
// Normalize find_people responses into a small, predictable shape.
|
|
2
|
+
//
|
|
3
|
+
// The find_people waterfall fans across 10 providers, each with a different
|
|
4
|
+
// payload shape (data.people[], data.data[], data.companies_personas[].personas[][],
|
|
5
|
+
// data.data.profiles[], data.content[], etc.). Verbose passthrough produces 30KB+
|
|
6
|
+
// per record from FullEnrich alone (full employment history, every company office,
|
|
7
|
+
// company description, specialties, skills, languages). Compact mode keeps only
|
|
8
|
+
// the fields agents actually need to act on a lead.
|
|
9
|
+
|
|
10
|
+
/**
 * Provider-agnostic lead record emitted in compact mode.
 *
 * Every field is optional: a key is only present when a usable value was
 * resolved for it from the upstream record.
 */
export interface CompactPerson {
  // --- Person identity ---
  full_name?: string
  first_name?: string
  last_name?: string
  email?: string
  linkedin_url?: string
  /** City and/or country, joined with ", " when both are known. */
  location?: string

  // --- Current role ---
  title?: string
  seniority?: string

  // --- Current employer ---
  company_name?: string
  company_domain?: string
  company_linkedin_url?: string
  /** Passed through as received: a numeric count or a string value. */
  company_headcount?: string | number
}
|
|
24
|
+
|
|
25
|
+
/**
 * Resolve a dot-separated path (e.g. `employment.current.title`, `emails.0`)
 * against an arbitrary value.
 *
 * - Object segments are looked up as own properties only, so inherited members
 *   such as `toString` or `constructor` never leak into results.
 * - Array segments must be plain non-negative decimal indices. Anything else
 *   (an empty segment, `-1`, `0x1`, `1e1`, whitespace) yields `undefined`
 *   instead of being coerced to a number.
 * - Traversing through `null`, `undefined`, or a primitive yields `undefined`.
 *
 * @param obj  Root value to walk.
 * @param path Dot-separated list of keys / array indices.
 * @returns The value found at `path` (which may itself be `null`), or
 *          `undefined` when any segment cannot be resolved.
 */
function getPath(obj: unknown, path: string): unknown {
  let cur: unknown = obj
  for (const part of path.split('.')) {
    if (cur == null) return undefined
    if (Array.isArray(cur)) {
      // Number('') === 0 and Number('0x1') === 1, so validate the text itself
      // rather than trusting numeric coercion.
      if (!/^\d+$/.test(part)) return undefined
      cur = cur[Number(part)]
    } else if (typeof cur === 'object') {
      if (!Object.prototype.hasOwnProperty.call(cur, part)) return undefined
      cur = (cur as Record<string, unknown>)[part]
    } else {
      // Primitives (string, number, boolean, ...) have no addressable children.
      return undefined
    }
  }
  return cur
}
|
|
42
|
+
|
|
43
|
+
/**
 * Probe `paths` in order and return the first resolved value that is not
 * `undefined`, `null`, or the empty string.
 *
 * @param obj   Record to probe.
 * @param paths Candidate dot-paths, highest priority first.
 * @returns The first non-blank value, or `undefined` when every path is blank.
 */
function pick(obj: unknown, paths: string[]): unknown {
  for (const candidatePath of paths) {
    const resolved = getPath(obj, candidatePath)
    // Blank values do not count as a hit; keep probing.
    if (resolved === undefined || resolved === null || resolved === '') continue
    return resolved
  }
  return undefined
}
|
|
50
|
+
|
|
51
|
+
/**
 * Coerce a loosely-typed value to a string.
 *
 * Non-empty strings pass through unchanged, numbers are stringified, and
 * everything else (including the empty string) maps to `undefined`.
 */
function asString(v: unknown): string | undefined {
  switch (typeof v) {
    case 'string':
      return v === '' ? undefined : v
    case 'number':
      return `${v}`
    default:
      return undefined
  }
}
|
|
56
|
+
|
|
57
|
+
/**
 * Flatten arbitrarily nested arrays into a single flat array, preserving
 * left-to-right order. Non-array items are kept as-is.
 *
 * Items are appended one at a time to a shared accumulator rather than via
 * `push(...nested)`: spreading a very large array into a call can exceed the
 * engine's argument limit and throw a RangeError.
 *
 * @param arr Possibly nested input array (not mutated).
 * @returns A new array containing every non-array leaf of `arr`.
 */
function flattenDeep(arr: unknown[]): unknown[] {
  const out: unknown[] = []
  const visit = (items: unknown[]): void => {
    for (const item of items) {
      if (Array.isArray(item)) visit(item)
      else out.push(item)
    }
  }
  visit(arr)
  return out
}
|
|
65
|
+
|
|
66
|
+
// Wrapper-aware extraction. Providers that wrap people in atypical shapes get
|
|
67
|
+
// explicit handling; the rest fall through to common-key probing.
|
|
68
|
+
/**
 * Locate the array of person records inside a provider response.
 *
 * Resolution order:
 *  1. `null`/`undefined` and non-object payloads yield `[]`; a payload that is
 *     already an array is returned as-is.
 *  2. `leadsfactory` with a `companies_personas` array: each group's
 *     (possibly nested) `personas` array is flattened into one list.
 *  3. `linkupapi-search-profiles`: reads `data.profiles`, else `[]`.
 *  4. Otherwise the first of `people`, `data`, `results`, `contacts`,
 *     `content`, `profiles`, `items` that holds an array.
 *
 * @param data       Raw provider payload.
 * @param providerId Provider identifier used to select wrapper-specific handling.
 * @returns The people array, or `[]` when none can be located.
 */
export function extractPeopleArray(data: unknown, providerId: string): unknown[] {
  if (data == null) return []
  if (Array.isArray(data)) return data
  if (typeof data !== 'object') return []
  const d = data as Record<string, unknown>

  // LeadsFactory: companies_personas[i].personas is unknown[][] — nested arrays
  // of personas per company. Flatten to a single contacts array.
  if (providerId === 'leadsfactory' && Array.isArray(d.companies_personas)) {
    const out: unknown[] = []
    for (const group of d.companies_personas as unknown[]) {
      // Tolerate null / primitive entries instead of throwing on `.personas`.
      if (group == null || typeof group !== 'object') continue
      const personas = (group as Record<string, unknown>).personas
      if (!Array.isArray(personas)) continue
      // Append item by item; spreading a large array into push() can overflow
      // the engine's argument limit.
      for (const persona of flattenDeep(personas)) out.push(persona)
    }
    return out
  }

  // LinkUp wraps results under data.profiles.
  if (providerId === 'linkupapi-search-profiles') {
    const inner = d.data
    if (inner && typeof inner === 'object') {
      const profiles = (inner as Record<string, unknown>).profiles
      if (Array.isArray(profiles)) return profiles
    }
    return []
  }

  // Generic providers: probe the common wrapper keys in priority order.
  for (const key of ['people', 'data', 'results', 'contacts', 'content', 'profiles', 'items']) {
    const v = d[key]
    if (Array.isArray(v)) return v
  }
  return []
}
|
|
101
|
+
|
|
102
|
+
// Providers nest person fields at different depths:
|
|
103
|
+
// - LeadsFactory wraps in `{ contact: {...person}, company: {...}, persona_index, ... }`
|
|
104
|
+
// - AI-Ark wraps in `{ id, profile: {...person} }`
|
|
105
|
+
// - Apollo / PDL / FullEnrich keep person fields at the top level
|
|
106
|
+
// Merging the nested objects into the outer scope lets the candidate-path resolver
|
|
107
|
+
// find person fields regardless of provider. The sibling `company` on LeadsFactory
|
|
108
|
+
// stays at the merged top level so `company.name`, `company.domain` paths resolve.
|
|
109
|
+
/**
 * Produce a shallow copy of `record` with the fields of its nested `profile`
 * and `contact` objects hoisted to the top level.
 *
 * Precedence on key collisions: `contact` over `profile` over the outer
 * record. The original `profile` / `contact` keys remain in the result.
 * Wrappers that are not plain objects (arrays, primitives, null) are ignored.
 *
 * @returns The merged copy, or `null` when `record` is not a non-array object.
 */
function unwrapRecord(record: unknown): Record<string, unknown> | null {
  if (typeof record !== 'object' || record === null || Array.isArray(record)) return null

  const outer = record as Record<string, unknown>
  const isPlainObject = (v: unknown): v is Record<string, unknown> =>
    Boolean(v) && typeof v === 'object' && !Array.isArray(v)

  // Later spreads win, giving the outer < profile < contact precedence.
  return {
    ...outer,
    ...(isPlainObject(outer.profile) ? outer.profile : {}),
    ...(isPlainObject(outer.contact) ? outer.contact : {}),
  }
}
|
|
121
|
+
|
|
122
|
+
// Confirm a candidate value really points at LinkedIn before surfacing it as
|
|
123
|
+
// linkedin_url. Some providers (Sumble) keep their own profile URL under a bare
|
|
124
|
+
// `url` key — without this guard, agents pasting linkedin_url into LinkedIn
|
|
125
|
+
// lookups would silently fail. Accept linkedin domains and short canonical forms
|
|
126
|
+
// PDL stores (`linkedin.com/in/x`, no protocol).
|
|
127
|
+
/**
 * Type guard: true when `v` is a string whose host is `linkedin.com` or one of
 * its subdomains (`www.`, `fr.`, ...), followed by a path slash.
 *
 * The scheme is optional so protocol-less forms (`linkedin.com/in/x`) are
 * accepted. The match is anchored at the start of the value, so URLs that
 * merely mention LinkedIn elsewhere — `https://evil.com/?r=linkedin.com/in/x`,
 * `https://notlinkedin.com/in/x`, `linkedin.com.evil.com/` — are rejected; a
 * plain substring test would accept them.
 */
function isLinkedInUrl(v: unknown): v is string {
  if (typeof v !== 'string' || v.length === 0) return false
  // [optional whitespace][optional http(s)://][optional subdomains]linkedin.com[optional :port]/
  return /^\s*(?:https?:\/\/)?(?:[a-z0-9-]+\.)*linkedin\.com(?::\d+)?\//i.test(v)
}
|
|
132
|
+
|
|
133
|
+
/**
 * Probe `paths` in order and return the first resolved value that passes
 * `isLinkedInUrl`. Values that fail the check are skipped, not returned.
 *
 * @param obj   Record to probe.
 * @param paths Candidate dot-paths, highest priority first.
 * @returns The first LinkedIn URL found, or `undefined` when none qualifies.
 */
function pickLinkedIn(obj: unknown, paths: string[]): string | undefined {
  for (const candidatePath of paths) {
    const candidate = getPath(obj, candidatePath)
    if (!isLinkedInUrl(candidate)) continue
    return candidate
  }
  return undefined
}
|
|
140
|
+
|
|
141
|
+
/**
 * Probe `paths` in priority order and return the first value `coerce` accepts.
 *
 * A candidate of the wrong type (e.g. an object where a string is expected)
 * is skipped rather than ending the search, so lower-priority paths still get
 * a chance.
 */
function pickFirst<T>(
  obj: unknown,
  paths: string[],
  coerce: (v: unknown) => T | undefined,
): T | undefined {
  for (const p of paths) {
    const coerced = coerce(pick(obj, [p]))
    if (coerced !== undefined) return coerced
  }
  return undefined
}

/**
 * Map one raw provider record onto the compact person shape.
 *
 * The record is first unwrapped (nested `profile` / `contact` fields hoisted),
 * then each output field is resolved from a prioritized list of candidate
 * paths. A candidate only counts when it has a usable type — a non-empty
 * string or a number — so an object or boolean sitting at a high-priority
 * path no longer hides a valid value at a lower-priority one.
 *
 * @param record Raw person record from any provider.
 * @returns The compact person, or `null` when `record` is not an object or no
 *          field could be resolved.
 */
export function normalizePerson(record: unknown): CompactPerson | null {
  const r = unwrapRecord(record)
  if (!r) return null

  const str = (paths: string[]): string | undefined => pickFirst(r, paths, asString)
  const out: CompactPerson = {}

  const fullName = str(['full_name', 'fullName', 'name', 'displayName'])
  const firstName = str(['first_name', 'firstName', 'given_name'])
  const lastName = str(['last_name', 'lastName', 'family_name', 'surname'])
  if (fullName) out.full_name = fullName
  if (firstName) out.first_name = firstName
  if (lastName) out.last_name = lastName
  // Synthesize a full name from its parts when the provider supplies none.
  if (!out.full_name && (firstName || lastName)) {
    out.full_name = [firstName, lastName].filter(Boolean).join(' ')
  }

  const title = str([
    'employment.current.title',
    'title',
    'job_title',
    'jobTitle',
    'position',
    'current_position.title',
    'current_position',
    'headline',
  ])
  if (title) out.title = title

  const seniority = str([
    'employment.current.seniority',
    'seniority',
    'job_title_levels.0',
    'job_level', // Sumble
    'level',
  ])
  if (seniority) out.seniority = seniority

  // LinkedIn URL is the one field worth strict-validating: a wrong value here
  // silently breaks downstream LinkedIn-driven flows. `url`/`profile_url`/`linkedin`
  // are too ambiguous to trust without the LinkedIn check.
  const linkedin = pickLinkedIn(r, [
    'linkedin_url',
    'linkedinUrl',
    'social_profiles.professional_network.url',
    'social_profiles.linkedin.url',
    'profile_url',
    'linkedin',
    'url',
  ])
  if (linkedin) out.linkedin_url = linkedin

  const email = str([
    'email',
    'work_email',
    'personal_email',
    'best_email',
    'emails.0',
    'contact.email',
    'contact.emails.0',
  ])
  if (email) out.email = email

  const country = str([
    'location.country',
    'country',
    'location_country',
    'job_company_location_country',
  ])
  const city = str([
    'location.city',
    'city',
    'location_locality',
    'job_company_location_locality',
  ])
  const locationCombined = [city, country].filter(Boolean).join(', ')
  if (locationCombined) out.location = locationCombined

  const companyName = str([
    'employment.current.company.name',
    'organization.name',
    'company.name',
    'job_company_name',
    'current_company.name',
    'companyName',
  ])
  if (companyName) out.company_name = companyName

  const companyDomain = str([
    'employment.current.company.domain',
    'organization.primary_domain',
    'organization.website_url',
    'company.domain',
    'company.website',
    'job_company_website',
  ])
  if (companyDomain) out.company_domain = companyDomain

  const companyLinkedIn = str([
    'employment.current.company.social_profiles.professional_network.url',
    'employment.current.company.social_profiles.linkedin.url',
    'employment.current.company.linkedin_url',
    'organization.linkedin_url',
    'company.linkedin_url',
    'job_company_linkedin_url',
  ])
  if (companyLinkedIn) out.company_linkedin_url = companyLinkedIn

  // Headcount is passed through untouched: numbers stay numbers and string
  // values stay strings.
  const headcount = pickFirst<number | string>(
    r,
    [
      'employment.current.company.headcount',
      'employment.current.company.headcount_range',
      'organization.estimated_num_employees',
      'organization.num_employees',
      'company.headcount',
      'company.size',
      'job_company_size',
      'job_company_employee_count',
    ],
    (v) => (typeof v === 'number' || (typeof v === 'string' && v.length > 0) ? v : undefined),
  )
  if (headcount !== undefined) out.company_headcount = headcount

  // Drop records that resolved to nothing meaningful.
  if (Object.keys(out).length === 0) return null
  return out
}
|
|
267
|
+
|
|
268
|
+
/** Compact-mode response envelope returned by `compactPayload`. */
export interface CompactPayload {
  /** Normalized people: primary results first, then gap-fill results. */
  people: CompactPerson[]
  /** Upstream `metadata.total`, when the provider reports a numeric one. */
  total?: number
  /** Set only when the source payload carried `revealed === true`. */
  revealed?: true
  /** Provider behind the gap-fill records; set only when gap fill contributed people. */
  gap_fill_provider?: string
}
|
|
274
|
+
|
|
275
|
+
/**
 * Reduce a raw find_people provider payload to the compact envelope.
 *
 * Primary people are extracted and normalized first. If the payload carries a
 * `gap_fill` object, its people are normalized too and appended after the
 * primary ones. `total` is copied from `metadata.total` when numeric, and the
 * `revealed` flag is forwarded when it is exactly `true`.
 *
 * @param data       Raw provider payload (object or bare array).
 * @param providerId Provider that produced `data`; selects wrapper handling.
 * @returns The compact payload; `people` is always present (possibly empty).
 */
export function compactPayload(data: unknown, providerId: string): CompactPayload {
  // Normalize a list of raw records, dropping those that resolve to nothing.
  const normalizeAll = (records: unknown[]): CompactPerson[] => {
    const kept: CompactPerson[] = []
    for (const raw of records) {
      const person = normalizePerson(raw)
      if (person) kept.push(person)
    }
    return kept
  }

  // Envelope-level fields only exist when the payload is a non-array object.
  const envelope =
    data && typeof data === 'object' && !Array.isArray(data)
      ? (data as Record<string, unknown>)
      : undefined

  const primary = normalizeAll(extractPeopleArray(data, providerId))

  let gapFillProvider: string | undefined
  let gapFill: CompactPerson[] = []
  const gapFillRaw = envelope?.gap_fill
  if (gapFillRaw && typeof gapFillRaw === 'object') {
    gapFillProvider = asString((gapFillRaw as Record<string, unknown>).provider)
    gapFill = normalizeAll(extractPeopleArray(gapFillRaw, gapFillProvider ?? ''))
  }

  // Pull pagination total when present (FullEnrich exposes data.metadata.total).
  let total: number | undefined
  const meta = envelope?.metadata
  if (meta && typeof meta === 'object') {
    const reported = (meta as Record<string, unknown>).total
    if (typeof reported === 'number') total = reported
  }

  const out: CompactPayload = { people: primary.concat(gapFill) }
  if (total !== undefined) out.total = total
  if (gapFillProvider && gapFill.length > 0) out.gap_fill_provider = gapFillProvider
  // Preserve the revealed flag the Apollo reveal flow sets — it tells the caller
  // whether the emails/full names in `people` came from /apollo/people/bulk-match
  // (paid +1 credit per person) vs the obfuscated /apollo/people/search response.
  if (envelope?.revealed === true) out.revealed = true
  return out
}
|
|
@@ -4,7 +4,18 @@ import { fuzzyMatch } from './fuzzy.js'
|
|
|
4
4
|
|
|
5
5
|
// find_emails uses a custom waterfall — its providers are not in the registry.
|
|
6
6
|
// Exported so find-emails.ts stays in sync without a second hardcoded list.
|
|
7
|
-
|
|
7
|
+
// Order = auto-route execution order: bulk providers first (Steps 1-3), then
|
|
8
|
+
// the single-find_email fallback providers used for stragglers (Step 4).
|
|
9
|
+
/**
 * Provider ids used by the find_emails waterfall, in auto-route execution
 * order. Position in this list is significant — do not sort.
 */
export const FIND_EMAILS_PROVIDERS: string[] = [
  'prospeo', 'fullenrich',
  'findymail', 'icypeas',
  'limadata-work-email', 'blitzapi',
  'limadata-work-email-linkedin', 'linkupapi',
]
|
|
8
19
|
|
|
9
20
|
export function getProvidersForCapability(capability: Capability | 'find_emails'): string[] {
|
|
10
21
|
if (capability === 'find_emails') return FIND_EMAILS_PROVIDERS
|
|
@@ -198,6 +209,9 @@ const GATED_DESCRIPTIONS: Partial<Record<Capability, Partial<Record<string, Gate
|
|
|
198
209
|
'prospeo-search-person': { kind: 'requires', fields: 'job_titles or company_domains' },
|
|
199
210
|
'ai-ark-people': { kind: 'requires', fields: 'job_titles, seniorities, or keywords' },
|
|
200
211
|
'findymail-search-employees': { kind: 'requires', fields: 'company_domains and job_titles' },
|
|
212
|
+
'fullenrich-people-search': { kind: 'requires', fields: 'company_domains (upstream does not accept LinkedIn URL filters)' },
|
|
213
|
+
pdl: { kind: 'requires', fields: 'company_domains, company_linkedin_urls, job_titles, or seniorities' },
|
|
214
|
+
companyenrich: { kind: 'incompatible_with', fields: 'company_linkedin_urls without company_domains (upstream supports domain filters only)' },
|
|
201
215
|
},
|
|
202
216
|
// -------------------------------------------------------------------------
|
|
203
217
|
find_email: {
|
package/tests/executor.test.ts
CHANGED
|
@@ -734,6 +734,56 @@ describe('executeWithFallback with options.providers', () => {
|
|
|
734
734
|
expect(result._meta.matchedFrom).toEqual({ prospec: 'prospeo' })
|
|
735
735
|
}
|
|
736
736
|
})
|
|
737
|
+
|
|
738
|
+
it('surfaces credits_charged + credits_remaining in success _meta when API emits credit headers', async () => {
  // One provider whose result is always accepted, so the first response wins.
  stubProviders([makeProvider({ id: 'prospeo', priority: 1, hasResult: () => true })])

  // Credit accounting travels in response headers, not in the body.
  const creditHeaders = {
    'X-ColdIQ-Credits-Charged': '3',
    'X-ColdIQ-Credits-Remaining': '197',
  }
  globalThis.fetch = vi.fn(
    async () => new Response(JSON.stringify({ ok: true }), { status: 200, headers: creditHeaders }),
  ) as typeof fetch

  const result = await executeWithFallback(
    'enrich_company',
    { domain: 'coldiq.com' },
    { providers: ['prospeo'] },
  )

  expect('data' in result).toBe(true)
  if ('data' in result) {
    const meta = result._meta
    // Header strings are expected to surface as numbers.
    expect(meta.credits_charged).toBe(3)
    expect(meta.credits_remaining).toBe(197)
  }
})
|
|
765
|
+
|
|
766
|
+
it('omits credit fields from _meta when API does not emit credit headers', async () => {
  stubProviders([makeProvider({ id: 'prospeo', priority: 1, hasResult: () => true })])

  // Successful response with no credit headers at all.
  globalThis.fetch = vi.fn(
    async () => new Response(JSON.stringify({ ok: true }), { status: 200 }),
  ) as typeof fetch

  const result = await executeWithFallback(
    'enrich_company',
    { domain: 'coldiq.com' },
    { providers: ['prospeo'] },
  )

  expect('data' in result).toBe(true)
  if ('data' in result) {
    const meta = result._meta
    expect(meta.credits_charged).toBeUndefined()
    expect(meta.credits_remaining).toBeUndefined()
  }
})
|
|
737
787
|
})
|
|
738
788
|
|
|
739
789
|
// Note: the LeadsFactory backoff *schedule* itself is asserted against the live
|
|
@@ -789,3 +839,118 @@ describe('per-provider sync timeout cap', () => {
|
|
|
789
839
|
expect(elapsed).toBeLessThan(2000)
|
|
790
840
|
})
|
|
791
841
|
})
|
|
842
|
+
|
|
843
|
+
// ---------------------------------------------------------------------------
|
|
844
|
+
// upstream_error — structured upstream body survives the short-string flatten
|
|
845
|
+
// ---------------------------------------------------------------------------
|
|
846
|
+
|
|
847
|
+
describe('executor upstream_error propagation', () => {
  const originalFetch = globalThis.fetch

  // Stub global fetch so every call gets a fresh Response with this body/status.
  const stubFetch = (body: string, status: number): void => {
    globalThis.fetch = vi.fn(async () => new Response(body, { status })) as typeof fetch
  }

  // Every case issues the same find_people request.
  const runFindPeople = () =>
    executeWithFallback('find_people', { company_domains: ['coldiq.com'] })

  beforeEach(() => {
    initClient('http://test-api.local', 'test-key-123')
  })

  afterEach(() => {
    globalThis.fetch = originalFetch
    vi.restoreAllMocks()
  })

  it('preserves API `details` passthrough verbatim under providers_tried[i].upstream_error', async () => {
    stubProviders([makeProvider({ id: 'prospeo', hasResult: () => false })])

    const details = {
      error_code: 'INVALID_REQUEST',
      filter_error: "Invalid value '[CMO]' for filter 'job_titles'",
    }
    stubFetch(
      JSON.stringify({
        error: "INVALID_REQUEST: Invalid value '[CMO]' for filter 'job_titles'",
        details,
      }),
      400,
    )

    const result = await runFindPeople()

    expect('error' in result).toBe(true)
    if ('error' in result) {
      expect(result.providers_tried).toHaveLength(1)
      const [tried] = result.providers_tried
      expect(tried.status).toBe(400)
      expect(tried.error).toContain('INVALID_REQUEST')
      expect(tried.upstream_error).toEqual({
        error_code: 'INVALID_REQUEST',
        filter_error: "Invalid value '[CMO]' for filter 'job_titles'",
      })
    }
  })

  it('falls back to the full body when API has no `details` field', async () => {
    stubProviders([makeProvider({ id: 'unmigrated', hasResult: () => false })])

    stubFetch(JSON.stringify({ error: true, error_code: 'X', filter_error: 'whatever' }), 400)

    const result = await runFindPeople()

    expect('error' in result).toBe(true)
    if ('error' in result) {
      const [tried] = result.providers_tried
      // The short `error` string is still the JSON-stringified boolean (the old
      // behavior); the structured body rides along under `upstream_error` so
      // the caller can recover the detail.
      expect(tried.error).toBe('true')
      expect(tried.upstream_error).toMatchObject({
        error_code: 'X',
        filter_error: 'whatever',
      })
    }
  })

  it('defensively caps oversized upstream_error payloads to a 2KB string', async () => {
    stubProviders([makeProvider({ id: 'noisy', hasResult: () => false })])

    // A 5000-character blob, well over the 2048-character cap asserted below.
    const huge = 'x'.repeat(5000)
    stubFetch(JSON.stringify({ error: 'bad', details: { blob: huge } }), 400)

    const result = await runFindPeople()

    expect('error' in result).toBe(true)
    if ('error' in result) {
      const upstream = result.providers_tried[0].upstream_error
      expect(typeof upstream).toBe('string')
      expect((upstream as string).length).toBeLessThanOrEqual(2048)
    }
  })

  it('surfaces the synthetic non-JSON envelope as upstream_error', async () => {
    stubProviders([makeProvider({ id: 'empty', hasResult: () => false })])

    // No JSON body — client.ts synthesizes { error: 'Non-JSON response from API' },
    // which has no `details` field so the executor falls back to the full envelope.
    // Surfacing it (rather than dropping to undefined) keeps "upstream returned
    // garbage" debuggable end-to-end.
    stubFetch('not json', 500)

    const result = await runFindPeople()

    expect('error' in result).toBe(true)
    if ('error' in result) {
      const [tried] = result.providers_tried
      expect(tried.upstream_error).toEqual({ error: 'Non-JSON response from API' })
    }
  })
})
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
// Direct upstream probe — documents what FullEnrich /people/search actually
|
|
2
|
+
// honors as a company filter. The ColdIQ wrapper schema at
|
|
3
|
+
// src/providers/fullenrich/schema.ts declares `current_company_linkedin_urls`,
|
|
4
|
+
// but the live API rejects every shape with `error.filters.empty` (400). Only
|
|
5
|
+
// `current_company_domains` and `current_company_names` actually scope results.
|
|
6
|
+
//
|
|
7
|
+
// This is why the registry mapper for `fullenrich-people-search` is gated
|
|
8
|
+
// behind `company_domains` only — adding the LinkedIn URL field caused the
|
|
9
|
+
// upstream to treat the request as filterless and return a 50k-result global
|
|
10
|
+
// titles-only search, masquerading as company-scoped.
|
|
11
|
+
//
|
|
12
|
+
// Run: FULLENRICH_API_KEY=… npx tsx mcp/tests/live/fullenrich-upstream-probe.ts
|
|
13
|
+
|
|
14
|
+
// API key comes from the environment; the probe cannot run without it.
const KEY = process.env.FULLENRICH_API_KEY
if (!KEY) {
  console.error('FULLENRICH_API_KEY required')
  process.exit(1)
}

// Upstream endpoint under test.
const URL = 'https://app.fullenrich.com/api/v2/people/search'
|
|
17
|
+
|
|
18
|
+
/**
 * POST one filter body to the FullEnrich people-search endpoint and print a
 * one-line summary: status, latency, reported `metadata.total`, and — for
 * non-200 responses — the first 120 characters of the response payload.
 *
 * The response body is read exactly once as text and then parsed. A fetch
 * body can only be consumed once, so calling `res.text()` after a failed
 * `res.json()` rejects instead of returning the raw payload.
 *
 * @param label Human-readable name for the probe, shown in the log line.
 * @param body  JSON-serializable request body.
 */
async function call(label: string, body: unknown) {
  const t0 = Date.now()
  const res = await fetch(URL, {
    method: 'POST',
    headers: { Authorization: `Bearer ${KEY}`, 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  })

  const raw = await res.text()
  let data: unknown
  try {
    data = JSON.parse(raw)
  } catch {
    // Non-JSON response: keep the raw text so the log line still shows it.
    data = { raw }
  }

  // `metadata.total` is only read when both levels are objects.
  let total: unknown
  if (data !== null && typeof data === 'object') {
    const meta = (data as Record<string, unknown>).metadata
    if (meta !== null && typeof meta === 'object') {
      total = (meta as Record<string, unknown>).total
    }
  }

  const ok = res.status === 200
  console.log(`[${ok ? 'OK ' : 'ERR'}] ${label.padEnd(60)} status=${res.status} ms=${Date.now() - t0} total=${total ?? '-'} ${ok ? '' : 'err=' + JSON.stringify(data).slice(0, 120)}`)
}
|
|
30
|
+
|
|
31
|
+
/**
 * Run the probe sequence, one request at a time: filters expected to work,
 * LinkedIn-URL filter variants, then the no-filter baselines.
 */
async function main() {
  console.log('=== FullEnrich /people/search filter compatibility probe ===\n')

  console.log('▸ Known-working filters (should all return 200 with total > 0)')
  const knownWorking = [
    ['current_company_domains', 'spendesk.com'],
    ['current_company_names', 'Spendesk'],
  ] as const
  for (const [filter, value] of knownWorking) {
    await call(filter, { [filter]: [{ value }], limit: 1 })
  }

  console.log('\n▸ LinkedIn URL variants (declared in ColdIQ schema, NOT honored upstream)')
  const linkedInVariants = [
    ['current_company_linkedin_urls', 'https://www.linkedin.com/company/spendesk'],
    ['current_company_linkedin_url', 'https://www.linkedin.com/company/spendesk'],
    ['current_company_linkedin_urls (short)', 'linkedin.com/company/spendesk'],
    ['current_company_linkedin_urls (slug)', 'spendesk'],
    ['current_company_linkedin_ids', 'spendesk'],
  ] as const
  for (const [label, value] of linkedInVariants) {
    // The label may carry a " (variant)" suffix; the request key is the first word.
    const [filterKey] = label.split(' ')
    await call(label, { [filterKey]: [{ value }], limit: 1 })
  }

  console.log('\n▸ No-filter baseline (the buggy path: 50k+ titles-only results)')
  await call('titles-only, no company', { current_position_titles: [{ value: 'CEO' }], limit: 1 })
  await call('completely empty (sanity)', { limit: 1 })
}

main().catch((err) => {
  console.error('FATAL', err)
  process.exit(1)
})
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
// Direct upstream probe — verifies PDL's /person/search accepts
|
|
2
|
+
// `job_company_linkedin_url` as a filterable term, and that the returned people
|
|
3
|
+
// actually work at the company we asked for.
|
|
4
|
+
//
|
|
5
|
+
// PDL's Elasticsearch DSL is permissive: an unknown field name returns 0 hits
|
|
6
|
+
// rather than an error, so a silent miss looks identical to "no data." The
|
|
7
|
+
// probe compares a known-good baseline (job_company_website) against the
|
|
8
|
+
// LinkedIn-URL variant.
|
|
9
|
+
//
|
|
10
|
+
// Run: PDL_API_KEY=… npx tsx mcp/tests/live/pdl-upstream-probe.ts
|
|
11
|
+
|
|
12
|
+
// API key comes from the environment; the probe cannot run without it.
const KEY = process.env.PDL_API_KEY
if (!KEY) {
  console.error('PDL_API_KEY required')
  process.exit(1)
}

// Base URL of the People Data Labs API version under test.
const BASE = 'https://api.peopledatalabs.com/v5'
|
|
15
|
+
|
|
16
|
+
/**
 * Run a PDL `/person/search` query whose `bool.must` clauses are `must`.
 *
 * The response body is read exactly once as text and then parsed. A fetch
 * body can only be consumed once, so calling `res.text()` after a failed
 * `res.json()` rejects instead of returning the raw payload.
 *
 * @param must Clauses placed under `query.bool.must`.
 * @param size Maximum number of records to request (default 3).
 * @returns `ok` / `status` of the HTTP response, elapsed `ms`, and `data`:
 *          the parsed JSON body, or the raw text when it is not valid JSON.
 */
async function search(must: unknown[], size = 3) {
  const t0 = Date.now()
  const res = await fetch(`${BASE}/person/search`, {
    method: 'POST',
    headers: { 'X-API-Key': KEY!, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query: { bool: { must } }, size }),
    signal: AbortSignal.timeout(30_000),
  })

  const raw = await res.text()
  let data: any
  try {
    data = JSON.parse(raw)
  } catch {
    // Not JSON (e.g. an HTML error page): hand back the raw text.
    data = raw
  }
  return { ok: res.ok, status: res.status, ms: Date.now() - t0, data }
}
|
|
28
|
+
|
|
29
|
+
/**
 * Print a header line for one search result (status, latency, reported total,
 * number of records returned) followed by one line per record, up to five.
 *
 * @param label Name of the probe, shown in brackets.
 * @param r     Result object as returned by `search`.
 */
function summarize(label: string, r: any) {
  const payload = r.data ?? {}
  const people: Array<any> = payload.data ?? []
  const reportedTotal = payload.total ?? '?'

  console.log(`\n[${label}] status=${r.status} ms=${r.ms} total=${reportedTotal} returned=${people.length}`)
  people.slice(0, 5).forEach((p) => {
    console.log(` - ${p.full_name} | ${p.job_title} | co=${p.job_company_name} | website=${p.job_company_website} | li=${p.job_company_linkedin_url}`)
  })
}
|
|
38
|
+
|
|
39
|
+
/**
 * Compare a PDL person search filtered by company website (baseline) with the
 * same search filtered by company LinkedIn URL, then print a verdict.
 *
 * Exit codes:
 *  - 0: the LinkedIn filter returns at least half the baseline total.
 *  - 1: the LinkedIn filter returns 0 while the baseline returns results.
 *  - 2: inconclusive — a request failed, the baseline is empty, or the
 *       LinkedIn filter returns far fewer matches than the baseline.
 */
async function main() {
  console.log('=== PDL upstream probe: verify job_company_linkedin_url ===')

  // Use a well-known mid-sized B2B SaaS for the comparison. Aircall is a good fit:
  // ~700 employees, indexed by both website and LinkedIn URL in PDL.
  const TARGET_DOMAIN = 'aircall.io'
  const TARGET_LINKEDIN = 'linkedin.com/company/aircall'

  // Baseline: search by known-working field (job_company_website)
  console.log(`\n▸ Baseline: search by job_company_website = ${TARGET_DOMAIN}`)
  const byDomain = await search([{ terms: { job_company_website: [TARGET_DOMAIN] } }])
  summarize('job_company_website', byDomain)

  // Test: same search but by LinkedIn URL
  console.log(`\n▸ Test: search by job_company_linkedin_url = ${TARGET_LINKEDIN}`)
  const byLinkedIn = await search([{ terms: { job_company_linkedin_url: [TARGET_LINKEDIN] } }])
  summarize('job_company_linkedin_url', byLinkedIn)

  // A failed request (bad key, rate limit, upstream outage) carries no total.
  // Without this check it would be read as "0 hits" and could produce the
  // "REMOVE the mapping" verdict for what is really a transport problem.
  const probes = [
    ['job_company_website', byDomain],
    ['job_company_linkedin_url', byLinkedIn],
  ] as const
  for (const [field, result] of probes) {
    if (!result.ok) {
      console.log(`⚠️ ${field} request failed with HTTP ${result.status} — result is inconclusive, fix the request and re-run`)
      process.exit(2)
    }
  }

  const baselineTotal = byDomain.data?.total ?? 0
  const linkedinTotal = byLinkedIn.data?.total ?? 0

  console.log(`\n=== Result ===`)
  console.log(`baseline (by website): total=${baselineTotal}`)
  console.log(`test (by linkedin_url): total=${linkedinTotal}`)

  if (linkedinTotal > 0 && baselineTotal > 0) {
    const ratio = linkedinTotal / baselineTotal
    console.log(`ratio = ${ratio.toFixed(2)} (1.0 = same company, both filters work)`)
    if (ratio >= 0.5) {
      console.log('✅ job_company_linkedin_url WORKS — keep the mapping in registry.ts')
      process.exit(0)
    } else {
      console.log('⚠️ linkedin_url returns far fewer matches than website — investigate')
      process.exit(2)
    }
  } else if (linkedinTotal === 0 && baselineTotal > 0) {
    console.log('❌ job_company_linkedin_url silently returns 0 — REMOVE the mapping clause for pdl in registry.ts and keep gate-only')
    process.exit(1)
  } else {
    console.log('⚠️ Baseline itself returned 0 — pick a different test company')
    process.exit(2)
  }
}

main().catch((e) => { console.error('FATAL', e); process.exit(1) })
|