@coldiq/mcp 0.1.18 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/dist/client.d.ts +2 -0
  2. package/dist/client.d.ts.map +1 -1
  3. package/dist/client.js +7 -1
  4. package/dist/client.js.map +1 -1
  5. package/dist/executor.d.ts +11 -0
  6. package/dist/executor.d.ts.map +1 -1
  7. package/dist/executor.js +72 -11
  8. package/dist/executor.js.map +1 -1
  9. package/dist/index.js +2 -0
  10. package/dist/index.js.map +1 -1
  11. package/dist/registry.d.ts +1 -0
  12. package/dist/registry.d.ts.map +1 -1
  13. package/dist/registry.js +57 -8
  14. package/dist/registry.js.map +1 -1
  15. package/dist/tools/find-emails.d.ts +2 -7
  16. package/dist/tools/find-emails.d.ts.map +1 -1
  17. package/dist/tools/find-emails.js +193 -67
  18. package/dist/tools/find-emails.js.map +1 -1
  19. package/dist/tools/find-people.d.ts +3 -2
  20. package/dist/tools/find-people.d.ts.map +1 -1
  21. package/dist/tools/find-people.js +65 -7
  22. package/dist/tools/find-people.js.map +1 -1
  23. package/dist/tools/get-credit-balance.d.ts +17 -0
  24. package/dist/tools/get-credit-balance.d.ts.map +1 -0
  25. package/dist/tools/get-credit-balance.js +20 -0
  26. package/dist/tools/get-credit-balance.js.map +1 -0
  27. package/dist/utils/compact-people.d.ts +24 -0
  28. package/dist/utils/compact-people.d.ts.map +1 -0
  29. package/dist/utils/compact-people.js +306 -0
  30. package/dist/utils/compact-people.js.map +1 -0
  31. package/dist/utils/provider-resolver.d.ts.map +1 -1
  32. package/dist/utils/provider-resolver.js +15 -1
  33. package/dist/utils/provider-resolver.js.map +1 -1
  34. package/package.json +1 -1
  35. package/src/client.ts +9 -1
  36. package/src/executor.ts +89 -17
  37. package/src/index.ts +8 -0
  38. package/src/registry.ts +67 -8
  39. package/src/tools/find-emails.ts +251 -80
  40. package/src/tools/find-people.ts +70 -7
  41. package/src/tools/get-credit-balance.ts +24 -0
  42. package/src/utils/compact-people.ts +318 -0
  43. package/src/utils/provider-resolver.ts +15 -1
  44. package/tests/executor.test.ts +165 -0
  45. package/tests/live/fullenrich-upstream-probe.ts +55 -0
  46. package/tests/live/pdl-upstream-probe.ts +83 -0
  47. package/tests/registry-find-people.test.ts +198 -7
  48. package/tests/registry-search-companies.test.ts +46 -7
  49. package/tests/tools/find-emails.test.ts +267 -1
  50. package/tests/tools/find-people.test.ts +269 -5
  51. package/tests/tools/get-credit-balance.test.ts +56 -0
  52. package/tests/utils/compact-people.test.ts +462 -0
@@ -0,0 +1,318 @@
1
+ // Normalize find_people responses into a small, predictable shape.
2
+ //
3
+ // The find_people waterfall fans across 10 providers, each with a different
4
+ // payload shape (data.people[], data.data[], data.companies_personas[].personas[][],
5
+ // data.data.profiles[], data.content[], etc.). Verbose passthrough produces 30KB+
6
+ // per record from FullEnrich alone (full employment history, every company office,
7
+ // company description, specialties, skills, languages). Compact mode keeps only
8
+ // the fields agents actually need to act on a lead.
9
+
10
/**
 * Provider-agnostic lead record produced by compact mode.
 *
 * Every field is optional: a field is only present when a value could be
 * resolved for it from the provider payload.
 */
export interface CompactPerson {
  /** Display name; may be synthesized from first/last name when no full name is supplied. */
  full_name?: string
  /** Given name. */
  first_name?: string
  /** Family name. */
  last_name?: string
  /** Current job title (or headline when no title is available). */
  title?: string
  /** Seniority / job level as reported by the provider. */
  seniority?: string
  /** Person's LinkedIn profile URL. */
  linkedin_url?: string
  /** Email address. */
  email?: string
  /** Name of the current employer. */
  company_name?: string
  /** Domain or website of the current employer. */
  company_domain?: string
  /** LinkedIn URL of the current employer. */
  company_linkedin_url?: string
  /** Employee count, passed through as the provider reports it (number or string). */
  company_headcount?: number | string
  /** Location rendered as "city, country" (either part may be missing). */
  location?: string
}
24
+
25
/**
 * Resolves a dot-separated path (e.g. `employment.current.title`, `emails.0`)
 * against an arbitrary value.
 *
 * - On arrays, a segment must convert to an integer and is used as an index.
 * - On objects, a segment is used as a property key.
 * - Hitting null/undefined or a primitive before the path is exhausted yields
 *   `undefined`.
 *
 * @param obj  Root value to walk.
 * @param path Dot-separated path.
 * @returns The value at the path (which may itself be `null`), or `undefined`.
 */
function getPath(obj: unknown, path: string): unknown {
  let node: unknown = obj
  for (const segment of path.split('.')) {
    // Covers both a null/undefined root and a null hop in the middle of the path.
    if (node === null || node === undefined) return undefined

    if (Array.isArray(node)) {
      const position = Number(segment)
      if (!Number.isInteger(position)) return undefined
      node = node[position]
      continue
    }

    if (typeof node !== 'object') return undefined
    node = (node as Record<string, unknown>)[segment]
  }
  return node
}
42
+
43
/**
 * Returns the value at the first candidate path that is neither `undefined`,
 * `null`, nor the empty string.
 *
 * @param obj   Root value to probe.
 * @param paths Candidate dot-paths, in priority order.
 * @returns The first non-empty value, or `undefined` when every path is empty.
 */
function pick(obj: unknown, paths: string[]): unknown {
  for (const candidate of paths) {
    const value = getPath(obj, candidate)
    const isEmpty = value === undefined || value === null || value === ''
    if (!isEmpty) return value
  }
  return undefined
}
50
+
51
/**
 * Coerces a value to a string.
 *
 * @returns The string itself when non-empty, the decimal rendering of a number,
 *          and `undefined` for everything else (including the empty string).
 */
function asString(v: unknown): string | undefined {
  switch (typeof v) {
    case 'string':
      return v === '' ? undefined : v
    case 'number':
      return String(v)
    default:
      return undefined
  }
}
56
+
57
/**
 * Flattens arbitrarily nested arrays into a single flat array, preserving
 * left-to-right order of the leaf values.
 *
 * Leaves are appended one at a time to a shared accumulator. Spreading a whole
 * sub-array into `push(...)` would pass every element as a call argument and
 * can throw a RangeError on very large inputs; it would also allocate a
 * temporary array per nesting level.
 *
 * @param arr Possibly nested array.
 * @returns A new flat array containing every non-array element of `arr`.
 */
function flattenDeep(arr: unknown[]): unknown[] {
  const out: unknown[] = []
  const visit = (items: unknown[]): void => {
    for (const item of items) {
      if (Array.isArray(item)) visit(item)
      else out.push(item)
    }
  }
  visit(arr)
  return out
}
65
+
66
+ // Wrapper-aware extraction. Providers that wrap people in atypical shapes get
67
+ // explicit handling; the rest fall through to common-key probing.
68
/**
 * Locates the array of person records inside a provider response.
 *
 * Providers that wrap people in atypical shapes get explicit handling; the rest
 * fall through to probing a fixed list of common keys.
 *
 * @param data       Raw provider payload (any shape).
 * @param providerId Provider identifier used to select wrapper-specific handling.
 * @returns The people array, or `[]` when none can be found.
 */
export function extractPeopleArray(data: unknown, providerId: string): unknown[] {
  if (data == null) return []
  if (Array.isArray(data)) return data
  if (typeof data !== 'object') return []
  const d = data as Record<string, unknown>

  // LeadsFactory: companies_personas[i].personas holds nested arrays of
  // personas per company. Flatten everything into a single contacts array.
  if (providerId === 'leadsfactory' && Array.isArray(d.companies_personas)) {
    const out: unknown[] = []
    for (const group of d.companies_personas as unknown[]) {
      // Skip null / primitive entries instead of throwing on `.personas`.
      if (group === null || typeof group !== 'object') continue
      const personas = (group as Record<string, unknown>).personas
      if (!Array.isArray(personas)) continue
      for (const persona of (personas as unknown[]).flat(Infinity)) out.push(persona)
    }
    return out
  }

  // LinkUp wraps results under data.profiles; anything else means "no people".
  if (providerId === 'linkupapi-search-profiles') {
    const inner = d.data
    if (inner && typeof inner === 'object') {
      const profiles = (inner as Record<string, unknown>).profiles
      if (Array.isArray(profiles)) return profiles
    }
    return []
  }

  // Common-key probing, in priority order: first key holding an array wins.
  for (const key of ['people', 'data', 'results', 'contacts', 'content', 'profiles', 'items']) {
    const v = d[key]
    if (Array.isArray(v)) return v
  }
  return []
}
101
+
102
+ // Providers nest person fields at different depths:
103
+ // - LeadsFactory wraps in `{ contact: {...person}, company: {...}, persona_index, ... }`
104
+ // - AI-Ark wraps in `{ id, profile: {...person} }`
105
+ // - Apollo / PDL / FullEnrich keep person fields at the top level
106
+ // Merging the nested objects into the outer scope lets the candidate-path resolver
107
+ // find person fields regardless of provider. The sibling `company` on LeadsFactory
108
+ // stays at the merged top level so `company.name`, `company.domain` paths resolve.
109
/**
 * Hoists nested person fields to the top level of a shallow copy of `record`.
 *
 * The keys of a nested `profile` object, then of a nested `contact` object,
 * are copied over the outer keys (later copies win). The nested objects
 * themselves remain reachable under their original keys.
 *
 * @param record Raw provider record.
 * @returns A new merged object, or `null` when `record` is not a plain
 *          (non-array) object.
 */
function unwrapRecord(record: unknown): Record<string, unknown> | null {
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    Boolean(value) && typeof value === 'object' && !Array.isArray(value)

  if (!isPlainObject(record)) return null

  const flattened: Record<string, unknown> = { ...record }
  // Order matters: `contact` is applied after `profile`, so it wins on conflicts.
  for (const nested of [record.profile, record.contact]) {
    if (isPlainObject(nested)) Object.assign(flattened, nested)
  }
  return flattened
}
121
+
122
+ // Confirm a candidate value really points at LinkedIn before surfacing it as
123
+ // linkedin_url. Some providers (Sumble) keep their own profile URL under a bare
124
+ // `url` key — without this guard, agents pasting linkedin_url into LinkedIn
125
+ // lookups would silently fail. Accept linkedin domains and short canonical forms
126
+ // PDL stores (`linkedin.com/in/x`, no protocol).
127
// Matches values whose host is linkedin.com or one of its subdomains. The
// scheme is optional so short canonical forms (`linkedin.com/in/x`) and
// protocol-relative URLs (`//linkedin.com/in/x`) are accepted. Anchored at the
// start so the domain must be the host, not just a substring somewhere in the
// path or query.
const LINKEDIN_HOST_PATTERN = /^(?:(?:https?:)?\/\/)?(?:[a-z0-9-]+\.)*linkedin\.com\//

/**
 * Type guard: true when `v` is a string that points at LinkedIn.
 *
 * Compared case-insensitively and ignoring surrounding whitespace. Values that
 * merely contain `linkedin.com/` elsewhere (e.g. `notlinkedin.com/...` or a
 * redirect URL with `linkedin.com/` in its query string) are rejected.
 *
 * @param v Candidate value of any type.
 * @returns Whether `v` is a LinkedIn URL.
 */
function isLinkedInUrl(v: unknown): v is string {
  if (typeof v !== 'string' || v.length === 0) return false
  return LINKEDIN_HOST_PATTERN.test(v.trim().toLowerCase())
}
132
+
133
/**
 * Returns the value at the first candidate path that passes `isLinkedInUrl`.
 *
 * @param obj   Root value to probe.
 * @param paths Candidate dot-paths, in priority order.
 * @returns The first LinkedIn URL found, or `undefined`.
 */
function pickLinkedIn(obj: unknown, paths: string[]): string | undefined {
  for (const candidatePath of paths) {
    const candidate = getPath(obj, candidatePath)
    if (!isLinkedInUrl(candidate)) continue
    return candidate
  }
  return undefined
}
140
+
141
/**
 * Returns the first candidate path whose value `accept` can use.
 *
 * Each path is resolved on its own, so a present-but-unusable value (for
 * example an object where a string is expected) does not hide later candidates.
 */
function pickFirst<T>(
  obj: unknown,
  paths: string[],
  accept: (value: unknown) => T | undefined,
): T | undefined {
  for (const path of paths) {
    const accepted = accept(pick(obj, [path]))
    if (accepted !== undefined) return accepted
  }
  return undefined
}

/** Accepts a headcount given as a number or a non-empty string; rejects anything else. */
function asHeadcount(value: unknown): number | string | undefined {
  if (typeof value === 'number') return value
  if (typeof value === 'string' && value.length > 0) return value
  return undefined
}

/**
 * Maps one raw provider record onto the compact person shape.
 *
 * The record is first unwrapped (nested `profile` / `contact` fields hoisted),
 * then each output field is resolved from a prioritized list of candidate
 * paths. A candidate is only used when it yields a usable value; otherwise the
 * next candidate is tried.
 *
 * @param record Raw provider record (any shape).
 * @returns The compact person, or `null` when the record is not an object or
 *          no field could be resolved.
 */
export function normalizePerson(record: unknown): CompactPerson | null {
  const r = unwrapRecord(record)
  if (!r) return null

  const str = (paths: string[]): string | undefined => pickFirst(r, paths, asString)
  const out: CompactPerson = {}

  const fullName = str(['full_name', 'fullName', 'name', 'displayName'])
  const firstName = str(['first_name', 'firstName', 'given_name'])
  const lastName = str(['last_name', 'lastName', 'family_name', 'surname'])
  if (fullName) out.full_name = fullName
  if (firstName) out.first_name = firstName
  if (lastName) out.last_name = lastName
  // No explicit full name: synthesize one from whichever name parts exist.
  if (!out.full_name && (firstName || lastName)) {
    out.full_name = [firstName, lastName].filter(Boolean).join(' ')
  }

  // `current_position` may be an object or a string depending on the payload;
  // when it is an object without `title`, resolution continues to `headline`.
  const title = str([
    'employment.current.title',
    'title',
    'job_title',
    'jobTitle',
    'position',
    'current_position.title',
    'current_position',
    'headline',
  ])
  if (title) out.title = title

  const seniority = str([
    'employment.current.seniority',
    'seniority',
    'job_title_levels.0',
    'job_level', // Sumble
    'level',
  ])
  if (seniority) out.seniority = seniority

  // LinkedIn URL is the one field worth strict-validating: a wrong value here
  // silently breaks downstream LinkedIn-driven flows. `url`/`profile_url`/`linkedin`
  // are too ambiguous to trust without the LinkedIn check in pickLinkedIn.
  const linkedin = pickLinkedIn(r, [
    'linkedin_url',
    'linkedinUrl',
    'social_profiles.professional_network.url',
    'social_profiles.linkedin.url',
    'profile_url',
    'linkedin',
    'url',
  ])
  if (linkedin) out.linkedin_url = linkedin

  const email = str([
    'email',
    'work_email',
    'personal_email',
    'best_email',
    'emails.0',
    'contact.email',
    'contact.emails.0',
  ])
  if (email) out.email = email

  const country = str([
    'location.country',
    'country',
    'location_country',
    'job_company_location_country',
  ])
  const city = str([
    'location.city',
    'city',
    'location_locality',
    'job_company_location_locality',
  ])
  const locationCombined = [city, country].filter(Boolean).join(', ')
  if (locationCombined) out.location = locationCombined

  const companyName = str([
    'employment.current.company.name',
    'organization.name',
    'company.name',
    'job_company_name',
    'current_company.name',
    'companyName',
  ])
  if (companyName) out.company_name = companyName

  const companyDomain = str([
    'employment.current.company.domain',
    'organization.primary_domain',
    'organization.website_url',
    'company.domain',
    'company.website',
    'job_company_website',
  ])
  if (companyDomain) out.company_domain = companyDomain

  const companyLinkedIn = str([
    'employment.current.company.social_profiles.professional_network.url',
    'employment.current.company.social_profiles.linkedin.url',
    'employment.current.company.linkedin_url',
    'organization.linkedin_url',
    'company.linkedin_url',
    'job_company_linkedin_url',
  ])
  if (companyLinkedIn) out.company_linkedin_url = companyLinkedIn

  // Headcount is passed through untouched as a number or a non-empty string.
  const headcount = pickFirst(
    r,
    [
      'employment.current.company.headcount',
      'employment.current.company.headcount_range',
      'organization.estimated_num_employees',
      'organization.num_employees',
      'company.headcount',
      'company.size',
      'job_company_size',
      'job_company_employee_count',
    ],
    asHeadcount,
  )
  if (headcount !== undefined) out.company_headcount = headcount

  // Drop records that resolved to nothing meaningful.
  if (Object.keys(out).length === 0) return null
  return out
}
267
+
268
/** Compact-mode response envelope for people results. */
export interface CompactPayload {
  /** Normalized people: main results first, then any gap-fill results. */
  people: CompactPerson[]
  /** Pagination total, when the payload carries a numeric `metadata.total`. */
  total?: number
  /** Provider that supplied the gap-fill people; only set when it contributed at least one. */
  gap_fill_provider?: string
  /** Present (always `true`) when the source payload had `revealed === true`. */
  revealed?: true
}
274
+
275
/**
 * Reduces a raw provider response to the compact payload.
 *
 * Main results are extracted and normalized first. When the payload is an
 * object with a `gap_fill` object, its people are normalized and appended
 * after the main ones. `metadata.total` (numeric only) and `revealed === true`
 * are carried over.
 *
 * @param data       Raw provider payload.
 * @param providerId Provider that produced `data`.
 * @returns The compact payload; `people` is always present (possibly empty).
 */
export function compactPayload(data: unknown, providerId: string): CompactPayload {
  // Records that normalize to null are dropped.
  const normalizeAll = (records: unknown[]): CompactPerson[] => {
    const kept: CompactPerson[] = []
    for (const record of records) {
      const person = normalizePerson(record)
      if (person) kept.push(person)
    }
    return kept
  }

  const primary = normalizeAll(extractPeopleArray(data, providerId))

  // Only a non-array object can carry gap_fill / metadata / revealed.
  const envelope =
    data && typeof data === 'object' && !Array.isArray(data)
      ? (data as Record<string, unknown>)
      : undefined

  let gapFillProvider: string | undefined
  let gapFillPeople: CompactPerson[] = []
  const gapFill = envelope?.gap_fill
  if (gapFill && typeof gapFill === 'object') {
    gapFillProvider = asString((gapFill as Record<string, unknown>).provider)
    gapFillPeople = normalizeAll(extractPeopleArray(gapFill, gapFillProvider ?? ''))
  }

  // Pull pagination total when present (FullEnrich exposes data.metadata.total).
  let total: number | undefined
  const meta = envelope?.metadata
  if (meta && typeof meta === 'object') {
    const reported = (meta as Record<string, unknown>).total
    if (typeof reported === 'number') total = reported
  }

  const out: CompactPayload = { people: [...primary, ...gapFillPeople] }
  if (total !== undefined) out.total = total
  if (gapFillProvider && gapFillPeople.length > 0) out.gap_fill_provider = gapFillProvider
  // Preserve the revealed flag the Apollo reveal flow sets — it tells the caller
  // whether the emails/full names in `people` came from /apollo/people/bulk-match
  // (paid +1 credit per person) vs the obfuscated /apollo/people/search response.
  if (envelope?.revealed === true) out.revealed = true
  return out
}
@@ -4,7 +4,18 @@ import { fuzzyMatch } from './fuzzy.js'
4
4
 
5
5
  // find_emails uses a custom waterfall — its providers are not in the registry.
6
6
  // Exported so find-emails.ts stays in sync without a second hardcoded list.
7
- export const FIND_EMAILS_PROVIDERS = ['prospeo', 'fullenrich', 'findymail', 'icypeas']
7
+ // Order = auto-route execution order: bulk providers first (Steps 1-3), then
8
+ // the single-find_email fallback providers used for stragglers (Step 4).
9
/**
 * Provider ids used by the find_emails waterfall, in list order.
 * The first four are the bulk providers; the remaining four are the
 * single-lookup fallback providers.
 */
export const FIND_EMAILS_PROVIDERS: string[] = [
  // bulk providers
  'prospeo',
  'fullenrich',
  'findymail',
  'icypeas',
  // single-lookup fallbacks for stragglers
  'limadata-work-email',
  'blitzapi',
  'limadata-work-email-linkedin',
  'linkupapi',
]
8
19
 
9
20
  export function getProvidersForCapability(capability: Capability | 'find_emails'): string[] {
10
21
  if (capability === 'find_emails') return FIND_EMAILS_PROVIDERS
@@ -198,6 +209,9 @@ const GATED_DESCRIPTIONS: Partial<Record<Capability, Partial<Record<string, Gate
198
209
  'prospeo-search-person': { kind: 'requires', fields: 'job_titles or company_domains' },
199
210
  'ai-ark-people': { kind: 'requires', fields: 'job_titles, seniorities, or keywords' },
200
211
  'findymail-search-employees': { kind: 'requires', fields: 'company_domains and job_titles' },
212
+ 'fullenrich-people-search': { kind: 'requires', fields: 'company_domains (upstream does not accept LinkedIn URL filters)' },
213
+ pdl: { kind: 'requires', fields: 'company_domains, company_linkedin_urls, job_titles, or seniorities' },
214
+ companyenrich: { kind: 'incompatible_with', fields: 'company_linkedin_urls without company_domains (upstream supports domain filters only)' },
201
215
  },
202
216
  // -------------------------------------------------------------------------
203
217
  find_email: {
@@ -734,6 +734,56 @@ describe('executeWithFallback with options.providers', () => {
734
734
  expect(result._meta.matchedFrom).toEqual({ prospec: 'prospeo' })
735
735
  }
736
736
  })
737
+
738
it('surfaces credits_charged + credits_remaining in success _meta when API emits credit headers', async () => {
  const creditHeaders = {
    'X-ColdIQ-Credits-Charged': '3',
    'X-ColdIQ-Credits-Remaining': '197',
  }

  stubProviders([makeProvider({ id: 'prospeo', priority: 1, hasResult: () => true })])

  // A fresh Response per call: a Response body can only be read once.
  const fetchStub = vi.fn(async () =>
    new Response(JSON.stringify({ ok: true }), { status: 200, headers: creditHeaders }),
  )
  globalThis.fetch = fetchStub as typeof fetch

  const outcome = await executeWithFallback(
    'enrich_company',
    { domain: 'coldiq.com' },
    { providers: ['prospeo'] },
  )

  expect('data' in outcome).toBe(true)
  if (!('data' in outcome)) return
  // Header strings are expected to surface as numbers in _meta.
  expect(outcome._meta.credits_charged).toBe(3)
  expect(outcome._meta.credits_remaining).toBe(197)
})
765
+
766
it('omits credit fields from _meta when API does not emit credit headers', async () => {
  stubProviders([makeProvider({ id: 'prospeo', priority: 1, hasResult: () => true })])

  // Successful response with no credit headers at all.
  const fetchStub = vi.fn(async () => new Response(JSON.stringify({ ok: true }), { status: 200 }))
  globalThis.fetch = fetchStub as typeof fetch

  const outcome = await executeWithFallback(
    'enrich_company',
    { domain: 'coldiq.com' },
    { providers: ['prospeo'] },
  )

  expect('data' in outcome).toBe(true)
  if (!('data' in outcome)) return
  expect(outcome._meta.credits_charged).toBeUndefined()
  expect(outcome._meta.credits_remaining).toBeUndefined()
})
737
787
  })
738
788
 
739
789
  // Note: the LeadsFactory backoff *schedule* itself is asserted against the live
@@ -789,3 +839,118 @@ describe('per-provider sync timeout cap', () => {
789
839
  expect(elapsed).toBeLessThan(2000)
790
840
  })
791
841
  })
842
+
843
+ // ---------------------------------------------------------------------------
844
+ // upstream_error — structured upstream body survives the short-string flatten
845
+ // ---------------------------------------------------------------------------
846
+
847
describe('executor upstream_error propagation', () => {
  const originalFetch = globalThis.fetch

  // Replaces global fetch with a stub answering every call with a fresh
  // Response built from the given raw body and status.
  const stubFetch = (rawBody: string, status: number): void => {
    globalThis.fetch = vi.fn(async () => new Response(rawBody, { status })) as typeof fetch
  }

  // Every case drives the same find_people request.
  const runFindPeople = () =>
    executeWithFallback('find_people', { company_domains: ['coldiq.com'] })

  beforeEach(() => {
    initClient('http://test-api.local', 'test-key-123')
  })

  afterEach(() => {
    globalThis.fetch = originalFetch
    vi.restoreAllMocks()
  })

  it('preserves API `details` passthrough verbatim under providers_tried[i].upstream_error', async () => {
    stubProviders([makeProvider({ id: 'prospeo', hasResult: () => false })])

    const details = {
      error_code: 'INVALID_REQUEST',
      filter_error: "Invalid value '[CMO]' for filter 'job_titles'",
    }
    stubFetch(
      JSON.stringify({
        error: "INVALID_REQUEST: Invalid value '[CMO]' for filter 'job_titles'",
        details,
      }),
      400,
    )

    const result = await runFindPeople()

    expect('error' in result).toBe(true)
    if (!('error' in result)) return
    expect(result.providers_tried).toHaveLength(1)
    const tried = result.providers_tried[0]
    expect(tried.status).toBe(400)
    expect(tried.error).toContain('INVALID_REQUEST')
    expect(tried.upstream_error).toEqual({
      error_code: 'INVALID_REQUEST',
      filter_error: "Invalid value '[CMO]' for filter 'job_titles'",
    })
  })

  it('falls back to the full body when API has no `details` field', async () => {
    stubProviders([makeProvider({ id: 'unmigrated', hasResult: () => false })])

    stubFetch(JSON.stringify({ error: true, error_code: 'X', filter_error: 'whatever' }), 400)

    const result = await runFindPeople()

    expect('error' in result).toBe(true)
    if (!('error' in result)) return
    const tried = result.providers_tried[0]
    // The short `error` string is the JSON-stringified boolean (the old behavior),
    // but the structured body now rides along under `upstream_error` so the
    // caller can still recover the detail.
    expect(tried.error).toBe('true')
    expect(tried.upstream_error).toMatchObject({
      error_code: 'X',
      filter_error: 'whatever',
    })
  })

  it('defensively caps oversized upstream_error payloads to a 2KB string', async () => {
    stubProviders([makeProvider({ id: 'noisy', hasResult: () => false })])

    const huge = 'x'.repeat(5000)
    stubFetch(JSON.stringify({ error: 'bad', details: { blob: huge } }), 400)

    const result = await runFindPeople()

    expect('error' in result).toBe(true)
    if (!('error' in result)) return
    const upstream = result.providers_tried[0].upstream_error
    expect(typeof upstream).toBe('string')
    expect((upstream as string).length).toBeLessThanOrEqual(2048)
  })

  it('surfaces the synthetic non-JSON envelope as upstream_error', async () => {
    stubProviders([makeProvider({ id: 'empty', hasResult: () => false })])

    // No JSON body — client.ts synthesizes { error: 'Non-JSON response from API' },
    // which has no `details` field so the executor falls back to the full envelope.
    // Surfacing it (rather than dropping to undefined) keeps "upstream returned
    // garbage" debuggable end-to-end.
    stubFetch('not json', 500)

    const result = await runFindPeople()

    expect('error' in result).toBe(true)
    if (!('error' in result)) return
    const tried = result.providers_tried[0]
    expect(tried.upstream_error).toEqual({ error: 'Non-JSON response from API' })
  })
})
@@ -0,0 +1,55 @@
1
+ // Direct upstream probe — documents what FullEnrich /people/search actually
2
+ // honors as a company filter. The ColdIQ wrapper schema at
3
+ // src/providers/fullenrich/schema.ts declares `current_company_linkedin_urls`,
4
+ // but the live API rejects every shape with `error.filters.empty` (400). Only
5
+ // `current_company_domains` and `current_company_names` actually scope results.
6
+ //
7
+ // This is why the registry mapper for `fullenrich-people-search` is gated
8
+ // behind `company_domains` only — adding the LinkedIn URL field caused the
9
+ // upstream to treat the request as filterless and return a 50k-result global
10
+ // titles-only search, masquerading as company-scoped.
11
+ //
12
+ // Run: FULLENRICH_API_KEY=… npx tsx mcp/tests/live/fullenrich-upstream-probe.ts
13
+
14
const KEY = process.env.FULLENRICH_API_KEY
if (!KEY) { console.error('FULLENRICH_API_KEY required'); process.exit(1) }
// Named ENDPOINT rather than URL so the global `URL` class is not shadowed.
const ENDPOINT = 'https://app.fullenrich.com/api/v2/people/search'

/**
 * POSTs one filter payload to the FullEnrich people-search endpoint and prints
 * a one-line summary: status, elapsed ms, `metadata.total`, and (on non-200) a
 * 120-character excerpt of the response body.
 *
 * @param label Human-readable name of the probe, shown in the summary line.
 * @param body  JSON-serializable request body.
 */
async function call(label: string, body: unknown): Promise<void> {
  const t0 = Date.now()
  const res = await fetch(ENDPOINT, {
    method: 'POST',
    headers: { Authorization: `Bearer ${KEY}`, 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  })

  // Read the body exactly once. A Response body is single-use: calling
  // res.text() after a failed res.json() rejects because the stream has
  // already been consumed, so the non-JSON fallback must parse from the text.
  const raw = await res.text()
  let data: unknown
  try {
    data = JSON.parse(raw)
  } catch {
    data = { raw }
  }

  // Narrow defensively: the parsed body may be null, a primitive, or lack metadata.
  const metadata =
    typeof data === 'object' && data !== null ? (data as { metadata?: unknown }).metadata : undefined
  const total =
    typeof metadata === 'object' && metadata !== null ? (metadata as { total?: unknown }).total : undefined

  const ok = res.status === 200
  console.log(`[${ok ? 'OK ' : 'ERR'}] ${label.padEnd(60)} status=${res.status} ms=${Date.now() - t0} total=${total ?? '-'} ${ok ? '' : 'err=' + JSON.stringify(data).slice(0, 120)}`)
}
30
+
31
/**
 * Runs the probe sequence in order: two known-working company filters, five
 * LinkedIn-URL field/value variants, then a titles-only request and an
 * otherwise empty request. Each request is awaited before the next is sent.
 */
async function main() {
  // [label, value] pairs; the field name actually sent is the label's first word.
  const linkedInVariants = [
    ['current_company_linkedin_urls', 'https://www.linkedin.com/company/spendesk'],
    ['current_company_linkedin_url', 'https://www.linkedin.com/company/spendesk'],
    ['current_company_linkedin_urls (short)', 'linkedin.com/company/spendesk'],
    ['current_company_linkedin_urls (slug)', 'spendesk'],
    ['current_company_linkedin_ids', 'spendesk'],
  ] as const

  console.log('=== FullEnrich /people/search filter compatibility probe ===\n')

  console.log('▸ Known-working filters (should all return 200 with total > 0)')
  await call('current_company_domains', { current_company_domains: [{ value: 'spendesk.com' }], limit: 1 })
  await call('current_company_names', { current_company_names: [{ value: 'Spendesk' }], limit: 1 })

  console.log('\n▸ LinkedIn URL variants (declared in ColdIQ schema, NOT honored upstream)')
  for (const [field, value] of linkedInVariants) {
    const [key] = field.split(' ')
    await call(field, { [key]: [{ value }], limit: 1 })
  }

  console.log('\n▸ No-filter baseline (the buggy path: 50k+ titles-only results)')
  await call('titles-only, no company', { current_position_titles: [{ value: 'CEO' }], limit: 1 })
  await call('completely empty (sanity)', { limit: 1 })
}

// Any rejection is fatal: report it and exit non-zero.
main().catch((err) => { console.error('FATAL', err); process.exit(1) })
@@ -0,0 +1,83 @@
1
+ // Direct upstream probe — verifies PDL's /person/search accepts
2
+ // `job_company_linkedin_url` as a filterable term, and that the returned people
3
+ // actually work at the company we asked for.
4
+ //
5
+ // PDL's Elasticsearch DSL is permissive: an unknown field name returns 0 hits
6
+ // rather than an error, so a silent miss looks identical to "no data." The
7
+ // probe compares a known-good baseline (job_company_website) against the
8
+ // LinkedIn-URL variant.
9
+ //
10
+ // Run: PDL_API_KEY=… npx tsx mcp/tests/live/pdl-upstream-probe.ts
11
+
12
// Defaulting to '' gives KEY the type `string`, so no non-null assertion is
// needed where it is used; an empty value is still treated as missing.
const KEY = process.env.PDL_API_KEY ?? ''
if (!KEY) { console.error('PDL_API_KEY required'); process.exit(1) }
const BASE = 'https://api.peopledatalabs.com/v5'

/**
 * Runs one person search with the given `bool.must` clauses.
 *
 * @param must Clauses placed under `query.bool.must`.
 * @param size Maximum number of records requested (default 3).
 * @returns `{ ok, status, ms, data }` where `data` is the parsed JSON body, or
 *          the raw response text when the body is not valid JSON.
 */
async function search(must: unknown[], size = 3) {
  const t0 = Date.now()
  const res = await fetch(`${BASE}/person/search`, {
    method: 'POST',
    headers: { 'X-API-Key': KEY, 'Content-Type': 'application/json' },
    body: JSON.stringify({ query: { bool: { must } }, size }),
    signal: AbortSignal.timeout(30_000),
  })

  // Read the body exactly once. A Response body is single-use: falling back to
  // res.text() after res.json() has failed would itself throw because the
  // stream is already consumed.
  const raw = await res.text()
  // `any` kept deliberately: callers read loosely-typed fields off the payload.
  let data: any
  try {
    data = JSON.parse(raw)
  } catch {
    data = raw
  }
  return { ok: res.ok, status: res.status, ms: Date.now() - t0, data }
}
28
+
29
/**
 * Prints a header line for one search result, followed by one line per
 * returned person (at most five).
 *
 * @param label Name of the search variant.
 * @param r     Result object; reads `status`, `ms`, and `data` (`data.total`, `data.data`).
 */
function summarize(label: string, r: any) {
  const payload = r.data ?? {}
  const people: Array<any> = payload.data ?? []

  const header = `\n[${label}] status=${r.status} ms=${r.ms} total=${payload.total ?? '?'} returned=${people.length}`
  console.log(header)

  people.slice(0, 5).forEach((person) => {
    console.log(` - ${person.full_name} | ${person.job_title} | co=${person.job_company_name} | website=${person.job_company_website} | li=${person.job_company_linkedin_url}`)
  })
}
38
+
39
/**
 * Compares a search by company website (baseline) against the same search by
 * company LinkedIn URL, prints both totals, and exits with:
 *   0 — both return hits and the LinkedIn total is at least half the baseline
 *   1 — baseline has hits but the LinkedIn search returns exactly 0
 *   2 — LinkedIn total is under half the baseline, or no other case applied
 *       (reported as the baseline itself returning 0)
 */
async function main() {
  console.log('=== PDL upstream probe: verify job_company_linkedin_url ===')

  // Use a well-known mid-sized B2B SaaS for the comparison. Aircall is a good fit:
  // ~700 employees, indexed by both website and LinkedIn URL in PDL.
  const TARGET_DOMAIN = 'aircall.io'
  const TARGET_LINKEDIN = 'linkedin.com/company/aircall'

  // Baseline: search by known-working field (job_company_website)
  console.log(`\n▸ Baseline: search by job_company_website = ${TARGET_DOMAIN}`)
  const byDomain = await search([{ terms: { job_company_website: [TARGET_DOMAIN] } }])
  summarize('job_company_website', byDomain)

  // Test: same search but by LinkedIn URL
  console.log(`\n▸ Test: search by job_company_linkedin_url = ${TARGET_LINKEDIN}`)
  const byLinkedIn = await search([{ terms: { job_company_linkedin_url: [TARGET_LINKEDIN] } }])
  summarize('job_company_linkedin_url', byLinkedIn)

  const baselineTotal = byDomain.data?.total ?? 0
  const linkedinTotal = byLinkedIn.data?.total ?? 0

  console.log(`\n=== Result ===`)
  console.log(`baseline (by website): total=${baselineTotal}`)
  console.log(`test (by linkedin_url): total=${linkedinTotal}`)

  // process.exit never returns, so each branch below ends the run.
  if (linkedinTotal > 0 && baselineTotal > 0) {
    const ratio = linkedinTotal / baselineTotal
    console.log(`ratio = ${ratio.toFixed(2)} (1.0 = same company, both filters work)`)
    const comparable = ratio >= 0.5
    console.log(
      comparable
        ? '✅ job_company_linkedin_url WORKS — keep the mapping in registry.ts'
        : '⚠️ linkedin_url returns far fewer matches than website — investigate',
    )
    process.exit(comparable ? 0 : 2)
  }

  if (linkedinTotal === 0 && baselineTotal > 0) {
    console.log('❌ job_company_linkedin_url silently returns 0 — REMOVE the mapping clause for pdl in registry.ts and keep gate-only')
    process.exit(1)
  }

  console.log('⚠️ Baseline itself returned 0 — pick a different test company')
  process.exit(2)
}

// Any rejection is fatal: report it and exit non-zero.
main().catch((err) => { console.error('FATAL', err); process.exit(1) })