@coldiq/mcp 0.3.8 → 0.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,208 @@
1
+ // Normalize search_companies responses into a small, predictable shape.
2
+ //
3
+ // The search_companies waterfall fans across many providers, each with a
4
+ // different payload shape (CompanyEnrich nests at data.items[], Apollo at
5
+ // data.organizations[] + pagination, FullEnrich at data.companies[], others at
6
+ // data.data[]/results[]). The raw provider object per company is heavy
7
+ // (~3-4KB: full funding-round history, technologies, ~30-item keyword arrays,
8
+ // NAICS codes, every social URL, SEO descriptions, logos). Compact mode keeps
9
+ // only the fields agents act on downstream — company identity (name, domain,
10
+ // website, linkedin_url, used by find_people/find_emails) plus the firmographic
11
+ // signals a user filters on (employees, revenue, industry, country, city,
12
+ // founded_year, categories). It changes how many descriptive fields ride along,
13
+ // never which companies are returned. Verbose mode keeps the raw passthrough.
14
+
15
/**
 * Slimmed-down company record emitted in compact mode.
 *
 * Every property is optional: a key appears only when the provider payload
 * yielded a usable value for it.
 */
export interface CompactCompany {
  /** Company display name. */
  name?: string
  /** Bare domain of the company. */
  domain?: string
  /** Website URL as supplied by the provider. */
  website?: string
  /** LinkedIn URL of the company. */
  linkedin_url?: string
  /** Headcount, either an exact number or a provider string (e.g. a range). */
  employees?: string | number
  /** Revenue, either an exact number or a provider string (e.g. a range). */
  revenue?: string | number
  /** Industry label. */
  industry?: string
  /** Country name or code. */
  country?: string
  /** City name. */
  city?: string
  /** Founding year (or date string, depending on the provider). */
  founded_year?: string | number
  /** Category tags attached to the company. */
  categories?: string[]
}
28
+
29
/**
 * Resolve a dot-separated path (e.g. `location.country.name`) against an
 * arbitrary JSON-like value.
 *
 * - Array steps must be plain non-negative integers written as digits. An empty
 *   or padded segment is rejected instead of being coerced to index 0 by
 *   `Number()`.
 * - Object steps are read as own properties only, so inherited members such as
 *   `toString` or `constructor` are never mistaken for payload data.
 *
 * @param obj  Root value to walk; any type is accepted.
 * @param path Dot-separated property names / array indices.
 * @returns The value found at `path` (which may be `null`), or `undefined`
 *          when any step along the way is missing or not traversable.
 */
function getPath(obj: unknown, path: string): unknown {
  let cur: unknown = obj
  for (const part of path.split('.')) {
    if (cur == null) return undefined
    if (Array.isArray(cur)) {
      // Validate the segment text itself: Number('') === 0 and Number(' 1 ') === 1.
      if (!/^\d+$/.test(part)) return undefined
      cur = cur[Number(part)]
    } else if (typeof cur === 'object') {
      if (!Object.prototype.hasOwnProperty.call(cur, part)) return undefined
      cur = (cur as Record<string, unknown>)[part]
    } else {
      // Primitives (strings, numbers, booleans) cannot be descended into.
      return undefined
    }
  }
  return cur
}
46
+
47
/**
 * Try each candidate path in order and return the first value that is
 * neither `undefined`, `null`, nor the empty string.
 *
 * @param obj   Record to probe.
 * @param paths Dot-separated candidate paths, highest priority first.
 * @returns The first usable value, or `undefined` when every path is blank.
 */
function pick(obj: unknown, paths: string[]): unknown {
  let found: unknown
  paths.some((path) => {
    const candidate = getPath(obj, path)
    // `== null` covers both null and undefined.
    if (candidate == null || candidate === '') return false
    found = candidate
    return true
  })
  return found
}
54
+
55
/**
 * Coerce a raw provider value to a non-blank string.
 *
 * Strings are trimmed, and whitespace-only input counts as absent. Finite
 * numbers are stringified. `NaN` / `Infinity` are rejected rather than being
 * turned into the literal text "NaN" / "Infinity".
 *
 * @param v Raw value of any type.
 * @returns The cleaned string, or `undefined` when `v` carries no usable text.
 */
function asString(v: unknown): string | undefined {
  if (typeof v === 'string') {
    const trimmed = v.trim()
    return trimmed.length > 0 ? trimmed : undefined
  }
  if (typeof v === 'number' && Number.isFinite(v)) return String(v)
  return undefined
}
60
+
61
/**
 * Keep a raw provider value only if it is a finite number or a non-blank
 * string, preserving which of the two it was.
 *
 * Strings are trimmed, and whitespace-only input counts as absent. `NaN` /
 * `Infinity` are rejected because they do not survive JSON serialization.
 *
 * @param v Raw value of any type.
 * @returns The number, the cleaned string, or `undefined` when unusable.
 */
function asStringOrNumber(v: unknown): number | string | undefined {
  if (typeof v === 'number') return Number.isFinite(v) ? v : undefined
  if (typeof v === 'string') {
    const trimmed = v.trim()
    return trimmed.length > 0 ? trimmed : undefined
  }
  return undefined
}
66
+
67
// Host-anchored matcher: optional http(s) scheme, any number of subdomain
// labels, then exactly `linkedin.com` (optionally with a port) followed by the
// slash that starts the path.
const LINKEDIN_URL_PATTERN = /^\s*(?:https?:\/\/)?(?:[a-z0-9-]+\.)*linkedin\.com(?::\d+)?\//i

/**
 * Type guard: is `v` a string whose host is `linkedin.com` or one of its
 * subdomains?
 *
 * The match is anchored on the host rather than being a substring test, so
 * look-alike domains (`notlinkedin.com`, `linkedin.com.evil.io`) and URLs that
 * merely mention LinkedIn in their path or query string are rejected.
 * Scheme-less values such as `linkedin.com/company/x` are still accepted.
 *
 * @param v Raw value of any type.
 * @returns `true` when `v` is a LinkedIn URL with a path component.
 */
function isLinkedInUrl(v: unknown): v is string {
  return typeof v === 'string' && LINKEDIN_URL_PATTERN.test(v)
}
71
+
72
/**
 * Probe the candidate paths in order and return the first value that passes
 * the LinkedIn URL check. Values that fail the check are skipped, not returned.
 *
 * @param obj   Record to probe.
 * @param paths Dot-separated candidate paths, highest priority first.
 * @returns The first LinkedIn URL found, or `undefined` if none qualifies.
 */
function pickLinkedIn(obj: unknown, paths: string[]): string | undefined {
  return paths
    .map((path) => getPath(obj, path))
    .find((candidate): candidate is string => isLinkedInUrl(candidate))
}
79
+
80
// Keys providers use for a top-level companies array: CompanyEnrich `items`,
// FullEnrich/BlitzAPI `companies`, Apollo `organizations`, generic
// `data`/`results`, AI-Ark `content`. Keep in sync with the `hasResult` checks
// in registry.ts.
const TOP_LEVEL_COMPANY_KEYS: readonly string[] = ['items', 'companies', 'organizations', 'data', 'results', 'content']

// Keys probed one level down, under `data` (LinkUp wraps results as
// `data.companies[]`).
const NESTED_COMPANY_KEYS: readonly string[] = ['companies', 'organizations', 'results', 'items', 'content']

/** Collect, in key order, every array stored directly under one of `keys`. */
function arraysUnder(container: Record<string, unknown>, keys: readonly string[]): unknown[][] {
  const found: unknown[][] = []
  for (const key of keys) {
    const value = container[key]
    if (Array.isArray(value)) found.push(value)
  }
  return found
}

/**
 * Locate the companies array inside a provider response.
 *
 * A bare array is returned as-is. Otherwise the known top-level keys are
 * probed, then the same idea one level down under `data`. The first
 * *non-empty* array wins, so an empty placeholder under an earlier key can
 * never hide real results stored under a later or nested key. When only empty
 * arrays exist, the first of them is returned.
 *
 * @param data Raw provider response of any shape.
 * @returns The companies array, or `[]` when nothing recognizable is present.
 */
export function extractCompaniesArray(data: unknown): unknown[] {
  if (data == null) return []
  if (Array.isArray(data)) return data
  if (typeof data !== 'object') return []

  const d = data as Record<string, unknown>
  const candidates = arraysUnder(d, TOP_LEVEL_COMPANY_KEYS)

  const inner = d.data
  if (inner && typeof inner === 'object' && !Array.isArray(inner)) {
    candidates.push(...arraysUnder(inner as Record<string, unknown>, NESTED_COMPANY_KEYS))
  }

  return candidates.find((arr) => arr.length > 0) ?? candidates[0] ?? []
}
106
+
107
/**
 * Walk `paths` in priority order and return the first value that survives
 * `coerce`. Coercing per path (instead of coercing only the first non-blank
 * hit) means a present-but-wrong-typed value under an early path cannot hide a
 * valid value under a later one.
 */
function firstCoerced<T>(
  record: unknown,
  paths: readonly string[],
  coerce: (value: unknown) => T | undefined,
): T | undefined {
  for (const path of paths) {
    const value = coerce(getPath(record, path))
    if (value !== undefined) return value
  }
  return undefined
}

/**
 * Reduce one raw provider company record to the compact field set.
 *
 * Each output field is filled from the first provider-specific path that
 * yields a usable value; fields with no usable value are omitted entirely.
 * Output keys are assigned in a fixed order (name, domain, website,
 * linkedin_url, employees, revenue, industry, country, city, founded_year,
 * categories).
 *
 * @param record Raw company object from any provider.
 * @returns The compact record, or `null` when `record` is not a plain object
 *          or none of the known fields could be resolved.
 */
export function normalizeCompany(record: unknown): CompactCompany | null {
  if (!record || typeof record !== 'object' || Array.isArray(record)) return null
  const out: CompactCompany = {}

  const name = firstCoerced(record, ['name', 'company_name', 'organization.name', 'companyName'], asString)
  if (name !== undefined) out.name = name

  const domain = firstCoerced(
    record,
    ['domain', 'primary_domain', 'website_domain', 'organization.primary_domain'],
    asString,
  )
  if (domain !== undefined) out.domain = domain

  const website = firstCoerced(record, ['website', 'website_url', 'organization.website_url', 'url'], asString)
  if (website !== undefined) out.website = website

  // LinkedIn URL is strict-validated: a wrong value silently breaks downstream
  // find_people LinkedIn-driven lookups.
  const linkedin = pickLinkedIn(record, [
    'socials.linkedin_url',
    'linkedin_url',
    'organization.linkedin_url',
    'social_profiles.linkedin.url',
  ])
  if (linkedin) out.linkedin_url = linkedin

  const employees = firstCoerced(
    record,
    [
      'employees',
      'employee_count',
      'employees_on_linkedin', // BlitzAPI
      'estimated_num_employees',
      'num_employees',
      'organization.estimated_num_employees',
      'size',
      'headcount',
    ],
    asStringOrNumber,
  )
  if (employees !== undefined) out.employees = employees

  const revenue = firstCoerced(
    record,
    ['revenue', 'estimated_revenue', 'annual_revenue', 'organization.annual_revenue'],
    asStringOrNumber,
  )
  if (revenue !== undefined) out.revenue = revenue

  const industry = firstCoerced(record, ['industry', 'organization.industry'], asString)
  if (industry !== undefined) out.industry = industry

  const country = firstCoerced(
    record,
    ['location.country.name', 'location.country.code', 'country', 'organization.country', 'location_country'],
    asString,
  )
  if (country !== undefined) out.country = country

  const city = firstCoerced(record, ['location.city.name', 'city', 'organization.city', 'location_city'], asString)
  if (city !== undefined) out.city = city

  const foundedYear = firstCoerced(
    record,
    ['founded_year', 'organization.founded_year', 'foundation_date'],
    asStringOrNumber,
  )
  if (foundedYear !== undefined) out.founded_year = foundedYear

  // Only non-empty string tags are kept; the key is omitted if none remain.
  const categories = getPath(record, 'categories')
  if (Array.isArray(categories)) {
    const cats = categories.filter((c): c is string => typeof c === 'string' && c.length > 0)
    if (cats.length > 0) out.categories = cats
  }

  // Drop records that resolved to nothing meaningful.
  return Object.keys(out).length === 0 ? null : out
}
184
+
185
/** Shape returned by compact mode for a whole search response. */
export interface CompactCompaniesPayload {
  /** Normalized company records, in the order the provider returned them. */
  companies: Array<CompactCompany>
  /** Provider-reported overall match count, present only when one was found. */
  total?: number
}
189
+
190
// Where providers report the overall match count
// (CompanyEnrich: totalItems; Apollo: pagination.total_entries).
const TOTAL_COUNT_PATHS: readonly string[] = ['totalItems', 'total', 'pagination.total_entries', 'pagination.total']

/**
 * Convert a raw provider search response into the compact payload.
 *
 * Records that normalize to nothing are dropped; the rest keep their original
 * order. The total-addressable-market count is preserved when the provider
 * exposes one: the first path holding a finite number wins, so a non-numeric
 * value under an earlier path does not hide a real count under a later one.
 *
 * @param data Raw provider response of any shape.
 * @returns `{ companies }`, plus `total` when a numeric count was found.
 */
export function compactCompaniesPayload(data: unknown): CompactCompaniesPayload {
  const companies: CompactCompany[] = []
  for (const record of extractCompaniesArray(data)) {
    const compact = normalizeCompany(record)
    if (compact) companies.push(compact)
  }

  const out: CompactCompaniesPayload = { companies }

  if (data && typeof data === 'object' && !Array.isArray(data)) {
    for (const path of TOTAL_COUNT_PATHS) {
      const candidate = getPath(data, path)
      if (typeof candidate === 'number' && Number.isFinite(candidate)) {
        out.total = candidate
        break
      }
    }
  }

  return out
}
@@ -42,10 +42,21 @@ async function main() {
42
42
  console.log('\n-- A: query="SaaS" (OLD behavior — name/domain full-text) --')
43
43
  console.log(' ', JSON.stringify(names(await call('/companies/search', { ...COMMON, query: 'SaaS', pageSize: 6 })), null, 0))
44
44
 
45
- console.log('\n-- B: keywords=["saas"] Or (NEW behavior — activity tag filter) --')
46
- console.log(' ', JSON.stringify(names(await call('/companies/search', { ...COMMON, keywords: ['saas'], keywordsOperator: 'Or', pageSize: 6 })), null, 0))
45
+ console.log('\n-- B: keywords=["saas","b2b"] AND (text tags — concentrates AGENCIES) --')
46
+ console.log(' ', JSON.stringify(names(await call('/companies/search', { ...COMMON, keywords: ['saas', 'b2b'], keywordsOperator: 'And', pageSize: 6 })), null, 0))
47
47
 
48
- console.log('\nExpected: A returns companies NAMED "SaaS …"; B returns real SaaS businesses.')
48
+ // The `category` filter is a controlled vocabulary matched on a company's clean
49
+ // classification. These are the ONLY accepted values — anything else returns an
50
+ // empty set. This is the source of truth for COMPANYENRICH_CATEGORIES in registry.ts.
51
+ console.log('\n-- category vocabulary probe (valid value -> count; invalid -> empty) --')
52
+ for (const v of ['b2b', 'b2c', 'b2g', 'saas', 'service-provider', 'media', 'e-commerce', 'mobile', 'fintech', 'software', 'marketplace']) {
53
+ console.log(` ${v.padEnd(18)} ->`, (await call('/companies/search/count', { ...COMMON, category: [v] })).data)
54
+ }
55
+
56
+ console.log('\n-- C: category=["saas","b2b"] AND (controlled vocab — TRUE "B2B SaaS") --')
57
+ console.log(' ', JSON.stringify(names(await call('/companies/search', { ...COMMON, category: ['saas', 'b2b'], categoryOperator: 'And', pageSize: 10 })), null, 0))
58
+
59
+ console.log('\nExpected: A = companies NAMED "SaaS …"; B = agencies/service-providers; C = real B2B SaaS products.')
49
60
  }
50
61
 
51
62
  main().catch(console.error)
@@ -47,7 +47,7 @@ describe('MCP polling interval schedules', () => {
47
47
  expect(30_000 / 1500).toBeGreaterThanOrEqual(20)
48
48
  })
49
49
 
50
- it('LeadsFactory find-people: shorter front, ≥7 polls in 5 minutes', () => {
50
+ it('LeadsFactory find-people: fast front probes, then flat 12s ceiling', () => {
51
51
  const providers = getProviders('find_people')
52
52
  const lf = providers.find((p) => p.id === 'leadsfactory')!
53
53
  expect(lf?.async).toBeDefined()
@@ -55,7 +55,13 @@ describe('MCP polling interval schedules', () => {
55
55
  expect(fn(1)).toBe(2000) // faster first probe vs old 3000
56
56
  expect(fn(2)).toBe(5000)
57
57
  expect(fn(3)).toBe(12000)
58
- // Validates ≥7 polls still fit in 5 min (guard against overly aggressive growth)
59
- expect(evalPollSchedule(fn, 300_000).length).toBeGreaterThanOrEqual(7)
58
+ // Flat ceiling: never grows past 12s, so completion-detection lag is bounded
59
+ // to ≤12s (the old +8s/attempt growth reached 44–72s gaps).
60
+ expect(fn(7)).toBe(12000)
61
+ expect(fn(20)).toBe(12000)
62
+ const schedule = evalPollSchedule(fn, 300_000)
63
+ expect(Math.max(...schedule)).toBe(12000) // never exceeds the ceiling
64
+ // Validates ≥7 polls still fit in 5 min
65
+ expect(schedule.length).toBeGreaterThanOrEqual(7)
60
66
  })
61
67
  })
@@ -189,18 +189,45 @@ describe('companyenrich search_companies', () => {
189
189
  expect(p().hasResult({})).toBe(false)
190
190
  })
191
191
 
192
- it('mapParams routes keywords + industries through the `keywords` tag filter (Or), not `query`', () => {
192
+ it('mapParams splits controlled categories (And) from free-text themes (Or); never uses `query`', () => {
193
193
  // `query` is a name+domain full-text match — a theme like "SaaS" would surface
194
- // companies merely NAMED "SaaS …" instead of actual SaaS businesses. The tag
195
- // filter matches business activity, which is what our unified keywords mean.
194
+ // companies merely NAMED "SaaS …". "SaaS" is a controlled category, so it routes
195
+ // to the precise `category` filter; "fintech" has no category and stays free-text.
196
196
  const body = p().mapParams({ keywords: ['SaaS'], industries: ['fintech'], countries: ['US'] }).body as Record<string, unknown>
197
197
  expect(body.query).toBeUndefined()
198
- expect(body.keywords).toEqual(['SaaS', 'fintech'])
198
+ expect(body.category).toEqual(['saas'])
199
+ expect(body.categoryOperator).toBe('And')
200
+ expect(body.keywords).toEqual(['fintech'])
199
201
  expect(body.keywordsOperator).toBe('Or')
200
202
  })
201
203
 
202
- it('mapParams omits the keyword tag filter entirely when no keywords/industries given', () => {
204
+ it('mapParams routes "B2B SaaS" to category=[saas,b2b] with And (the user-intended conjunction)', () => {
205
+ const body = p().mapParams({ keywords: ['SaaS', 'B2B'], countries: ['FR'], max_employees: 500 }).body as Record<string, unknown>
206
+ expect(body.category).toEqual(['saas', 'b2b'])
207
+ expect(body.categoryOperator).toBe('And')
208
+ expect(body.keywords).toBeUndefined()
209
+ expect(body.keywordsOperator).toBeUndefined()
210
+ })
211
+
212
+ it('mapParams normalizes category casing/spacing/aliases (E-Commerce, ecommerce, "Service Provider")', () => {
213
+ const body = p().mapParams({ keywords: ['E-Commerce', 'ecommerce', 'Service Provider'] }).body as Record<string, unknown>
214
+ // ecommerce -> e-commerce canonical, and the duplicate is deduped
215
+ expect(body.category).toEqual(['e-commerce', 'service-provider'])
216
+ expect(body.keywords).toBeUndefined()
217
+ })
218
+
219
+ it('mapParams keeps purely free-text themes in the keyword tag filter (no category)', () => {
220
+ const body = p().mapParams({ keywords: ['fintech', 'cybersecurity'] }).body as Record<string, unknown>
221
+ expect(body.category).toBeUndefined()
222
+ expect(body.categoryOperator).toBeUndefined()
223
+ expect(body.keywords).toEqual(['fintech', 'cybersecurity'])
224
+ expect(body.keywordsOperator).toBe('Or')
225
+ })
226
+
227
+ it('mapParams omits both filters when no keywords/industries given', () => {
203
228
  const body = p().mapParams({ countries: ['FR'], min_employees: 50 }).body as Record<string, unknown>
229
+ expect(body.category).toBeUndefined()
230
+ expect(body.categoryOperator).toBeUndefined()
204
231
  expect(body.keywords).toBeUndefined()
205
232
  expect(body.keywordsOperator).toBeUndefined()
206
233
  expect(body.query).toBeUndefined()
@@ -48,9 +48,10 @@ describe('registry', () => {
48
48
  })
49
49
 
50
50
  describe('search_companies mapParams', () => {
51
- it('CompanyEnrich routes keywords + industries into the `keywords` tag filter (Or), not the name/domain `query` field', () => {
51
+ it('CompanyEnrich splits controlled categories (And) from free-text themes (Or); never uses the name/domain `query` field', () => {
52
52
  // `query` matches company NAME + domain, so a theme like "SaaS" returns
53
- // companies merely NAMED "SaaS …". The tag filter matches business activity.
53
+ // companies merely NAMED "SaaS …". "SaaS" is a controlled category → routes to
54
+ // the precise `category` filter; "Software" has no category → stays free-text.
54
55
  const providers = getProviders('search_companies')
55
56
  const ce = providers.find((p) => p.id === 'companyenrich')!
56
57
  const result = ce.mapParams({
@@ -63,31 +64,48 @@ describe('registry', () => {
63
64
  })
64
65
  const body = result.body as Record<string, unknown>
65
66
  expect(body.query).toBeUndefined()
66
- expect(body.keywords).toEqual(['SaaS', 'Software'])
67
+ expect(body.category).toEqual(['saas'])
68
+ expect(body.categoryOperator).toBe('And')
69
+ expect(body.keywords).toEqual(['Software'])
67
70
  expect(body.keywordsOperator).toBe('Or')
68
71
  expect(body.countries).toEqual(['US'])
69
72
  expect(body.employees).toEqual([{ from: 10, to: 200 }])
70
73
  expect(body.pageSize).toBe(25)
71
74
  })
72
75
 
73
- it('CompanyEnrich routes a single keyword through the tag filter (themes, not brand-name lookup)', () => {
76
+ it('CompanyEnrich routes "B2B SaaS" to category=[saas,b2b] with And (the user-intended conjunction)', () => {
77
+ const providers = getProviders('search_companies')
78
+ const ce = providers.find((p) => p.id === 'companyenrich')!
79
+ const body = ce.mapParams({ keywords: ['SaaS', 'B2B'], countries: ['FR'] }).body as Record<string, unknown>
80
+ expect(body.category).toEqual(['saas', 'b2b'])
81
+ expect(body.categoryOperator).toBe('And')
82
+ expect(body.keywords).toBeUndefined()
83
+ expect(body.keywordsOperator).toBeUndefined()
84
+ })
85
+
86
+ it('CompanyEnrich keeps a free-text theme in the keyword tag filter, with no category (themes, not brand-name lookup)', () => {
74
87
  // The unified `keywords` param means topics/business-models/themes — brand-name
75
88
  // lookups belong to enrich_company or the `names`/`domains` fields, not here.
89
+ // "fintech" is not a controlled category, so it stays in the keyword tag filter.
76
90
  const providers = getProviders('search_companies')
77
91
  const ce = providers.find((p) => p.id === 'companyenrich')!
78
92
  const result = ce.mapParams({ keywords: ['fintech'], limit: 5 })
79
93
  const body = result.body as Record<string, unknown>
80
94
  expect(body.query).toBeUndefined()
95
+ expect(body.category).toBeUndefined()
96
+ expect(body.categoryOperator).toBeUndefined()
81
97
  expect(body.keywords).toEqual(['fintech'])
82
98
  expect(body.keywordsOperator).toBe('Or')
83
99
  })
84
100
 
85
- it('CompanyEnrich omits the keyword tag filter when no keywords/industries provided', () => {
101
+ it('CompanyEnrich omits both category and keyword filters when no keywords/industries provided', () => {
86
102
  const providers = getProviders('search_companies')
87
103
  const ce = providers.find((p) => p.id === 'companyenrich')!
88
104
  const result = ce.mapParams({ countries: ['US'], limit: 5 })
89
105
  const body = result.body as Record<string, unknown>
90
106
  expect(body.query).toBeUndefined()
107
+ expect(body.category).toBeUndefined()
108
+ expect(body.categoryOperator).toBeUndefined()
91
109
  expect(body.keywords).toBeUndefined()
92
110
  expect(body.keywordsOperator).toBeUndefined()
93
111
  })
@@ -533,20 +551,22 @@ describe('registry', () => {
533
551
  expect((result.body as Record<string, unknown>).company_linkedin_urls).toBeUndefined()
534
552
  })
535
553
 
536
- it('LeadsFactory has async config with continuous backoff schedule', () => {
554
+ it('LeadsFactory has async config with capped backoff schedule', () => {
537
555
  const providers = getProviders('find_people')
538
556
  const lf = providers.find((p) => p.id === 'leadsfactory')!
539
557
  expect(lf.async).toBeDefined()
540
558
  expect(lf.async!.timeoutMs).toBe(300_000)
541
- // Function-form pollIntervalMs: fast first probe, then continuously growing backoff.
559
+ // Function-form pollIntervalMs: fast first probes, then a flat 12s ceiling
560
+ // so completion-detection lag stays bounded (≤12s) instead of growing
561
+ // unbounded into 44–72s gaps.
542
562
  const sched = lf.async!.pollIntervalMs
543
563
  expect(typeof sched).toBe('function')
544
564
  const fn = sched as (attempt: number) => number
545
565
  expect(fn(1)).toBe(2000)
546
566
  expect(fn(2)).toBe(5000)
547
567
  expect(fn(3)).toBe(12000)
548
- expect(fn(4)).toBe(20000)
549
- expect(fn(8)).toBe(52000)
568
+ expect(fn(4)).toBe(12000)
569
+ expect(fn(8)).toBe(12000)
550
570
  // At least 7 polls must fit inside the 5-minute timeout — guards against
551
571
  // future tuning that would make the ramp so steep we only get 1–2 polls.
552
572
  let cumulative = 0
@@ -1285,8 +1305,10 @@ describe('registry', () => {
1285
1305
  })
1286
1306
  expect(result.body).toEqual({
1287
1307
  countries: ['US'],
1288
- keywords: ['SaaS'],
1289
- keywordsOperator: 'Or',
1308
+ category: ['saas'],
1309
+ categoryOperator: 'And',
1310
+ keywords: undefined,
1311
+ keywordsOperator: undefined,
1290
1312
  technologies: ['Salesforce', 'HubSpot'],
1291
1313
  employees: undefined,
1292
1314
  foundedYear: undefined,
@@ -13,7 +13,7 @@ describe('search_companies handler', () => {
13
13
  globalThis.fetch = originalFetch
14
14
  })
15
15
 
16
- it('returns results from first successful provider', async () => {
16
+ it('returns results from first successful provider (compact by default)', async () => {
17
17
  globalThis.fetch = vi.fn(async () =>
18
18
  new Response(JSON.stringify({ data: [{ name: 'ColdIQ', domain: 'coldiq.com' }] }), { status: 200 })
19
19
  ) as typeof fetch
@@ -23,7 +23,83 @@ describe('search_companies handler', () => {
23
23
  expect(result.isError).toBeUndefined()
24
24
  const parsed = JSON.parse(result.content[0].text)
25
25
  expect(parsed._meta.provider).toBe('companyenrich')
26
- expect(parsed.data.data).toHaveLength(1)
26
+ // Compact-by-default: the raw provider array is normalized into companies[].
27
+ expect(parsed.data.companies).toHaveLength(1)
28
+ expect(parsed.data.companies[0]).toMatchObject({ name: 'ColdIQ', domain: 'coldiq.com' })
29
+ })
30
+
31
+ it('compact (default) keeps identity + firmographic fields and drops heavy ones', async () => {
32
+ // Mirror the real CompanyEnrich shape: companies under data.items[], each a
33
+ // heavy object with funding/technologies/keywords/socials/seo.
34
+ globalThis.fetch = vi.fn(async () =>
35
+ new Response(JSON.stringify({
36
+ items: [{
37
+ id: '0192562e-7008-702b-97a2-0b2c489d339b',
38
+ name: 'Hugging Face',
39
+ domain: 'huggingface.co',
40
+ website: 'https://huggingface.co',
41
+ industry: 'Business Services',
42
+ categories: ['b2b', 'b2c', 'saas'],
43
+ employees: '201-500',
44
+ revenue: '10m-50m',
45
+ founded_year: 2016,
46
+ description: 'A very long company description that should be dropped in compact mode...',
47
+ keywords: Array.from({ length: 28 }, (_, i) => `kw${i}`),
48
+ technologies: ['Stripe', 'Nginx', 'Node-Js'],
49
+ naics_codes: ['511210', '518210'],
50
+ socials: { linkedin_url: 'https://www.linkedin.com/company/huggingface', twitter_url: 'https://twitter.com/huggingface' },
51
+ financial: { total_funding: 395200000, funding: [{ date: '2024-08-01', type: 'Venture Round' }] },
52
+ location: { country: { code: 'FR', name: 'France' }, city: { name: 'Paris' } },
53
+ }],
54
+ page: 1,
55
+ totalItems: 10000,
56
+ }), { status: 200 })
57
+ ) as typeof fetch
58
+
59
+ const result = await searchCompaniesHandler({ keywords: ['SaaS'], countries: ['FR'], limit: 5 })
60
+
61
+ expect(result.isError).toBeUndefined()
62
+ const parsed = JSON.parse(result.content[0].text)
63
+ expect(parsed.data.total).toBe(10000)
64
+ expect(parsed.data.companies).toHaveLength(1)
65
+ const c = parsed.data.companies[0]
66
+ // Kept: identity + firmographics + downstream-critical linkedin_url.
67
+ expect(c).toEqual({
68
+ name: 'Hugging Face',
69
+ domain: 'huggingface.co',
70
+ website: 'https://huggingface.co',
71
+ linkedin_url: 'https://www.linkedin.com/company/huggingface',
72
+ employees: '201-500',
73
+ revenue: '10m-50m',
74
+ industry: 'Business Services',
75
+ country: 'France',
76
+ city: 'Paris',
77
+ founded_year: 2016,
78
+ categories: ['b2b', 'b2c', 'saas'],
79
+ })
80
+ // Dropped heavy fields.
81
+ expect(c.description).toBeUndefined()
82
+ expect(c.keywords).toBeUndefined()
83
+ expect(c.technologies).toBeUndefined()
84
+ expect(c.naics_codes).toBeUndefined()
85
+ expect(c.financial).toBeUndefined()
86
+ expect(c.id).toBeUndefined()
87
+ })
88
+
89
+ it('fields=verbose returns the raw provider passthrough unchanged', async () => {
90
+ globalThis.fetch = vi.fn(async () =>
91
+ new Response(JSON.stringify({ items: [{ name: 'Hugging Face', keywords: ['ai'], technologies: ['Stripe'] }], totalItems: 10000 }), { status: 200 })
92
+ ) as typeof fetch
93
+
94
+ const result = await searchCompaniesHandler({ keywords: ['SaaS'], limit: 5, fields: 'verbose' })
95
+
96
+ expect(result.isError).toBeUndefined()
97
+ const parsed = JSON.parse(result.content[0].text)
98
+ // Raw shape preserved: items[] with heavy fields, no companies[] normalization.
99
+ expect(parsed.data.items).toHaveLength(1)
100
+ expect(parsed.data.items[0].keywords).toEqual(['ai'])
101
+ expect(parsed.data.items[0].technologies).toEqual(['Stripe'])
102
+ expect(parsed.data.companies).toBeUndefined()
27
103
  })
28
104
 
29
105
  it('falls back to FullEnrich (priority 2) on CompanyEnrich failure', async () => {
@@ -0,0 +1,146 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import {
3
+ compactCompaniesPayload,
4
+ extractCompaniesArray,
5
+ normalizeCompany,
6
+ } from '../../src/utils/compact-companies.js'
7
+
8
// Covers where extractCompaniesArray looks for the companies list across
// provider response shapes, and what it returns when nothing matches.
describe('extractCompaniesArray', () => {
  // Fresh fixtures per call so no case can observe another's mutations.
  const single = (): Array<{ name: string }> => [{ name: 'A' }]
  const pair = (): Array<{ name: string }> => [{ name: 'A' }, { name: 'B' }]

  it('finds the array under each provider key', () => {
    const cases: Array<[Record<string, unknown>, number]> = [
      [{ items: single() }, 1], // CompanyEnrich
      [{ companies: single() }, 1], // FullEnrich
      [{ organizations: pair() }, 2], // Apollo
      [{ data: single() }, 1], // generic
      [{ results: single() }, 1],
    ]
    for (const [payload, expectedLength] of cases) {
      expect(extractCompaniesArray(payload)).toHaveLength(expectedLength)
    }
  })

  it('handles a bare top-level array', () => {
    const found = extractCompaniesArray(single())
    expect(found).toHaveLength(1)
  })

  it('finds AI-Ark content[] and LinkUp nested data.companies[]', () => {
    // AI-Ark companies live under content[] (registry hasResult: isNonEmptyArray(d.content)).
    const aiArk = extractCompaniesArray({ content: pair() })
    expect(aiArk).toHaveLength(2)

    // LinkUp (search/fundraising/hiring) nests under data.companies[]
    // (registry hasResult: isNonEmptyArray(d.data.companies)).
    const linkUpCompanies = extractCompaniesArray({ data: { companies: single() } })
    expect(linkUpCompanies).toHaveLength(1)
    const nestedOrganizations = extractCompaniesArray({ data: { organizations: pair() } })
    expect(nestedOrganizations).toHaveLength(2)
  })

  it('prefers a direct array under data over descending into a nested object', () => {
    // PDL/Prospeo/TheirStack shape: { data: [...] } must stay flat, not be skipped.
    const flat = extractCompaniesArray({ data: single() })
    expect(flat).toHaveLength(1)
  })

  it('returns [] for null / non-object / no recognized key', () => {
    const unusable: unknown[] = [null, 'nope', { totalItems: 10 }]
    for (const input of unusable) {
      expect(extractCompaniesArray(input)).toEqual([])
    }
  })
})
41
+
42
// Covers the per-record mapping from provider-specific shapes to the compact
// field set, including LinkedIn validation and empty-record handling.
describe('normalizeCompany', () => {
  it('maps the CompanyEnrich nested shape to compact fields', () => {
    const raw = {
      id: 'uuid-123',
      name: 'Mistral AI',
      domain: 'mistral.ai',
      website: 'https://mistral.ai',
      industry: 'Software',
      categories: ['b2b', 'saas'],
      employees: '201-500',
      revenue: '1m-10m',
      founded_year: 2023,
      description: 'long blurb',
      keywords: ['a', 'b'],
      technologies: ['Gmail'],
      socials: { linkedin_url: 'https://www.linkedin.com/company/mistralai' },
      location: { country: { code: 'FR', name: 'France' }, city: { name: 'Paris' } },
    }
    const expected = {
      name: 'Mistral AI',
      domain: 'mistral.ai',
      website: 'https://mistral.ai',
      linkedin_url: 'https://www.linkedin.com/company/mistralai',
      employees: '201-500',
      revenue: '1m-10m',
      industry: 'Software',
      country: 'France',
      city: 'Paris',
      founded_year: 2023,
      categories: ['b2b', 'saas'],
    }

    expect(normalizeCompany(raw)).toEqual(expected)
  })

  it('maps the Apollo organization shape (numeric employees, flat fields)', () => {
    const apolloOrg = {
      name: 'Apple',
      primary_domain: 'apple.com',
      website_url: 'https://apple.com',
      linkedin_url: 'https://www.linkedin.com/company/apple',
      estimated_num_employees: 164000,
      industry: 'consumer electronics',
      founded_year: 1976,
    }

    expect(normalizeCompany(apolloOrg)).toMatchObject({
      name: 'Apple',
      domain: 'apple.com',
      website: 'https://apple.com',
      linkedin_url: 'https://www.linkedin.com/company/apple',
      employees: 164000,
      industry: 'consumer electronics',
      founded_year: 1976,
    })
  })

  it('maps BlitzAPI employees_on_linkedin into employees', () => {
    const blitz = { name: 'Blitz Co', domain: 'blitz.co', employees_on_linkedin: 320 }
    const compact = normalizeCompany(blitz)
    expect(compact).toMatchObject({ name: 'Blitz Co', domain: 'blitz.co', employees: 320 })
  })

  it('rejects a non-LinkedIn url in the linkedin slot', () => {
    const compact = normalizeCompany({ name: 'X', socials: { linkedin_url: 'https://twitter.com/x' } })
    expect(compact?.linkedin_url).toBeUndefined()
    expect(compact?.name).toBe('X')
  })

  it('returns null for empty / non-object records', () => {
    const rejected: unknown[] = [null, {}, [{ name: 'A' }], 'x']
    for (const input of rejected) {
      expect(normalizeCompany(input)).toBeNull()
    }
  })

  it('omits fields that are not present', () => {
    const compact = normalizeCompany({ name: 'Solo' })
    expect(compact).toEqual({ name: 'Solo' })
  })
})
119
+
120
// Covers the whole-response wrapper: record normalization plus the optional
// total count.
describe('compactCompaniesPayload', () => {
  it('normalizes items[] and surfaces totalItems', () => {
    const response = {
      items: [
        { name: 'A', domain: 'a.com' },
        { name: 'B', domain: 'b.com' },
      ],
      totalItems: 10000,
    }
    const { companies, total } = compactCompaniesPayload(response)
    expect(companies).toHaveLength(2)
    expect(total).toBe(10000)
  })

  it('reads Apollo pagination.total_entries as total', () => {
    const response = { organizations: [{ name: 'A' }], pagination: { total_entries: 8371 } }
    const { companies, total } = compactCompaniesPayload(response)
    expect(companies).toHaveLength(1)
    expect(total).toBe(8371)
  })

  it('drops records that normalize to nothing but keeps real ones', () => {
    const { companies } = compactCompaniesPayload({ items: [{ name: 'A' }, {}, { foo: 'bar' }] })
    expect(companies).toHaveLength(1)
    expect(companies[0].name).toBe('A')
  })

  it('returns an empty companies array (no total) when there is nothing to extract', () => {
    const result = compactCompaniesPayload({ unexpected: true })
    expect(result).toEqual({ companies: [] })
  })
})