@coldiq/mcp 0.3.8 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +58 -17
- package/dist/registry.js.map +1 -1
- package/dist/tools/search-companies.d.ts +2 -1
- package/dist/tools/search-companies.d.ts.map +1 -1
- package/dist/tools/search-companies.js +10 -3
- package/dist/tools/search-companies.js.map +1 -1
- package/dist/utils/compact-companies.d.ts +21 -0
- package/dist/utils/compact-companies.d.ts.map +1 -0
- package/dist/utils/compact-companies.js +198 -0
- package/dist/utils/compact-companies.js.map +1 -0
- package/package.json +1 -1
- package/src/registry.ts +58 -17
- package/src/tools/search-companies.ts +10 -3
- package/src/utils/compact-companies.ts +208 -0
- package/tests/live/companyenrich-keyword-tag-probe.ts +14 -3
- package/tests/registry-polling.test.ts +9 -3
- package/tests/registry-search-companies.test.ts +32 -5
- package/tests/registry.test.ts +33 -11
- package/tests/tools/search-companies.test.ts +78 -2
- package/tests/utils/compact-companies.test.ts +146 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
// Normalize search_companies responses into a small, predictable shape.
|
|
2
|
+
//
|
|
3
|
+
// The search_companies waterfall fans across many providers, each with a
|
|
4
|
+
// different payload shape (CompanyEnrich nests at data.items[], Apollo at
|
|
5
|
+
// data.organizations[] + pagination, FullEnrich at data.companies[], others at
|
|
6
|
+
// data.data[]/results[]). The raw provider object per company is heavy
|
|
7
|
+
// (~3-4KB: full funding-round history, technologies, ~30-item keyword arrays,
|
|
8
|
+
// NAICS codes, every social URL, SEO descriptions, logos). Compact mode keeps
|
|
9
|
+
// only the fields agents act on downstream — company identity (name, domain,
|
|
10
|
+
// website, linkedin_url, used by find_people/find_emails) plus the firmographic
|
|
11
|
+
// signals a user filters on (employees, revenue, industry, country, city,
|
|
12
|
+
// founded_year, categories). It changes how many descriptive fields ride along,
|
|
13
|
+
// never which companies are returned. Verbose mode keeps the raw passthrough.
|
|
14
|
+
|
|
15
|
+
// Compact, provider-agnostic view of one company returned by search_companies.
// Every field is optional: normalizeCompany only sets a field when it found a
// usable value for it in the raw provider record.
export interface CompactCompany {
|
|
16
|
+
// Company display name.
name?: string
|
|
17
|
+
// Company domain as reported by the provider (e.g. `domain` / `primary_domain`).
domain?: string
|
|
18
|
+
// Website URL as reported by the provider.
website?: string
|
|
19
|
+
// LinkedIn URL; only set when the candidate value passes isLinkedInUrl.
linkedin_url?: string
|
|
20
|
+
// Headcount: a number for some providers, a string such as a range for others.
employees?: number | string
|
|
21
|
+
// Revenue: a number or a provider-formatted string.
revenue?: number | string
|
|
22
|
+
// Industry label.
industry?: string
|
|
23
|
+
// Country; normalizeCompany prefers `location.country.name` over `location.country.code`.
country?: string
|
|
24
|
+
// City name.
city?: string
|
|
25
|
+
// Founding year (or the provider's `foundation_date` value when that is what is available).
founded_year?: number | string
|
|
26
|
+
// Non-empty string entries of the record's `categories` array.
categories?: string[]
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
// Array path segments must be plain non-negative decimal indices. Hoisted so
// the pattern is compiled once rather than per segment.
const ARRAY_INDEX_SEGMENT = /^\d+$/

/**
 * Resolves a dot-separated path (e.g. `location.country.name` or `items.0.name`)
 * against an arbitrary JSON-like value.
 *
 * - Arrays are only traversed by non-negative decimal indices. An empty
 *   segment is rejected (`Number('')` is 0, which would otherwise silently
 *   select the first element), as are negative or fractional indices.
 * - Objects are only traversed through their own properties, so inherited
 *   members such as `constructor` or `toString` never leak out of a payload.
 *
 * @param obj  Value to traverse; `null`/`undefined` yields `undefined`.
 * @param path Dot-separated list of property names and/or array indices.
 * @returns The value at `path`, or `undefined` when any step cannot be resolved.
 */
function getPath(obj: unknown, path: string): unknown {
  let cur: unknown = obj
  for (const part of path.split('.')) {
    if (cur == null) return undefined
    if (Array.isArray(cur)) {
      if (!ARRAY_INDEX_SEGMENT.test(part)) return undefined
      cur = cur[Number(part)]
    } else if (typeof cur === 'object') {
      if (!Object.prototype.hasOwnProperty.call(cur, part)) return undefined
      cur = (cur as Record<string, unknown>)[part]
    } else {
      // Primitives (string, number, boolean, ...) have nothing to descend into.
      return undefined
    }
  }
  return cur
}
|
|
46
|
+
|
|
47
|
+
// Returns the value at the first path in `paths` that resolves (via getPath) to
// something other than undefined, null or the empty string; returns undefined
// when no path does. The value is returned as-is with no type check, so any
// other value at an earlier path (including objects, booleans, 0) wins over
// later paths.
function pick(obj: unknown, paths: string[]): unknown {
|
|
48
|
+
for (const p of paths) {
|
|
49
|
+
const v = getPath(obj, p)
|
|
50
|
+
// First non-empty hit wins; paths are tried in the order given.
if (v !== undefined && v !== null && v !== '') return v
|
|
51
|
+
}
|
|
52
|
+
return undefined
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
/**
 * Coerces a raw provider value to a non-empty string.
 *
 * Strings are trimmed, and whitespace-only strings count as missing so a
 * blank value never becomes a field. Finite numbers are stringified;
 * `NaN`/`Infinity` are rejected rather than emitted as "NaN"/"Infinity".
 *
 * @param v Raw value of unknown type.
 * @returns The trimmed string / stringified number, or `undefined` for
 *          anything else (objects, booleans, null, blank strings).
 */
function asString(v: unknown): string | undefined {
  if (typeof v === 'string') {
    const trimmed = v.trim()
    return trimmed.length > 0 ? trimmed : undefined
  }
  if (typeof v === 'number' && Number.isFinite(v)) return String(v)
  return undefined
}
|
|
60
|
+
|
|
61
|
+
/**
 * Coerces a raw provider value to a number or a non-empty string, keeping
 * numbers numeric (e.g. a headcount of 164000) and strings textual (e.g. a
 * range such as "201-500").
 *
 * Non-finite numbers (`NaN`, `Infinity`) are rejected. Strings are trimmed and
 * whitespace-only strings count as missing.
 *
 * @param v Raw value of unknown type.
 * @returns The finite number or trimmed string, or `undefined` for anything else.
 */
function asStringOrNumber(v: unknown): number | string | undefined {
  if (typeof v === 'number') return Number.isFinite(v) ? v : undefined
  if (typeof v === 'string') {
    const trimmed = v.trim()
    return trimmed.length > 0 ? trimmed : undefined
  }
  return undefined
}
|
|
66
|
+
|
|
67
|
+
// Matches a URL whose HOST is linkedin.com or one of its subdomains, with an
// optional http(s) scheme (or protocol-relative `//`), an optional port, and a
// path. Anchoring on the host is what rejects look-alikes such as
// `notlinkedin.com/`, `linkedin.com.evil.example/`, `linkedin.com@evil.example/`
// and URLs that merely mention `linkedin.com/` in their path or query string.
const LINKEDIN_URL_PATTERN = /^(?:(?:https?:)?\/\/)?(?:[a-z0-9-]+\.)*linkedin\.com(?::\d+)?\//i

/**
 * Type guard: true when `v` is a string that points at a linkedin.com page.
 *
 * As before, a path is required (`linkedin.com/...`); a bare host without a
 * trailing slash is not accepted. Surrounding whitespace is ignored for the
 * check only — the value itself is not modified.
 *
 * @param v Candidate value of unknown type.
 * @returns Whether `v` is a LinkedIn URL.
 */
function isLinkedInUrl(v: unknown): v is string {
  return typeof v === 'string' && LINKEDIN_URL_PATTERN.test(v.trim())
}
|
|
71
|
+
|
|
72
|
+
// Returns the value at the first path in `paths` that resolves (via getPath) to
// a string accepted by isLinkedInUrl; returns undefined when none does. Unlike
// pick, a present-but-invalid value at an earlier path does not stop the
// search: later paths are still tried.
function pickLinkedIn(obj: unknown, paths: string[]): string | undefined {
|
|
73
|
+
for (const p of paths) {
|
|
74
|
+
const v = getPath(obj, p)
|
|
75
|
+
// isLinkedInUrl is a type guard, so `v` is a string on this branch.
if (isLinkedInUrl(v)) return v
|
|
76
|
+
}
|
|
77
|
+
return undefined
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
// Probe the keys providers use to hold the companies array. Covers the flat
// shapes (CompanyEnrich `items`, FullEnrich/BlitzAPI `companies`, Apollo
// `organizations`, generic `data`/`results`, AI-Ark `content`) and the one-level
// nested shape LinkUp uses (`data.companies[]`). Keys are kept in sync with the
// `hasResult` checks in registry.ts so compact never returns [] for a provider
// that actually matched.
const TOP_LEVEL_COMPANY_KEYS: readonly string[] = ['items', 'companies', 'organizations', 'data', 'results', 'content']
const NESTED_COMPANY_KEYS: readonly string[] = ['companies', 'organizations', 'results', 'items', 'content']

// Returns the first NON-EMPTY array stored under one of `keys`, in key order.
function firstNonEmptyArray(container: Record<string, unknown>, keys: readonly string[]): unknown[] | undefined {
  for (const key of keys) {
    const candidate = container[key]
    if (Array.isArray(candidate) && candidate.length > 0) return candidate
  }
  return undefined
}

/**
 * Locates the array of raw company records inside a provider response.
 *
 * Lookup order: a bare top-level array, then the known top-level keys, then
 * the same kind of keys one level down under `data`. Empty arrays are skipped
 * while probing, so an empty array under an earlier key (e.g. `items: []`)
 * cannot hide a populated array under a later key or under the nested `data`
 * wrapper.
 *
 * @param data Raw provider response of unknown shape.
 * @returns The raw company records, or `[]` when none can be found.
 */
export function extractCompaniesArray(data: unknown): unknown[] {
  if (Array.isArray(data)) return data
  if (data == null || typeof data !== 'object') return []

  const top = data as Record<string, unknown>
  const flat = firstNonEmptyArray(top, TOP_LEVEL_COMPANY_KEYS)
  if (flat) return flat

  // LinkUp (search / fundraising / hiring) wraps results under data.companies[].
  const inner = top.data
  if (inner && typeof inner === 'object' && !Array.isArray(inner)) {
    const nested = firstNonEmptyArray(inner as Record<string, unknown>, NESTED_COMPANY_KEYS)
    if (nested) return nested
  }
  return []
}
|
|
106
|
+
|
|
107
|
+
// Candidate paths per compact field, in priority order. Each list spans the
// spellings the different providers use for the same piece of information.
const COMPANY_NAME_PATHS: readonly string[] = ['name', 'company_name', 'organization.name', 'companyName']
const COMPANY_DOMAIN_PATHS: readonly string[] = ['domain', 'primary_domain', 'website_domain', 'organization.primary_domain']
const COMPANY_WEBSITE_PATHS: readonly string[] = ['website', 'website_url', 'organization.website_url', 'url']
const COMPANY_LINKEDIN_PATHS: string[] = [
  'socials.linkedin_url',
  'linkedin_url',
  'organization.linkedin_url',
  'social_profiles.linkedin.url',
]
const COMPANY_EMPLOYEES_PATHS: readonly string[] = [
  'employees',
  'employee_count',
  'employees_on_linkedin', // BlitzAPI
  'estimated_num_employees',
  'num_employees',
  'organization.estimated_num_employees',
  'size',
  'headcount',
]
const COMPANY_REVENUE_PATHS: readonly string[] = [
  'revenue',
  'estimated_revenue',
  'annual_revenue',
  'organization.annual_revenue',
]
const COMPANY_INDUSTRY_PATHS: readonly string[] = ['industry', 'organization.industry']
const COMPANY_COUNTRY_PATHS: readonly string[] = [
  'location.country.name',
  'location.country.code',
  'country',
  'organization.country',
  'location_country',
]
const COMPANY_CITY_PATHS: readonly string[] = [
  'location.city.name',
  'city',
  'organization.city',
  'location_city',
]
const COMPANY_FOUNDED_YEAR_PATHS: readonly string[] = ['founded_year', 'organization.founded_year', 'foundation_date']

// Returns the first path whose value survives `coerce`. Coercing per path
// (instead of picking a raw value first and coercing it afterwards) means a
// value of the wrong type at an earlier path — e.g. an object under `country`
// — no longer hides a usable value at a later path.
function firstCoerced<T>(
  record: unknown,
  paths: readonly string[],
  coerce: (value: unknown) => T | undefined,
): T | undefined {
  for (const path of paths) {
    const value = coerce(getPath(record, path))
    if (value !== undefined) return value
  }
  return undefined
}

/**
 * Maps one raw provider company record to the compact shape.
 *
 * Only fields for which a usable value is found are set; everything else in
 * the raw record is dropped.
 *
 * @param record Raw provider record of unknown shape.
 * @returns The compact company, or `null` when `record` is not a plain object
 *          or none of the compact fields could be resolved.
 */
export function normalizeCompany(record: unknown): CompactCompany | null {
  if (!record || typeof record !== 'object' || Array.isArray(record)) return null
  const out: CompactCompany = {}

  const name = firstCoerced(record, COMPANY_NAME_PATHS, asString)
  if (name !== undefined) out.name = name

  const domain = firstCoerced(record, COMPANY_DOMAIN_PATHS, asString)
  if (domain !== undefined) out.domain = domain

  const website = firstCoerced(record, COMPANY_WEBSITE_PATHS, asString)
  if (website !== undefined) out.website = website

  // LinkedIn URL is strict-validated: a wrong value silently breaks downstream
  // find_people LinkedIn-driven lookups.
  const linkedin = pickLinkedIn(record, COMPANY_LINKEDIN_PATHS)
  if (linkedin) out.linkedin_url = linkedin

  const employees = firstCoerced(record, COMPANY_EMPLOYEES_PATHS, asStringOrNumber)
  if (employees !== undefined) out.employees = employees

  const revenue = firstCoerced(record, COMPANY_REVENUE_PATHS, asStringOrNumber)
  if (revenue !== undefined) out.revenue = revenue

  const industry = firstCoerced(record, COMPANY_INDUSTRY_PATHS, asString)
  if (industry !== undefined) out.industry = industry

  const country = firstCoerced(record, COMPANY_COUNTRY_PATHS, asString)
  if (country !== undefined) out.country = country

  const city = firstCoerced(record, COMPANY_CITY_PATHS, asString)
  if (city !== undefined) out.city = city

  const foundedYear = firstCoerced(record, COMPANY_FOUNDED_YEAR_PATHS, asStringOrNumber)
  if (foundedYear !== undefined) out.founded_year = foundedYear

  // Keep only non-empty string categories; omit the field when none remain.
  const categories = getPath(record, 'categories')
  if (Array.isArray(categories)) {
    const cats = categories.filter((c): c is string => typeof c === 'string' && c.length > 0)
    if (cats.length > 0) out.categories = cats
  }

  // Drop records that resolved to nothing meaningful.
  return Object.keys(out).length === 0 ? null : out
}
|
|
184
|
+
|
|
185
|
+
// Compact search_companies response body produced by compactCompaniesPayload.
export interface CompactCompaniesPayload {
|
|
186
|
+
// Normalized records; raw records that normalize to null are left out.
companies: CompactCompany[]
|
|
187
|
+
// Provider-reported total match count; only set when the provider exposes a numeric one.
total?: number
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
/**
 * Converts a raw provider search response into the compact payload.
 *
 * Company records are located with extractCompaniesArray and normalized one by
 * one; records that normalize to `null` are dropped.
 *
 * @param data Raw provider response of unknown shape.
 * @returns `{ companies }`, plus `total` when the provider exposes a numeric
 *          total match count.
 */
export function compactCompaniesPayload(data: unknown): CompactCompaniesPayload {
  const companies: CompactCompany[] = []
  for (const record of extractCompaniesArray(data)) {
    const company = normalizeCompany(record)
    if (company) companies.push(company)
  }

  const out: CompactCompaniesPayload = { companies }

  // Preserve the total-addressable-market count when the provider exposes one
  // (CompanyEnrich: data.totalItems; Apollo: data.pagination.total_entries).
  // Each key is probed on its own so that a non-numeric value under an earlier
  // key (e.g. a string or object `total`) cannot hide a numeric count under a
  // later key.
  if (data && typeof data === 'object' && !Array.isArray(data)) {
    for (const path of ['totalItems', 'total', 'pagination.total_entries', 'pagination.total']) {
      const candidate = pick(data, [path])
      if (typeof candidate === 'number' && Number.isFinite(candidate)) {
        out.total = candidate
        break
      }
    }
  }

  return out
}
|
|
@@ -42,10 +42,21 @@ async function main() {
|
|
|
42
42
|
console.log('\n-- A: query="SaaS" (OLD behavior — name/domain full-text) --')
|
|
43
43
|
console.log(' ', JSON.stringify(names(await call('/companies/search', { ...COMMON, query: 'SaaS', pageSize: 6 })), null, 0))
|
|
44
44
|
|
|
45
|
-
console.log('\n-- B: keywords=["saas"]
|
|
46
|
-
console.log(' ', JSON.stringify(names(await call('/companies/search', { ...COMMON, keywords: ['saas'], keywordsOperator: '
|
|
45
|
+
console.log('\n-- B: keywords=["saas","b2b"] AND (text tags — concentrates AGENCIES) --')
|
|
46
|
+
console.log(' ', JSON.stringify(names(await call('/companies/search', { ...COMMON, keywords: ['saas', 'b2b'], keywordsOperator: 'And', pageSize: 6 })), null, 0))
|
|
47
47
|
|
|
48
|
-
|
|
48
|
+
// The `category` filter is a controlled vocabulary matched on a company's clean
|
|
49
|
+
// classification. These are the ONLY accepted values — anything else returns an
|
|
50
|
+
// empty set. This is the source of truth for COMPANYENRICH_CATEGORIES in registry.ts.
|
|
51
|
+
console.log('\n-- category vocabulary probe (valid value -> count; invalid -> empty) --')
|
|
52
|
+
for (const v of ['b2b', 'b2c', 'b2g', 'saas', 'service-provider', 'media', 'e-commerce', 'mobile', 'fintech', 'software', 'marketplace']) {
|
|
53
|
+
console.log(` ${v.padEnd(18)} ->`, (await call('/companies/search/count', { ...COMMON, category: [v] })).data)
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
console.log('\n-- C: category=["saas","b2b"] AND (controlled vocab — TRUE "B2B SaaS") --')
|
|
57
|
+
console.log(' ', JSON.stringify(names(await call('/companies/search', { ...COMMON, category: ['saas', 'b2b'], categoryOperator: 'And', pageSize: 10 })), null, 0))
|
|
58
|
+
|
|
59
|
+
console.log('\nExpected: A = companies NAMED "SaaS …"; B = agencies/service-providers; C = real B2B SaaS products.')
|
|
49
60
|
}
|
|
50
61
|
|
|
51
62
|
main().catch(console.error)
|
|
@@ -47,7 +47,7 @@ describe('MCP polling interval schedules', () => {
|
|
|
47
47
|
expect(30_000 / 1500).toBeGreaterThanOrEqual(20)
|
|
48
48
|
})
|
|
49
49
|
|
|
50
|
-
it('LeadsFactory find-people:
|
|
50
|
+
it('LeadsFactory find-people: fast front probes, then flat 12s ceiling', () => {
|
|
51
51
|
const providers = getProviders('find_people')
|
|
52
52
|
const lf = providers.find((p) => p.id === 'leadsfactory')!
|
|
53
53
|
expect(lf?.async).toBeDefined()
|
|
@@ -55,7 +55,13 @@ describe('MCP polling interval schedules', () => {
|
|
|
55
55
|
expect(fn(1)).toBe(2000) // faster first probe vs old 3000
|
|
56
56
|
expect(fn(2)).toBe(5000)
|
|
57
57
|
expect(fn(3)).toBe(12000)
|
|
58
|
-
//
|
|
59
|
-
|
|
58
|
+
// Flat ceiling: never grows past 12s, so completion-detection lag is bounded
|
|
59
|
+
// to ≤12s (the old +8s/attempt growth reached 44–72s gaps).
|
|
60
|
+
expect(fn(7)).toBe(12000)
|
|
61
|
+
expect(fn(20)).toBe(12000)
|
|
62
|
+
const schedule = evalPollSchedule(fn, 300_000)
|
|
63
|
+
expect(Math.max(...schedule)).toBe(12000) // never exceeds the ceiling
|
|
64
|
+
// Validates ≥7 polls still fit in 5 min
|
|
65
|
+
expect(schedule.length).toBeGreaterThanOrEqual(7)
|
|
60
66
|
})
|
|
61
67
|
})
|
|
@@ -189,18 +189,45 @@ describe('companyenrich search_companies', () => {
|
|
|
189
189
|
expect(p().hasResult({})).toBe(false)
|
|
190
190
|
})
|
|
191
191
|
|
|
192
|
-
it('mapParams
|
|
192
|
+
it('mapParams splits controlled categories (And) from free-text themes (Or); never uses `query`', () => {
|
|
193
193
|
// `query` is a name+domain full-text match — a theme like "SaaS" would surface
|
|
194
|
-
// companies merely NAMED "SaaS …"
|
|
195
|
-
//
|
|
194
|
+
// companies merely NAMED "SaaS …". "SaaS" is a controlled category, so it routes
|
|
195
|
+
// to the precise `category` filter; "fintech" has no category and stays free-text.
|
|
196
196
|
const body = p().mapParams({ keywords: ['SaaS'], industries: ['fintech'], countries: ['US'] }).body as Record<string, unknown>
|
|
197
197
|
expect(body.query).toBeUndefined()
|
|
198
|
-
expect(body.
|
|
198
|
+
expect(body.category).toEqual(['saas'])
|
|
199
|
+
expect(body.categoryOperator).toBe('And')
|
|
200
|
+
expect(body.keywords).toEqual(['fintech'])
|
|
199
201
|
expect(body.keywordsOperator).toBe('Or')
|
|
200
202
|
})
|
|
201
203
|
|
|
202
|
-
it('mapParams
|
|
204
|
+
it('mapParams routes "B2B SaaS" to category=[saas,b2b] with And (the user-intended conjunction)', () => {
|
|
205
|
+
const body = p().mapParams({ keywords: ['SaaS', 'B2B'], countries: ['FR'], max_employees: 500 }).body as Record<string, unknown>
|
|
206
|
+
expect(body.category).toEqual(['saas', 'b2b'])
|
|
207
|
+
expect(body.categoryOperator).toBe('And')
|
|
208
|
+
expect(body.keywords).toBeUndefined()
|
|
209
|
+
expect(body.keywordsOperator).toBeUndefined()
|
|
210
|
+
})
|
|
211
|
+
|
|
212
|
+
it('mapParams normalizes category casing/spacing/aliases (E-Commerce, ecommerce, "Service Provider")', () => {
|
|
213
|
+
const body = p().mapParams({ keywords: ['E-Commerce', 'ecommerce', 'Service Provider'] }).body as Record<string, unknown>
|
|
214
|
+
// ecommerce -> e-commerce canonical, and the duplicate is deduped
|
|
215
|
+
expect(body.category).toEqual(['e-commerce', 'service-provider'])
|
|
216
|
+
expect(body.keywords).toBeUndefined()
|
|
217
|
+
})
|
|
218
|
+
|
|
219
|
+
it('mapParams keeps purely free-text themes in the keyword tag filter (no category)', () => {
|
|
220
|
+
const body = p().mapParams({ keywords: ['fintech', 'cybersecurity'] }).body as Record<string, unknown>
|
|
221
|
+
expect(body.category).toBeUndefined()
|
|
222
|
+
expect(body.categoryOperator).toBeUndefined()
|
|
223
|
+
expect(body.keywords).toEqual(['fintech', 'cybersecurity'])
|
|
224
|
+
expect(body.keywordsOperator).toBe('Or')
|
|
225
|
+
})
|
|
226
|
+
|
|
227
|
+
it('mapParams omits both filters when no keywords/industries given', () => {
|
|
203
228
|
const body = p().mapParams({ countries: ['FR'], min_employees: 50 }).body as Record<string, unknown>
|
|
229
|
+
expect(body.category).toBeUndefined()
|
|
230
|
+
expect(body.categoryOperator).toBeUndefined()
|
|
204
231
|
expect(body.keywords).toBeUndefined()
|
|
205
232
|
expect(body.keywordsOperator).toBeUndefined()
|
|
206
233
|
expect(body.query).toBeUndefined()
|
package/tests/registry.test.ts
CHANGED
|
@@ -48,9 +48,10 @@ describe('registry', () => {
|
|
|
48
48
|
})
|
|
49
49
|
|
|
50
50
|
describe('search_companies mapParams', () => {
|
|
51
|
-
it('CompanyEnrich
|
|
51
|
+
it('CompanyEnrich splits controlled categories (And) from free-text themes (Or); never uses the name/domain `query` field', () => {
|
|
52
52
|
// `query` matches company NAME + domain, so a theme like "SaaS" returns
|
|
53
|
-
// companies merely NAMED "SaaS …".
|
|
53
|
+
// companies merely NAMED "SaaS …". "SaaS" is a controlled category → routes to
|
|
54
|
+
// the precise `category` filter; "Software" has no category → stays free-text.
|
|
54
55
|
const providers = getProviders('search_companies')
|
|
55
56
|
const ce = providers.find((p) => p.id === 'companyenrich')!
|
|
56
57
|
const result = ce.mapParams({
|
|
@@ -63,31 +64,48 @@ describe('registry', () => {
|
|
|
63
64
|
})
|
|
64
65
|
const body = result.body as Record<string, unknown>
|
|
65
66
|
expect(body.query).toBeUndefined()
|
|
66
|
-
expect(body.
|
|
67
|
+
expect(body.category).toEqual(['saas'])
|
|
68
|
+
expect(body.categoryOperator).toBe('And')
|
|
69
|
+
expect(body.keywords).toEqual(['Software'])
|
|
67
70
|
expect(body.keywordsOperator).toBe('Or')
|
|
68
71
|
expect(body.countries).toEqual(['US'])
|
|
69
72
|
expect(body.employees).toEqual([{ from: 10, to: 200 }])
|
|
70
73
|
expect(body.pageSize).toBe(25)
|
|
71
74
|
})
|
|
72
75
|
|
|
73
|
-
it('CompanyEnrich routes
|
|
76
|
+
it('CompanyEnrich routes "B2B SaaS" to category=[saas,b2b] with And (the user-intended conjunction)', () => {
|
|
77
|
+
const providers = getProviders('search_companies')
|
|
78
|
+
const ce = providers.find((p) => p.id === 'companyenrich')!
|
|
79
|
+
const body = ce.mapParams({ keywords: ['SaaS', 'B2B'], countries: ['FR'] }).body as Record<string, unknown>
|
|
80
|
+
expect(body.category).toEqual(['saas', 'b2b'])
|
|
81
|
+
expect(body.categoryOperator).toBe('And')
|
|
82
|
+
expect(body.keywords).toBeUndefined()
|
|
83
|
+
expect(body.keywordsOperator).toBeUndefined()
|
|
84
|
+
})
|
|
85
|
+
|
|
86
|
+
it('CompanyEnrich keeps a free-text theme in the keyword tag filter, with no category (themes, not brand-name lookup)', () => {
|
|
74
87
|
// The unified `keywords` param means topics/business-models/themes — brand-name
|
|
75
88
|
// lookups belong to enrich_company or the `names`/`domains` fields, not here.
|
|
89
|
+
// "fintech" is not a controlled category, so it stays in the keyword tag filter.
|
|
76
90
|
const providers = getProviders('search_companies')
|
|
77
91
|
const ce = providers.find((p) => p.id === 'companyenrich')!
|
|
78
92
|
const result = ce.mapParams({ keywords: ['fintech'], limit: 5 })
|
|
79
93
|
const body = result.body as Record<string, unknown>
|
|
80
94
|
expect(body.query).toBeUndefined()
|
|
95
|
+
expect(body.category).toBeUndefined()
|
|
96
|
+
expect(body.categoryOperator).toBeUndefined()
|
|
81
97
|
expect(body.keywords).toEqual(['fintech'])
|
|
82
98
|
expect(body.keywordsOperator).toBe('Or')
|
|
83
99
|
})
|
|
84
100
|
|
|
85
|
-
it('CompanyEnrich omits
|
|
101
|
+
it('CompanyEnrich omits both category and keyword filters when no keywords/industries provided', () => {
|
|
86
102
|
const providers = getProviders('search_companies')
|
|
87
103
|
const ce = providers.find((p) => p.id === 'companyenrich')!
|
|
88
104
|
const result = ce.mapParams({ countries: ['US'], limit: 5 })
|
|
89
105
|
const body = result.body as Record<string, unknown>
|
|
90
106
|
expect(body.query).toBeUndefined()
|
|
107
|
+
expect(body.category).toBeUndefined()
|
|
108
|
+
expect(body.categoryOperator).toBeUndefined()
|
|
91
109
|
expect(body.keywords).toBeUndefined()
|
|
92
110
|
expect(body.keywordsOperator).toBeUndefined()
|
|
93
111
|
})
|
|
@@ -533,20 +551,22 @@ describe('registry', () => {
|
|
|
533
551
|
expect((result.body as Record<string, unknown>).company_linkedin_urls).toBeUndefined()
|
|
534
552
|
})
|
|
535
553
|
|
|
536
|
-
it('LeadsFactory has async config with
|
|
554
|
+
it('LeadsFactory has async config with capped backoff schedule', () => {
|
|
537
555
|
const providers = getProviders('find_people')
|
|
538
556
|
const lf = providers.find((p) => p.id === 'leadsfactory')!
|
|
539
557
|
expect(lf.async).toBeDefined()
|
|
540
558
|
expect(lf.async!.timeoutMs).toBe(300_000)
|
|
541
|
-
// Function-form pollIntervalMs: fast first
|
|
559
|
+
// Function-form pollIntervalMs: fast first probes, then a flat 12s ceiling
|
|
560
|
+
// so completion-detection lag stays bounded (≤12s) instead of growing
|
|
561
|
+
// unbounded into 44–72s gaps.
|
|
542
562
|
const sched = lf.async!.pollIntervalMs
|
|
543
563
|
expect(typeof sched).toBe('function')
|
|
544
564
|
const fn = sched as (attempt: number) => number
|
|
545
565
|
expect(fn(1)).toBe(2000)
|
|
546
566
|
expect(fn(2)).toBe(5000)
|
|
547
567
|
expect(fn(3)).toBe(12000)
|
|
548
|
-
expect(fn(4)).toBe(
|
|
549
|
-
expect(fn(8)).toBe(
|
|
568
|
+
expect(fn(4)).toBe(12000)
|
|
569
|
+
expect(fn(8)).toBe(12000)
|
|
550
570
|
// At least 7 polls must fit inside the 5-minute timeout — guards against
|
|
551
571
|
// future tuning that would make the ramp so steep we only get 1–2 polls.
|
|
552
572
|
let cumulative = 0
|
|
@@ -1285,8 +1305,10 @@ describe('registry', () => {
|
|
|
1285
1305
|
})
|
|
1286
1306
|
expect(result.body).toEqual({
|
|
1287
1307
|
countries: ['US'],
|
|
1288
|
-
|
|
1289
|
-
|
|
1308
|
+
category: ['saas'],
|
|
1309
|
+
categoryOperator: 'And',
|
|
1310
|
+
keywords: undefined,
|
|
1311
|
+
keywordsOperator: undefined,
|
|
1290
1312
|
technologies: ['Salesforce', 'HubSpot'],
|
|
1291
1313
|
employees: undefined,
|
|
1292
1314
|
foundedYear: undefined,
|
|
@@ -13,7 +13,7 @@ describe('search_companies handler', () => {
|
|
|
13
13
|
globalThis.fetch = originalFetch
|
|
14
14
|
})
|
|
15
15
|
|
|
16
|
-
it('returns results from first successful provider', async () => {
|
|
16
|
+
it('returns results from first successful provider (compact by default)', async () => {
|
|
17
17
|
globalThis.fetch = vi.fn(async () =>
|
|
18
18
|
new Response(JSON.stringify({ data: [{ name: 'ColdIQ', domain: 'coldiq.com' }] }), { status: 200 })
|
|
19
19
|
) as typeof fetch
|
|
@@ -23,7 +23,83 @@ describe('search_companies handler', () => {
|
|
|
23
23
|
expect(result.isError).toBeUndefined()
|
|
24
24
|
const parsed = JSON.parse(result.content[0].text)
|
|
25
25
|
expect(parsed._meta.provider).toBe('companyenrich')
|
|
26
|
-
|
|
26
|
+
// Compact-by-default: the raw provider array is normalized into companies[].
|
|
27
|
+
expect(parsed.data.companies).toHaveLength(1)
|
|
28
|
+
expect(parsed.data.companies[0]).toMatchObject({ name: 'ColdIQ', domain: 'coldiq.com' })
|
|
29
|
+
})
|
|
30
|
+
|
|
31
|
+
it('compact (default) keeps identity + firmographic fields and drops heavy ones', async () => {
|
|
32
|
+
// Mirror the real CompanyEnrich shape: companies under data.items[], each a
|
|
33
|
+
// heavy object with funding/technologies/keywords/socials/seo.
|
|
34
|
+
globalThis.fetch = vi.fn(async () =>
|
|
35
|
+
new Response(JSON.stringify({
|
|
36
|
+
items: [{
|
|
37
|
+
id: '0192562e-7008-702b-97a2-0b2c489d339b',
|
|
38
|
+
name: 'Hugging Face',
|
|
39
|
+
domain: 'huggingface.co',
|
|
40
|
+
website: 'https://huggingface.co',
|
|
41
|
+
industry: 'Business Services',
|
|
42
|
+
categories: ['b2b', 'b2c', 'saas'],
|
|
43
|
+
employees: '201-500',
|
|
44
|
+
revenue: '10m-50m',
|
|
45
|
+
founded_year: 2016,
|
|
46
|
+
description: 'A very long company description that should be dropped in compact mode...',
|
|
47
|
+
keywords: Array.from({ length: 28 }, (_, i) => `kw${i}`),
|
|
48
|
+
technologies: ['Stripe', 'Nginx', 'Node-Js'],
|
|
49
|
+
naics_codes: ['511210', '518210'],
|
|
50
|
+
socials: { linkedin_url: 'https://www.linkedin.com/company/huggingface', twitter_url: 'https://twitter.com/huggingface' },
|
|
51
|
+
financial: { total_funding: 395200000, funding: [{ date: '2024-08-01', type: 'Venture Round' }] },
|
|
52
|
+
location: { country: { code: 'FR', name: 'France' }, city: { name: 'Paris' } },
|
|
53
|
+
}],
|
|
54
|
+
page: 1,
|
|
55
|
+
totalItems: 10000,
|
|
56
|
+
}), { status: 200 })
|
|
57
|
+
) as typeof fetch
|
|
58
|
+
|
|
59
|
+
const result = await searchCompaniesHandler({ keywords: ['SaaS'], countries: ['FR'], limit: 5 })
|
|
60
|
+
|
|
61
|
+
expect(result.isError).toBeUndefined()
|
|
62
|
+
const parsed = JSON.parse(result.content[0].text)
|
|
63
|
+
expect(parsed.data.total).toBe(10000)
|
|
64
|
+
expect(parsed.data.companies).toHaveLength(1)
|
|
65
|
+
const c = parsed.data.companies[0]
|
|
66
|
+
// Kept: identity + firmographics + downstream-critical linkedin_url.
|
|
67
|
+
expect(c).toEqual({
|
|
68
|
+
name: 'Hugging Face',
|
|
69
|
+
domain: 'huggingface.co',
|
|
70
|
+
website: 'https://huggingface.co',
|
|
71
|
+
linkedin_url: 'https://www.linkedin.com/company/huggingface',
|
|
72
|
+
employees: '201-500',
|
|
73
|
+
revenue: '10m-50m',
|
|
74
|
+
industry: 'Business Services',
|
|
75
|
+
country: 'France',
|
|
76
|
+
city: 'Paris',
|
|
77
|
+
founded_year: 2016,
|
|
78
|
+
categories: ['b2b', 'b2c', 'saas'],
|
|
79
|
+
})
|
|
80
|
+
// Dropped heavy fields.
|
|
81
|
+
expect(c.description).toBeUndefined()
|
|
82
|
+
expect(c.keywords).toBeUndefined()
|
|
83
|
+
expect(c.technologies).toBeUndefined()
|
|
84
|
+
expect(c.naics_codes).toBeUndefined()
|
|
85
|
+
expect(c.financial).toBeUndefined()
|
|
86
|
+
expect(c.id).toBeUndefined()
|
|
87
|
+
})
|
|
88
|
+
|
|
89
|
+
it('fields=verbose returns the raw provider passthrough unchanged', async () => {
|
|
90
|
+
globalThis.fetch = vi.fn(async () =>
|
|
91
|
+
new Response(JSON.stringify({ items: [{ name: 'Hugging Face', keywords: ['ai'], technologies: ['Stripe'] }], totalItems: 10000 }), { status: 200 })
|
|
92
|
+
) as typeof fetch
|
|
93
|
+
|
|
94
|
+
const result = await searchCompaniesHandler({ keywords: ['SaaS'], limit: 5, fields: 'verbose' })
|
|
95
|
+
|
|
96
|
+
expect(result.isError).toBeUndefined()
|
|
97
|
+
const parsed = JSON.parse(result.content[0].text)
|
|
98
|
+
// Raw shape preserved: items[] with heavy fields, no companies[] normalization.
|
|
99
|
+
expect(parsed.data.items).toHaveLength(1)
|
|
100
|
+
expect(parsed.data.items[0].keywords).toEqual(['ai'])
|
|
101
|
+
expect(parsed.data.items[0].technologies).toEqual(['Stripe'])
|
|
102
|
+
expect(parsed.data.companies).toBeUndefined()
|
|
27
103
|
})
|
|
28
104
|
|
|
29
105
|
it('falls back to FullEnrich (priority 2) on CompanyEnrich failure', async () => {
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import {
|
|
3
|
+
compactCompaniesPayload,
|
|
4
|
+
extractCompaniesArray,
|
|
5
|
+
normalizeCompany,
|
|
6
|
+
} from '../../src/utils/compact-companies.js'
|
|
7
|
+
|
|
8
|
+
describe('extractCompaniesArray', () => {
|
|
9
|
+
it('finds the array under each provider key', () => {
|
|
10
|
+
expect(extractCompaniesArray({ items: [{ name: 'A' }] })).toHaveLength(1) // CompanyEnrich
|
|
11
|
+
expect(extractCompaniesArray({ companies: [{ name: 'A' }] })).toHaveLength(1) // FullEnrich
|
|
12
|
+
expect(extractCompaniesArray({ organizations: [{ name: 'A' }, { name: 'B' }] })).toHaveLength(2) // Apollo
|
|
13
|
+
expect(extractCompaniesArray({ data: [{ name: 'A' }] })).toHaveLength(1) // generic
|
|
14
|
+
expect(extractCompaniesArray({ results: [{ name: 'A' }] })).toHaveLength(1)
|
|
15
|
+
})
|
|
16
|
+
|
|
17
|
+
it('handles a bare top-level array', () => {
|
|
18
|
+
expect(extractCompaniesArray([{ name: 'A' }])).toHaveLength(1)
|
|
19
|
+
})
|
|
20
|
+
|
|
21
|
+
it('finds AI-Ark content[] and LinkUp nested data.companies[]', () => {
|
|
22
|
+
// AI-Ark companies live under content[] (registry hasResult: isNonEmptyArray(d.content)).
|
|
23
|
+
expect(extractCompaniesArray({ content: [{ name: 'A' }, { name: 'B' }] })).toHaveLength(2)
|
|
24
|
+
// LinkUp (search/fundraising/hiring) nests under data.companies[]
|
|
25
|
+
// (registry hasResult: isNonEmptyArray(d.data.companies)).
|
|
26
|
+
expect(extractCompaniesArray({ data: { companies: [{ name: 'A' }] } })).toHaveLength(1)
|
|
27
|
+
expect(extractCompaniesArray({ data: { organizations: [{ name: 'A' }, { name: 'B' }] } })).toHaveLength(2)
|
|
28
|
+
})
|
|
29
|
+
|
|
30
|
+
it('prefers a direct array under data over descending into a nested object', () => {
|
|
31
|
+
// PDL/Prospeo/TheirStack shape: { data: [...] } must stay flat, not be skipped.
|
|
32
|
+
expect(extractCompaniesArray({ data: [{ name: 'A' }] })).toHaveLength(1)
|
|
33
|
+
})
|
|
34
|
+
|
|
35
|
+
it('returns [] for null / non-object / no recognized key', () => {
|
|
36
|
+
expect(extractCompaniesArray(null)).toEqual([])
|
|
37
|
+
expect(extractCompaniesArray('nope')).toEqual([])
|
|
38
|
+
expect(extractCompaniesArray({ totalItems: 10 })).toEqual([])
|
|
39
|
+
})
|
|
40
|
+
})
|
|
41
|
+
|
|
42
|
+
describe('normalizeCompany', () => {
|
|
43
|
+
it('maps the CompanyEnrich nested shape to compact fields', () => {
|
|
44
|
+
const c = normalizeCompany({
|
|
45
|
+
id: 'uuid-123',
|
|
46
|
+
name: 'Mistral AI',
|
|
47
|
+
domain: 'mistral.ai',
|
|
48
|
+
website: 'https://mistral.ai',
|
|
49
|
+
industry: 'Software',
|
|
50
|
+
categories: ['b2b', 'saas'],
|
|
51
|
+
employees: '201-500',
|
|
52
|
+
revenue: '1m-10m',
|
|
53
|
+
founded_year: 2023,
|
|
54
|
+
description: 'long blurb',
|
|
55
|
+
keywords: ['a', 'b'],
|
|
56
|
+
technologies: ['Gmail'],
|
|
57
|
+
socials: { linkedin_url: 'https://www.linkedin.com/company/mistralai' },
|
|
58
|
+
location: { country: { code: 'FR', name: 'France' }, city: { name: 'Paris' } },
|
|
59
|
+
})
|
|
60
|
+
expect(c).toEqual({
|
|
61
|
+
name: 'Mistral AI',
|
|
62
|
+
domain: 'mistral.ai',
|
|
63
|
+
website: 'https://mistral.ai',
|
|
64
|
+
linkedin_url: 'https://www.linkedin.com/company/mistralai',
|
|
65
|
+
employees: '201-500',
|
|
66
|
+
revenue: '1m-10m',
|
|
67
|
+
industry: 'Software',
|
|
68
|
+
country: 'France',
|
|
69
|
+
city: 'Paris',
|
|
70
|
+
founded_year: 2023,
|
|
71
|
+
categories: ['b2b', 'saas'],
|
|
72
|
+
})
|
|
73
|
+
})
|
|
74
|
+
|
|
75
|
+
it('maps the Apollo organization shape (numeric employees, flat fields)', () => {
|
|
76
|
+
const c = normalizeCompany({
|
|
77
|
+
name: 'Apple',
|
|
78
|
+
primary_domain: 'apple.com',
|
|
79
|
+
website_url: 'https://apple.com',
|
|
80
|
+
linkedin_url: 'https://www.linkedin.com/company/apple',
|
|
81
|
+
estimated_num_employees: 164000,
|
|
82
|
+
industry: 'consumer electronics',
|
|
83
|
+
founded_year: 1976,
|
|
84
|
+
})
|
|
85
|
+
expect(c).toMatchObject({
|
|
86
|
+
name: 'Apple',
|
|
87
|
+
domain: 'apple.com',
|
|
88
|
+
website: 'https://apple.com',
|
|
89
|
+
linkedin_url: 'https://www.linkedin.com/company/apple',
|
|
90
|
+
employees: 164000,
|
|
91
|
+
industry: 'consumer electronics',
|
|
92
|
+
founded_year: 1976,
|
|
93
|
+
})
|
|
94
|
+
})
|
|
95
|
+
|
|
96
|
+
it('maps BlitzAPI employees_on_linkedin into employees', () => {
|
|
97
|
+
const c = normalizeCompany({ name: 'Blitz Co', domain: 'blitz.co', employees_on_linkedin: 320 })
|
|
98
|
+
expect(c).toMatchObject({ name: 'Blitz Co', domain: 'blitz.co', employees: 320 })
|
|
99
|
+
})
|
|
100
|
+
|
|
101
|
+
it('rejects a non-LinkedIn url in the linkedin slot', () => {
|
|
102
|
+
const c = normalizeCompany({ name: 'X', socials: { linkedin_url: 'https://twitter.com/x' } })
|
|
103
|
+
expect(c?.linkedin_url).toBeUndefined()
|
|
104
|
+
expect(c?.name).toBe('X')
|
|
105
|
+
})
|
|
106
|
+
|
|
107
|
+
it('returns null for empty / non-object records', () => {
|
|
108
|
+
expect(normalizeCompany(null)).toBeNull()
|
|
109
|
+
expect(normalizeCompany({})).toBeNull()
|
|
110
|
+
expect(normalizeCompany([{ name: 'A' }])).toBeNull()
|
|
111
|
+
expect(normalizeCompany('x')).toBeNull()
|
|
112
|
+
})
|
|
113
|
+
|
|
114
|
+
it('omits fields that are not present', () => {
|
|
115
|
+
const c = normalizeCompany({ name: 'Solo' })
|
|
116
|
+
expect(c).toEqual({ name: 'Solo' })
|
|
117
|
+
})
|
|
118
|
+
})
|
|
119
|
+
|
|
120
|
+
describe('compactCompaniesPayload', () => {
|
|
121
|
+
it('normalizes items[] and surfaces totalItems', () => {
|
|
122
|
+
const out = compactCompaniesPayload({
|
|
123
|
+
items: [{ name: 'A', domain: 'a.com' }, { name: 'B', domain: 'b.com' }],
|
|
124
|
+
totalItems: 10000,
|
|
125
|
+
})
|
|
126
|
+
expect(out.companies).toHaveLength(2)
|
|
127
|
+
expect(out.total).toBe(10000)
|
|
128
|
+
})
|
|
129
|
+
|
|
130
|
+
it('reads Apollo pagination.total_entries as total', () => {
|
|
131
|
+
const out = compactCompaniesPayload({ organizations: [{ name: 'A' }], pagination: { total_entries: 8371 } })
|
|
132
|
+
expect(out.companies).toHaveLength(1)
|
|
133
|
+
expect(out.total).toBe(8371)
|
|
134
|
+
})
|
|
135
|
+
|
|
136
|
+
it('drops records that normalize to nothing but keeps real ones', () => {
|
|
137
|
+
const out = compactCompaniesPayload({ items: [{ name: 'A' }, {}, { foo: 'bar' }] })
|
|
138
|
+
expect(out.companies).toHaveLength(1)
|
|
139
|
+
expect(out.companies[0].name).toBe('A')
|
|
140
|
+
})
|
|
141
|
+
|
|
142
|
+
it('returns an empty companies array (no total) when there is nothing to extract', () => {
|
|
143
|
+
const out = compactCompaniesPayload({ unexpected: true })
|
|
144
|
+
expect(out).toEqual({ companies: [] })
|
|
145
|
+
})
|
|
146
|
+
})
|