@coldiq/mcp 0.2.8 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +4 -0
- package/dist/index.js.map +1 -1
- package/dist/registry.d.ts +1 -1
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +99 -15
- package/dist/registry.js.map +1 -1
- package/dist/tools/extract-post-engagement.d.ts +21 -0
- package/dist/tools/extract-post-engagement.d.ts.map +1 -0
- package/dist/tools/extract-post-engagement.js +117 -0
- package/dist/tools/extract-post-engagement.js.map +1 -0
- package/dist/tools/find-influencers.d.ts +1 -1
- package/dist/tools/find-influencers.d.ts.map +1 -1
- package/dist/tools/find-influencers.js +2 -1
- package/dist/tools/find-influencers.js.map +1 -1
- package/dist/tools/find-signals.d.ts.map +1 -1
- package/dist/tools/find-signals.js +27 -10
- package/dist/tools/find-signals.js.map +1 -1
- package/dist/tools/get-place-reviews.d.ts +24 -0
- package/dist/tools/get-place-reviews.d.ts.map +1 -0
- package/dist/tools/get-place-reviews.js +46 -0
- package/dist/tools/get-place-reviews.js.map +1 -0
- package/dist/tools/search-ads.d.ts +1 -1
- package/dist/tools/search-ads.d.ts.map +1 -1
- package/dist/tools/search-ads.js +1 -1
- package/dist/tools/search-ads.js.map +1 -1
- package/dist/tools/search-places.d.ts +1 -1
- package/dist/tools/search-places.d.ts.map +1 -1
- package/dist/tools/search-places.js +23 -3
- package/dist/tools/search-places.js.map +1 -1
- package/dist/tools/search-reddit.js +1 -1
- package/dist/tools/search-reddit.js.map +1 -1
- package/package.json +1 -1
- package/src/index.ts +16 -0
- package/src/registry.ts +90 -4
- package/src/tools/extract-post-engagement.ts +135 -0
- package/src/tools/find-influencers.ts +2 -1
- package/src/tools/find-signals.ts +28 -11
- package/src/tools/get-place-reviews.ts +50 -0
- package/src/tools/search-ads.ts +1 -1
- package/src/tools/search-places.ts +22 -3
- package/src/tools/search-reddit.ts +1 -1
- package/tests/registry-find-signals.test.ts +66 -0
- package/tests/tools/extract-post-engagement.test.ts +76 -0
- package/tests/tools/find-signals.test.ts +5 -2
- package/tests/tools/get-place-reviews.test.ts +73 -0
- package/tests/tools/search-reddit.test.ts +69 -0
package/src/registry.ts
CHANGED
|
@@ -54,6 +54,7 @@ export type Capability =
|
|
|
54
54
|
| 'search_jobs'
|
|
55
55
|
| 'search_ads'
|
|
56
56
|
| 'search_places'
|
|
57
|
+
| 'get_place_reviews'
|
|
57
58
|
| 'find_influencers'
|
|
58
59
|
| 'search_reddit'
|
|
59
60
|
| 'search_seo'
|
|
@@ -2468,6 +2469,31 @@ const searchPlacesProviders: ProviderEntry[] = [
|
|
|
2468
2469
|
},
|
|
2469
2470
|
]
|
|
2470
2471
|
|
|
2472
|
+
// ---------------------------------------------------------------------------
|
|
2473
|
+
// get_place_reviews
|
|
2474
|
+
// ---------------------------------------------------------------------------
|
|
2475
|
+
|
|
2476
|
+
/**
 * Providers for the `get_place_reviews` capability.
 *
 * There is a single provider: an asynchronous Google Maps reviews job that is
 * created with POST /google-maps/reviews and polled at /google-maps/reviews/{id}.
 */
const getPlaceReviewsProviders: ProviderEntry[] = [
  {
    id: 'google_maps_reviews',
    endpoint: '/google-maps/reviews',
    method: 'POST',
    priority: 1,
    // Without at least one place URL there is nothing to scrape, so the provider
    // declares itself inapplicable instead of starting a job with no targets.
    isApplicable: (input) => isNonEmptyArray(input.place_urls),
    mapParams: (input) => {
      // Guard the cast: `place_urls` comes from loosely typed tool input, and
      // calling `.map` on a missing or non-array value would throw a TypeError.
      const placeUrls = Array.isArray(input.place_urls) ? (input.place_urls as string[]) : []
      return {
        body: {
          startUrls: placeUrls.map((url) => ({ url })),
          maxReviews: input.max_reviews,
          reviewsSort: input.sort,
          language: input.language,
        },
      }
    },
    // A completed job is a valid result even with an empty reviews array (a place
    // may genuinely have no reviews) — only failed/timed_out should fall through.
    hasResult: (data) => (data as { status?: string }).status === 'done',
    async: { ..._placesSharedAsync, pollEndpoint: (id) => `/google-maps/reviews/${id}` },
  },
]
|
|
2496
|
+
|
|
2471
2497
|
// ---------------------------------------------------------------------------
|
|
2472
2498
|
// find_influencers
|
|
2473
2499
|
// ---------------------------------------------------------------------------
|
|
@@ -2543,6 +2569,22 @@ const _redditSharedAsync = {
|
|
|
2543
2569
|
},
|
|
2544
2570
|
}
|
|
2545
2571
|
|
|
2572
|
+
// A bare subreddit URL (e.g. https://www.reddit.com/r/sales or .../r/sales/)
|
|
2573
|
+
// makes the Apify actor ENUMERATE that subreddit's feed and ignore the search
|
|
2574
|
+
// keyword entirely. When the caller also passes a `query`, rewrite bare
|
|
2575
|
+
// subreddit URLs into in-subreddit search URLs so the keyword is actually
|
|
2576
|
+
// applied. Already-formed search/post URLs are left untouched.
|
|
2577
|
+
/**
 * Matches a bare subreddit URL such as `https://www.reddit.com/r/sales` or
 * `https://reddit.com/r/sales/`. Capture group 1 is the URL without the
 * optional trailing slash.
 */
const _BARE_SUBREDDIT_RE = /^(https?:\/\/(?:www\.)?reddit\.com\/r\/[A-Za-z0-9_]+)\/?$/i

/**
 * Rewrite a bare subreddit URL into an in-subreddit search URL.
 *
 * @param url   Candidate start URL. Anything that is not a bare subreddit URL
 *              (post URLs, already-formed search URLs, other hosts) is returned
 *              unchanged.
 * @param query Search keyword to embed as `q`. When empty or whitespace-only the
 *              URL is returned unchanged, because a search URL without a keyword
 *              would not narrow the subreddit feed at all.
 * @param opts  Optional `sort` / `time` values; only non-empty strings are
 *              forwarded (as `sort` and `t`).
 * @returns The rewritten search URL, or `url` itself when no rewrite applies.
 */
function _toRedditSearchUrl(url: string, query: string, opts: { sort?: unknown; time?: unknown }): string {
  // Nothing to search for: keep the caller's URL instead of emitting `search/?q=`.
  if (query.trim() === '') return url

  const match = _BARE_SUBREDDIT_RE.exec(url)
  if (!match) return url

  // `restrict_sr=1` keeps the search scoped to the subreddit in the URL.
  const params = new URLSearchParams({ q: query, restrict_sr: '1' })
  if (typeof opts.sort === 'string' && opts.sort) params.set('sort', opts.sort)
  if (typeof opts.time === 'string' && opts.time) params.set('t', opts.time)
  return `${match[1]}/search/?${params.toString()}`
}
|
|
2587
|
+
|
|
2546
2588
|
const searchRedditProviders: ProviderEntry[] = [
|
|
2547
2589
|
{
|
|
2548
2590
|
id: 'reddit',
|
|
@@ -2550,10 +2592,26 @@ const searchRedditProviders: ProviderEntry[] = [
|
|
|
2550
2592
|
method: 'POST',
|
|
2551
2593
|
priority: 1,
|
|
2552
2594
|
isApplicable: (input) => isNonEmptyArray(input.start_urls) || typeof input.query === 'string',
|
|
2553
|
-
mapParams: (input) =>
|
|
2595
|
+
mapParams: (input) => {
|
|
2596
|
+
const query = typeof input.query === 'string' && input.query ? input.query : undefined
|
|
2597
|
+
const rawStartUrls = input.start_urls as string[] | undefined
|
|
2598
|
+
let startUrls = rawStartUrls?.map((url) => ({ url }))
|
|
2599
|
+
let searchQueries = query ? [query] : undefined
|
|
2600
|
+
// If a query is provided alongside start_urls, embed it into any bare
|
|
2601
|
+
// subreddit URLs (which would otherwise ignore it). The keyword then lives
|
|
2602
|
+
// in the URL, so drop the top-level searchQueries to avoid a conflicting
|
|
2603
|
+
// global search.
|
|
2604
|
+
if (query && rawStartUrls && rawStartUrls.length > 0) {
|
|
2605
|
+
const rewritten = rawStartUrls.map((u) => _toRedditSearchUrl(u, query, { sort: input.sort, time: input.time }))
|
|
2606
|
+
if (rewritten.some((u, i) => u !== rawStartUrls[i])) {
|
|
2607
|
+
startUrls = rewritten.map((url) => ({ url }))
|
|
2608
|
+
searchQueries = undefined
|
|
2609
|
+
}
|
|
2610
|
+
}
|
|
2611
|
+
return {
|
|
2554
2612
|
body: {
|
|
2555
|
-
searchQueries
|
|
2556
|
-
startUrls
|
|
2613
|
+
searchQueries,
|
|
2614
|
+
startUrls,
|
|
2557
2615
|
searchType: input.search_type ?? 'posts',
|
|
2558
2616
|
searchCommunityName: input.search_community_name,
|
|
2559
2617
|
sort: input.sort,
|
|
@@ -2564,7 +2622,7 @@ const searchRedditProviders: ProviderEntry[] = [
|
|
|
2564
2622
|
postDateLimit: input.post_date_limit,
|
|
2565
2623
|
commentDateLimit: input.comment_date_limit,
|
|
2566
2624
|
},
|
|
2567
|
-
}
|
|
2625
|
+
}},
|
|
2568
2626
|
hasResult: (data) => isNonEmptyArray((data as { items?: unknown[] }).items),
|
|
2569
2627
|
async: {
|
|
2570
2628
|
..._redditSharedAsync,
|
|
@@ -3164,6 +3222,33 @@ const findSignalsProviders: ProviderEntry[] = [
|
|
|
3164
3222
|
hasResult: (data) => isNonEmptyArray((data as Record<string, unknown>).data),
|
|
3165
3223
|
},
|
|
3166
3224
|
{
|
|
3225
|
+
// Topic-based DISCOVERY: "which companies show intent on topic X" with no
|
|
3226
|
+
// company list known in advance. Routes to /theirstack/companies/search,
|
|
3227
|
+
// which returns a list of companies filtered by buying-intent keyword slugs.
|
|
3228
|
+
// This is the GTM-primary use case (find prospects by intent), distinct from
|
|
3229
|
+
// theirstack-buying-intents which verifies intent on companies you already have.
|
|
3230
|
+
id: 'theirstack-intent-discovery',
|
|
3231
|
+
endpoint: '/theirstack/companies/search',
|
|
3232
|
+
method: 'POST',
|
|
3233
|
+
priority: 5,
|
|
3234
|
+
isApplicable: (input) =>
|
|
3235
|
+
input.signal_type === 'intent' &&
|
|
3236
|
+
isNonEmptyArray(input.topics) &&
|
|
3237
|
+
!isNonEmptyArray(input.companies) &&
|
|
3238
|
+
!isNonEmptyArray(input.domains),
|
|
3239
|
+
mapParams: (input) => ({
|
|
3240
|
+
body: {
|
|
3241
|
+
company_keyword_slug_or: input.topics,
|
|
3242
|
+
...(isNonEmptyArray(input.industries) && { industry_or: input.industries }),
|
|
3243
|
+
...(isNonEmptyArray(input.countries) && { company_country_code_or: input.countries }),
|
|
3244
|
+
limit: Math.min((input.limit as number | undefined) ?? 25, 100),
|
|
3245
|
+
include_total_results: true,
|
|
3246
|
+
},
|
|
3247
|
+
}),
|
|
3248
|
+
hasResult: (data) => isNonEmptyArray((data as Record<string, unknown>).data),
|
|
3249
|
+
},
|
|
3250
|
+
{
|
|
3251
|
+
// Verify intent on KNOWN companies/domains.
|
|
3167
3252
|
id: 'theirstack-buying-intents',
|
|
3168
3253
|
endpoint: '/theirstack/companies/buying_intents',
|
|
3169
3254
|
method: 'POST',
|
|
@@ -3262,6 +3347,7 @@ const registry: Record<Capability, ProviderEntry[]> = {
|
|
|
3262
3347
|
search_jobs: searchJobsProviders,
|
|
3263
3348
|
search_ads: searchAdsProviders,
|
|
3264
3349
|
search_places: searchPlacesProviders,
|
|
3350
|
+
get_place_reviews: getPlaceReviewsProviders,
|
|
3265
3351
|
find_influencers: findInfluencersProviders,
|
|
3266
3352
|
search_reddit: searchRedditProviders,
|
|
3267
3353
|
search_seo: searchSeoProviders,
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import { z } from 'zod'
|
|
2
|
+
import { callApi } from '../client.js'
|
|
3
|
+
|
|
4
|
+
export const extractPostEngagementName = 'extract_post_engagement'
|
|
5
|
+
|
|
6
|
+
export const extractPostEngagementDescription =
|
|
7
|
+
'Extract the people who engaged with a LinkedIn post — commenters and/or reactors — as a deduplicated list of contacts (name, profile URL, headline). ' +
|
|
8
|
+
'Use this for social-signal prospecting: pull everyone who engaged with a viral post, then chain the results into enrich_person / find_email to get roles and work emails. ' +
|
|
9
|
+
'Runs an async extraction job (typically ~30–120s) and returns once the people are ready. Costs 10 credits per post.'
|
|
10
|
+
|
|
11
|
+
export const extractPostEngagementSchema = {
|
|
12
|
+
post_url: z
|
|
13
|
+
.string()
|
|
14
|
+
.url()
|
|
15
|
+
.describe('LinkedIn post URL to extract engagement from (e.g. "https://www.linkedin.com/feed/update/urn:li:activity:7234567890123456789" or a /posts/ permalink).'),
|
|
16
|
+
type: z
|
|
17
|
+
.enum(['comments', 'reactions', 'both'])
|
|
18
|
+
.default('both')
|
|
19
|
+
.describe('Which engagement to extract: "comments" (people who commented), "reactions" (people who reacted), or "both" (default — deduplicated across both).'),
|
|
20
|
+
include_replies: z
|
|
21
|
+
.boolean()
|
|
22
|
+
.optional()
|
|
23
|
+
.describe('When extracting comments, also include people who replied to comments. Defaults to true upstream.'),
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/** Resolve after `ms` milliseconds; used to pace the status-polling loop. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms)
  })
}
|
|
29
|
+
|
|
30
|
+
/**
 * Build an error-flagged tool result.
 *
 * The single text content item is the JSON serialization of `{ error, ...extra }`,
 * so any `extra` fields appear alongside the message.
 */
function errorResult(error: string, extra?: Record<string, unknown>) {
  const payload = { error, ...extra }
  const text = JSON.stringify(payload)
  return { content: [{ type: 'text' as const, text }], isError: true }
}
|
|
36
|
+
|
|
37
|
+
/**
 * Pull a human-readable error message out of an API response body.
 *
 * @param data Response body of unknown shape.
 * @returns The `error` field as a string (non-string values are JSON-encoded),
 *          or `undefined` when `data` is not an object, has no `error` field,
 *          or the field is null / undefined / an empty string. Returning
 *          `undefined` in those cases lets callers' `??` fallbacks supply a
 *          meaningful message instead of surfacing "null" or "".
 */
function extractErrorMessage(data: unknown): string | undefined {
  if (data === null || typeof data !== 'object' || !('error' in data)) return undefined

  const e = (data as Record<string, unknown>).error
  // An absent or blank error carries no information; defer to the caller's fallback.
  if (e === undefined || e === null || e === '') return undefined

  return typeof e === 'string' ? e : JSON.stringify(e)
}
|
|
44
|
+
|
|
45
|
+
/**
 * Read a non-negative integer from an environment variable.
 *
 * Falls back to `fallback` when the variable is unset, empty, not a number, or
 * negative, so a malformed override cannot turn the poll interval or timeout
 * into NaN.
 */
function readNonNegativeIntEnv(name: string, fallback: number): number {
  const parsed = Number.parseInt(process.env[name] ?? '', 10)
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback
}

/**
 * Tool handler for `extract_post_engagement`.
 *
 * Flow:
 *  1. POST /jungler/workbooks to start the extraction task (the billed step).
 *  2. Poll GET /jungler/tasks/{taskId}/status until it reports `success` or
 *     `failure`, or the timeout elapses.
 *  3. GET /jungler/workbooks/{workbookId}/contacts to fetch the people.
 *
 * Reads from `input`: `post_url` (string), `type` ('comments' | 'reactions' |
 * 'both', default 'both') and optional `include_replies` (boolean).
 *
 * Environment overrides: `COLDIQ_ENGAGEMENT_POLL_MS` (default 2000) and
 * `COLDIQ_ENGAGEMENT_TIMEOUT_MS` (default 180000). Invalid values fall back to
 * the defaults.
 *
 * @returns A tool result whose text is JSON `{ data: { post_url, type, people }, _meta }`,
 *          or an `isError` result describing what went wrong.
 */
export async function extractPostEngagementHandler(input: Record<string, unknown>) {
  const postUrl = input.post_url as string
  const type = (input.type as 'comments' | 'reactions' | 'both' | undefined) ?? 'both'
  const includeReplies = input.include_replies as boolean | undefined

  const dataTypes =
    type === 'comments' ? ['comment'] : type === 'reactions' ? ['reaction'] : ['comment', 'reaction']

  // Step 1 — create the extraction task. Billing (10 credits) happens here.
  const createRes = await callApi('POST', '/jungler/workbooks', {
    post_url: postUrl,
    data_types: dataTypes,
  })
  if (!createRes.ok) {
    return errorResult(extractErrorMessage(createRes.data) ?? `Failed to start extraction (status ${createRes.status})`)
  }
  // The body may be empty/null on an unexpected upstream reply; read it defensively.
  const taskId = (createRes.data as { task_id?: string } | null | undefined)?.task_id
  if (!taskId) {
    return errorResult('Extraction job did not return a task id')
  }
  // Credit headers are emitted on the create call (the only billed step).
  const creditsCharged = Number(createRes.headers['x-coldiq-credits-charged'])
  const creditsRemaining = Number(createRes.headers['x-coldiq-credits-remaining'])

  // Step 2 — poll task status until it resolves. The status endpoint is free, so
  // polling does not bill; only the create call above charged credits.
  const pollIntervalMs = readNonNegativeIntEnv('COLDIQ_ENGAGEMENT_POLL_MS', 2000)
  const timeoutMs = readNonNegativeIntEnv('COLDIQ_ENGAGEMENT_TIMEOUT_MS', 180000)
  const maxPollErrors = 3
  const deadline = Date.now() + timeoutMs

  let workbookId: string | undefined
  let succeeded = false
  let consecutivePollErrors = 0

  while (Date.now() < deadline) {
    await sleep(pollIntervalMs)
    const statusRes = await callApi('GET', `/jungler/tasks/${taskId}/status`)
    if (!statusRes.ok) {
      // Tolerate transient status failures; give up only after several in a row.
      consecutivePollErrors++
      if (consecutivePollErrors >= maxPollErrors) {
        return errorResult('Could not read extraction status — please retry', { post_url: postUrl })
      }
      continue
    }
    consecutivePollErrors = 0
    const statusData = statusRes.data as { status?: string; workbook_id?: string } | null | undefined
    if (statusData?.status === 'success') {
      succeeded = true
      workbookId = statusData.workbook_id
      break
    }
    if (statusData?.status === 'failure') {
      return errorResult('Engagement extraction failed upstream — the post may be private, deleted, or have no engagement', { post_url: postUrl })
    }
  }

  if (succeeded && !workbookId) {
    // The task finished but gave us nothing to fetch; this is not a timeout.
    return errorResult('Engagement extraction completed but did not return a workbook id — please retry', { post_url: postUrl })
  }
  if (!workbookId) {
    return errorResult(`Engagement extraction did not complete within ${Math.round(timeoutMs / 1000)}s — try again shortly`, { post_url: postUrl })
  }

  // Step 3 — fetch the deduplicated people. activity_filter narrows to commenters
  // or reactors; omitted for "both" so the upstream returns all unique contacts.
  const queryParams: Record<string, string> = {}
  if (type === 'comments') queryParams.activity_filter = 'commenters'
  else if (type === 'reactions') queryParams.activity_filter = 'reactors'
  if (includeReplies !== undefined) queryParams.include_replies = String(includeReplies)

  const contactsRes = await callApi(
    'GET',
    `/jungler/workbooks/${workbookId}/contacts`,
    undefined,
    Object.keys(queryParams).length > 0 ? queryParams : undefined,
  )
  if (!contactsRes.ok) {
    return errorResult(extractErrorMessage(contactsRes.data) ?? 'Failed to fetch extracted people', { post_url: postUrl })
  }

  // Only report credit figures that parsed to real numbers (missing headers yield NaN).
  const meta: Record<string, unknown> = {}
  if (Number.isFinite(creditsCharged)) meta.credits_charged = creditsCharged
  if (Number.isFinite(creditsRemaining)) meta.credits_remaining = creditsRemaining

  return {
    content: [{
      type: 'text' as const,
      text: JSON.stringify({
        data: { post_url: postUrl, type, people: contactsRes.data },
        _meta: meta,
      }),
    }],
  }
}
|
|
@@ -5,7 +5,8 @@ import { resolvePreferredProviders, getProvidersForCapability } from '../utils/p
|
|
|
5
5
|
export const findInfluencersName = 'find_influencers'
|
|
6
6
|
|
|
7
7
|
export const findInfluencersDescription =
|
|
8
|
-
'Discover and find influencers/creators on Instagram, YouTube, TikTok, Twitch, Twitter, and OnlyFans via 2 providers (Influencers Club Similar, Influencers Club Discovery). Routes by input: handle set → lookalike search (influencers_similar) runs first; no handle → keyword/filter discovery. Filters: location, gender, type (creator/business), AI natural language search, sort. Cost: 1 credit per result returned.'
|
|
8
|
+
'Discover and find influencers/creators on Instagram, YouTube, TikTok, Twitch, Twitter, and OnlyFans via 2 providers (Influencers Club Similar, Influencers Club Discovery). Routes by input: handle set → lookalike search (influencers_similar) runs first; no handle → keyword/filter discovery. Filters: location, gender, type (creator/business), AI natural language search, sort. Cost: 1 credit per result returned. ' +
|
|
9
|
+
'LIMITATIONS: LinkedIn is not a supported platform (the underlying creator index has no LinkedIn coverage) — for B2B/LinkedIn prospecting use extract_post_engagement to pull engagers off a specific LinkedIn post instead. There is no follower-count range filter; to bias toward a follower tier, set sort_by="number_of_followers" and filter the returned list client-side.'
|
|
9
10
|
|
|
10
11
|
export const findInfluencersSchema = {
|
|
11
12
|
platform: z.enum(['instagram', 'youtube', 'tiktok', 'twitch', 'twitter', 'onlyfans'])
|
|
@@ -9,7 +9,7 @@ export const findSignalsDescription =
|
|
|
9
9
|
'Each call targets one signal type. Two modes: ' +
|
|
10
10
|
'Company-targeted (funding | acquisition | hiring | job_change | intent): accepts companies/domains/industries/countries/since filters. ' +
|
|
11
11
|
'funding additionally accepts `round_type` (e.g. ["Series A", "Seed"]). ' +
|
|
12
|
-
'intent
|
|
12
|
+
'intent has two modes: (a) DISCOVERY — pass `topics` (e.g. ["sales-automation"]) with no companies/domains to find companies showing intent on those topics; (b) VERIFY — pass companies/domains to check intent on known companies. Requires topics OR companies/domains. ' +
|
|
13
13
|
'Feed-style (news | startup_post): country and since only — does NOT filter by company. Passing companies/domains for these types is rejected. ' +
|
|
14
14
|
'hiring returns individual job postings with company context (title, location, descriptionText, company industries) — for richer job-board queries with description/seniority/easy-apply filters use search_jobs instead.'
|
|
15
15
|
|
|
@@ -19,7 +19,7 @@ export const findSignalsSchema = {
|
|
|
19
19
|
.describe(
|
|
20
20
|
'Signal type to retrieve. ' +
|
|
21
21
|
'Company-targeted: "funding" (fundraising rounds), "acquisition" (M&A), "hiring" (individual job postings indexed by Signalbase, with company context), ' +
|
|
22
|
-
'"job_change" (people who recently changed roles), "intent" (companies showing buying intent). ' +
|
|
22
|
+
'"job_change" (people who recently changed roles), "intent" (companies showing buying intent — discover by `topics` or verify on known companies/domains). ' +
|
|
23
23
|
'Feed-style (country/date filter only — company filter not supported): "news" (company news events), "startup_post" (Product Hunt, Hacker News, etc.)'
|
|
24
24
|
),
|
|
25
25
|
companies: z
|
|
@@ -29,15 +29,15 @@ export const findSignalsSchema = {
|
|
|
29
29
|
domains: z
|
|
30
30
|
.array(z.string())
|
|
31
31
|
.optional()
|
|
32
|
-
.describe('Company domains to filter signals for (e.g. ["coldiq.com"]). Only used by company-targeted types.
|
|
32
|
+
.describe('Company domains to filter signals for (e.g. ["coldiq.com"]). Only used by company-targeted types. For intent VERIFY mode: pass companies or domains. For intent DISCOVERY mode: omit both and pass topics instead.'),
|
|
33
33
|
since: z
|
|
34
34
|
.string()
|
|
35
35
|
.optional()
|
|
36
|
-
.describe('Return signals after this date. ISO date format, e.g. "2026-01-01".'),
|
|
36
|
+
.describe('Return signals after this date. ISO date format, e.g. "2026-01-01". Honored by funding, acquisition, hiring, job_change, and startup_post. NOT supported for intent (TheirStack has no date filter on intent) — passing it has no effect.'),
|
|
37
37
|
industries: z
|
|
38
38
|
.array(z.string())
|
|
39
39
|
.optional()
|
|
40
|
-
.describe('Industry names to filter by (e.g. ["Software", "SaaS"]). Forwarded to upstream for funding and acquisition. For hiring, filtered client-side against each row\'s `industries` field (case-insensitive substring match).
|
|
40
|
+
.describe('Industry names to filter by (e.g. ["Software", "SaaS"]). Forwarded to upstream for funding and acquisition. For hiring, filtered client-side against each row\'s `industries` field (case-insensitive substring match); Signalbase uses coarse labels (e.g. "Financial Services"), so prefer those over narrow terms like "Fintech" — if nothing matches, rows are returned UNFILTERED with a `_industry_filter` note rather than an empty set. For intent DISCOVERY, forwarded to TheirStack as `industry_or`. Ignored for job_change, news, startup_post.'),
|
|
41
41
|
countries: z
|
|
42
42
|
.array(z.string())
|
|
43
43
|
.optional()
|
|
@@ -49,7 +49,7 @@ export const findSignalsSchema = {
|
|
|
49
49
|
topics: z
|
|
50
50
|
.array(z.string())
|
|
51
51
|
.optional()
|
|
52
|
-
.describe('Intent topic / keyword slugs (e.g. ["sales-automation", "lead-generation"]). Only honored by signal_type=intent
|
|
52
|
+
.describe('Intent topic / keyword slugs (e.g. ["sales-automation", "lead-generation"]). Only honored by signal_type=intent. DISCOVERY mode: pass topics WITHOUT companies/domains to find companies showing intent on these topics (forwarded to TheirStack company search as `company_keyword_slug_or`, returns a company list). VERIFY mode: pass topics WITH companies/domains to narrow intent results for those known companies (forwarded as `keyword_slug_or`).'),
|
|
53
53
|
limit: z
|
|
54
54
|
.number()
|
|
55
55
|
.int()
|
|
@@ -65,11 +65,13 @@ export async function findSignalsHandler(input: Record<string, unknown>) {
|
|
|
65
65
|
const hasCompanies = Array.isArray(restInput.companies) && (restInput.companies as unknown[]).length > 0
|
|
66
66
|
const hasDomains = Array.isArray(restInput.domains) && (restInput.domains as unknown[]).length > 0
|
|
67
67
|
|
|
68
|
-
|
|
68
|
+
const hasTopics = Array.isArray(restInput.topics) && (restInput.topics as unknown[]).length > 0
|
|
69
|
+
|
|
70
|
+
if (restInput.signal_type === 'intent' && !hasCompanies && !hasDomains && !hasTopics) {
|
|
69
71
|
return {
|
|
70
72
|
content: [{
|
|
71
73
|
type: 'text' as const,
|
|
72
|
-
text: JSON.stringify({ error: 'intent signal_type requires at least one of: companies or domains' }),
|
|
74
|
+
text: JSON.stringify({ error: 'intent signal_type requires at least one of: topics (to discover companies by intent topic), or companies/domains (to verify intent on known companies)' }),
|
|
73
75
|
}],
|
|
74
76
|
isError: true,
|
|
75
77
|
}
|
|
@@ -110,21 +112,36 @@ export async function findSignalsHandler(input: Record<string, unknown>) {
|
|
|
110
112
|
// `industries` param would otherwise be silently dropped. Filter client-side:
|
|
111
113
|
// each hiring row carries an `industries` string (e.g. "Law Practice and Legal
|
|
112
114
|
// Services") which we substring-match against the user-supplied list.
|
|
115
|
+
//
|
|
116
|
+
// Non-destructive fallback: Signalbase has no industry facet and tags rows with
|
|
117
|
+
// coarse labels (e.g. "Financial Services"), so a user term like "Fintech" can
|
|
118
|
+
// match nothing even when relevant rows exist. Rather than return a misleading
|
|
119
|
+
// empty set (which reads as "no companies are hiring"), when the filter would
|
|
120
|
+
// drop every row we keep the unfiltered rows and attach a note explaining that
|
|
121
|
+
// the industry filter matched nothing.
|
|
113
122
|
if (restInput.signal_type === 'hiring' && Array.isArray(restInput.industries) && restInput.industries.length > 0) {
|
|
114
123
|
const wanted = (restInput.industries as unknown[])
|
|
115
124
|
.map((s) => (typeof s === 'string' ? s.toLowerCase() : ''))
|
|
116
125
|
.filter((s) => s.length > 0)
|
|
117
126
|
if (wanted.length > 0) {
|
|
118
|
-
const typed = result as { data?: { data?: unknown[] } }
|
|
127
|
+
const typed = result as { data?: { data?: unknown[]; _industry_filter?: string } }
|
|
119
128
|
const rows = typed.data?.data
|
|
120
|
-
if (Array.isArray(rows)) {
|
|
121
|
-
|
|
129
|
+
if (Array.isArray(rows) && rows.length > 0) {
|
|
130
|
+
const filtered = rows.filter((row) => {
|
|
122
131
|
if (!row || typeof row !== 'object') return false
|
|
123
132
|
const industriesField = (row as Record<string, unknown>).industries
|
|
124
133
|
if (typeof industriesField !== 'string' || industriesField.length === 0) return false
|
|
125
134
|
const haystack = industriesField.toLowerCase()
|
|
126
135
|
return wanted.some((needle) => haystack.includes(needle))
|
|
127
136
|
})
|
|
137
|
+
if (filtered.length > 0) {
|
|
138
|
+
typed.data!.data = filtered
|
|
139
|
+
} else {
|
|
140
|
+
typed.data!._industry_filter =
|
|
141
|
+
`No hiring rows matched industries [${(restInput.industries as string[]).join(', ')}]. ` +
|
|
142
|
+
'Signalbase tags hiring rows with coarse industry labels (e.g. "Financial Services"), so a narrow term may match nothing — results are returned UNFILTERED. ' +
|
|
143
|
+
'Narrow with countries or a broader/more exact industry label (e.g. "Financial Services" instead of "Fintech").'
|
|
144
|
+
}
|
|
128
145
|
}
|
|
129
146
|
}
|
|
130
147
|
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { z } from 'zod'
|
|
2
|
+
import { executeWithFallback, isExecutionError } from '../executor.js'
|
|
3
|
+
import { resolvePreferredProviders, getProvidersForCapability } from '../utils/provider-resolver.js'
|
|
4
|
+
|
|
5
|
+
export const getPlaceReviewsName = 'get_place_reviews'
|
|
6
|
+
|
|
7
|
+
export const getPlaceReviewsDescription =
|
|
8
|
+
'Fetch Google Maps reviews for one or more places. Pass the Google Maps place URLs (from search_places results, the `url` field) and get back each place\'s reviews — useful for reputation management, local-services prospecting, and surfacing negative-review signals. ' +
|
|
9
|
+
'search_places returns place listings WITHOUT review text; use this tool to get the actual review content. ' +
|
|
10
|
+
'Runs an async job (~30–120s). Cost: 1 credit per review returned.'
|
|
11
|
+
|
|
12
|
+
export const getPlaceReviewsSchema = {
|
|
13
|
+
place_urls: z
|
|
14
|
+
.array(z.string().url())
|
|
15
|
+
.min(1)
|
|
16
|
+
.max(10)
|
|
17
|
+
.describe('Google Maps place URLs to scrape reviews from (1–10). Use the `url` field from search_places results, or a maps.google.com place/search URL.'),
|
|
18
|
+
max_reviews: z
|
|
19
|
+
.number()
|
|
20
|
+
.int()
|
|
21
|
+
.min(1)
|
|
22
|
+
.max(300)
|
|
23
|
+
.optional()
|
|
24
|
+
.describe('Maximum reviews to fetch per place (default 5, max 300). Each returned review costs 1 credit, so keep this tight.'),
|
|
25
|
+
sort: z
|
|
26
|
+
.enum(['mostRelevant', 'newest', 'highestRanking', 'lowestRanking'])
|
|
27
|
+
.optional()
|
|
28
|
+
.describe('Review sort order. Use "newest" for recent reviews or "lowestRanking" to surface negative reviews first. Default "mostRelevant".'),
|
|
29
|
+
language: z
|
|
30
|
+
.string()
|
|
31
|
+
.optional()
|
|
32
|
+
.describe('ISO 639-1 language code to filter reviews by language (e.g. "en", "fr").'),
|
|
33
|
+
use_providers: z
|
|
34
|
+
.array(z.string())
|
|
35
|
+
.optional()
|
|
36
|
+
.describe(`Optional ordered list of providers to use. Leave empty to let ColdIQ automatically pick — recommended. Available providers: ${getProvidersForCapability('get_place_reviews').join(', ')}. Provider names are matched fuzzily.`),
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/**
 * Tool handler for `get_place_reviews`.
 *
 * Splits the optional `use_providers` preference off the input, resolves it
 * against the `get_place_reviews` capability, then runs the request through
 * `executeWithFallback`. Every outcome is returned as a single JSON text item;
 * resolution and execution errors are additionally flagged with `isError`.
 */
export async function getPlaceReviewsHandler(input: Record<string, unknown>) {
  const { use_providers: requestedProviders, ...params } = input

  const resolution = resolvePreferredProviders('get_place_reviews', params, requestedProviders)
  if (!resolution.ok) {
    const text = JSON.stringify(resolution.error)
    return { content: [{ type: 'text' as const, text }], isError: true }
  }

  const outcome = await executeWithFallback('get_place_reviews', params, {
    providers: resolution.providers,
    matchedFrom: resolution.matchedFrom,
  })

  const text = JSON.stringify(outcome)
  if (isExecutionError(outcome)) {
    return { content: [{ type: 'text' as const, text }], isError: true }
  }
  return { content: [{ type: 'text' as const, text }] }
}
|
package/src/tools/search-ads.ts
CHANGED
|
@@ -5,7 +5,7 @@ import { resolvePreferredProviders, getProvidersForCapability } from '../utils/p
|
|
|
5
5
|
export const searchAdsName = 'search_ads'
|
|
6
6
|
|
|
7
7
|
export const searchAdsDescription =
|
|
8
|
-
'Search live ad creatives across 5 ad libraries (Google Ads Transparency, LinkedIn Ad Library, Meta Ads Library, Twitter/X Ads, Reddit Ads) — a high-signal GTM input for competitive intelligence, ICP refinement, and pitch personalization. Routes by input: domains/advertiser_ids → Google only; search_urls → LinkedIn only; bare query → Google → Meta → Twitter → Reddit waterfall. Use platform="google"|"linkedin"|"meta"|"twitter"|"reddit" to pin to one platform. All providers are async (~10–60s). Cost: ~5 credits per call (Twitter charges 1 credit per ad returned
|
|
8
|
+
'Search live ad creatives across 5 ad libraries (Google Ads Transparency, LinkedIn Ad Library, Meta Ads Library, Twitter/X Ads, Reddit Ads) — a high-signal GTM input for competitive intelligence, ICP refinement, and pitch personalization. Routes by input: domains/advertiser_ids → Google only; search_urls → LinkedIn only; bare query → Google → Meta → Twitter → Reddit waterfall. Use platform="google"|"linkedin"|"meta"|"twitter"|"reddit" to pin to one platform. All providers are async (~10–60s). Cost: ~5 credits per call (Twitter charges 1 credit per ad returned). Credits are fully refunded when a run returns zero ads. NOTE: Google Ads creatives return image URLs + creative IDs, not ad copy text — open the image URLs to read the ad. There is no "currently running only" filter; results can span past campaigns.'
|
|
9
9
|
|
|
10
10
|
export const searchAdsSchema = {
|
|
11
11
|
query: z.string().optional().describe('Advertiser/company name or keyword. Routes to Google→Meta→Twitter→Reddit when no platform-specific input is set.'),
|
|
@@ -5,7 +5,7 @@ import { resolvePreferredProviders, getProvidersForCapability } from '../utils/p
|
|
|
5
5
|
export const searchPlacesName = 'search_places'
|
|
6
6
|
|
|
7
7
|
export const searchPlacesDescription =
|
|
8
|
-
'Search local businesses and places via 2 providers (Openmart Search, Google Maps Scraper) — useful for territory mapping, local-services prospecting, restaurant/retail/vertical research. Routes by input: structured filters or country in {US,CA,AU,PR,NZ} → Openmart (sync, ~1s) first, then Google Maps Scraper (async, ~30–120s) as fallback or for global coverage. Use provider="openmart"|"google_maps" to pin to one. Cost: 1 credit per place returned (both providers).'
|
|
8
|
+
'Search local businesses and places via 2 providers (Openmart Search, Google Maps Scraper) — useful for territory mapping, local-services prospecting, restaurant/retail/vertical research. Routes by input: structured filters or country in {US,CA,AU,PR,NZ} → Openmart (sync, ~1s) first, then Google Maps Scraper (async, ~30–120s) as fallback or for global coverage. Use provider="openmart"|"google_maps" to pin to one. Cost: 1 credit per place returned (both providers). Results do NOT include review text — to fetch a place\'s reviews, pass its `url` to get_place_reviews.'
|
|
9
9
|
|
|
10
10
|
export const searchPlacesSchema = {
|
|
11
11
|
query: z.string().optional().describe('Free-text query (e.g. "coffee shops in Brooklyn", "law firm New York"). Used by both providers.'),
|
|
@@ -90,11 +90,30 @@ export async function searchPlacesHandler(input: Record<string, unknown>) {
|
|
|
90
90
|
if (isExecutionError(result)) {
|
|
91
91
|
return { content: [{ type: 'text' as const, text: JSON.stringify(result) }], isError: true }
|
|
92
92
|
}
|
|
93
|
-
|
|
93
|
+
const filters = {
|
|
94
94
|
minRating: asNumber(restInput.min_overall_rating),
|
|
95
95
|
maxRating: asNumber(restInput.max_overall_rating),
|
|
96
96
|
minReviews: asNumber(restInput.min_total_reviews),
|
|
97
97
|
maxReviews: asNumber(restInput.max_total_reviews),
|
|
98
|
-
}
|
|
98
|
+
}
|
|
99
|
+
const scraped = placesCount(result.data)
|
|
100
|
+
result.data = applyPlaceFilters(result.data, filters)
|
|
101
|
+
const matched = placesCount(result.data)
|
|
102
|
+
// Google Maps bills per place scraped upstream (what ColdIQ pays the provider),
|
|
103
|
+
// but rating/review filters are applied here client-side. When the filter trims
|
|
104
|
+
// the set, make the gap explicit so the credit charge isn't surprising.
|
|
105
|
+
if (scraped !== undefined && matched !== undefined && matched < scraped) {
|
|
106
|
+
;(result._meta as Record<string, unknown>).filtered = {
|
|
107
|
+
scraped,
|
|
108
|
+
matched,
|
|
109
|
+
note: 'Rating/review filters are applied client-side. You are billed per place scraped upstream (scraped), not per matched place.',
|
|
110
|
+
}
|
|
111
|
+
}
|
|
99
112
|
return { content: [{ type: 'text' as const, text: JSON.stringify(result) }] }
|
|
100
113
|
}
|
|
114
|
+
|
|
115
|
+
/**
 * Counts the entries of `data.places`.
 *
 * @param data - Value to inspect; any type is accepted.
 * @returns The length of `data.places` when `data` is an object whose `places`
 *   property is an array; `undefined` in every other case.
 */
function placesCount(data: unknown): number | undefined {
|
|
116
|
+
// Falsy values (null, undefined, ...) and non-objects have no `places` property to read.
if (!data || typeof data !== 'object') return undefined
|
|
117
|
+
const places = (data as Record<string, unknown>).places
|
|
118
|
+
// A missing or non-array `places` yields undefined rather than 0.
return Array.isArray(places) ? places.length : undefined
|
|
119
|
+
}
|
|
@@ -11,7 +11,7 @@ export const searchRedditSchema = {
|
|
|
11
11
|
start_urls: z.array(z.string().url()).max(25).optional()
|
|
12
12
|
.describe('Reddit URLs to scrape (subreddit, post, user, or search URL). Up to 25. Provide this and/or query. Example: ["https://www.reddit.com/r/sales/"]'),
|
|
13
13
|
query: z.string().optional()
|
|
14
|
-
.describe('Keyword search query
|
|
14
|
+
.describe('Keyword search query e.g. "best CRM for startups". Provide this and/or start_urls. When combined with a bare subreddit start_url (e.g. ".../r/sales/"), the query is applied as an in-subreddit search so only matching posts are returned (a bare subreddit URL alone would otherwise return its whole feed, ignoring the keyword).'),
|
|
15
15
|
search_type: z.enum(['posts', 'comments', 'communities', 'users']).default('posts')
|
|
16
16
|
.describe('What the search query returns: posts, comments, communities, or users.'),
|
|
17
17
|
search_community_name: z.string().optional()
|
|
@@ -309,6 +309,72 @@ describe('signalbase-job-change', () => {
|
|
|
309
309
|
})
|
|
310
310
|
})
|
|
311
311
|
|
|
312
|
+
// ---------------------------------------------------------------------------
|
|
313
|
+
// theirstack-intent-discovery
|
|
314
|
+
// ---------------------------------------------------------------------------
|
|
315
|
+
|
|
316
|
+
// Unit tests for the 'theirstack-intent-discovery' provider entry: endpoint and
// HTTP method, the isApplicable gate, the request body built by mapParams, and hasResult.
describe('theirstack-intent-discovery', () => {
|
|
317
|
+
// Calls get() again on every use. get is defined outside this block —
// presumably a lookup by provider id; confirm against its definition.
const p = () => get('theirstack-intent-discovery')
|
|
318
|
+
|
|
319
|
+
it('routes to the company search endpoint', () => {
|
|
320
|
+
expect(p().endpoint).toBe('/theirstack/companies/search')
|
|
321
|
+
expect(p().method).toBe('POST')
|
|
322
|
+
})
|
|
323
|
+
|
|
324
|
+
// isApplicable cases: expected true only for signal_type 'intent' with topics
// and with neither companies nor domains supplied.
it('isApplicable: true for intent with topics and no companies/domains', () => {
|
|
325
|
+
expect(p().isApplicable!({ signal_type: 'intent', topics: ['sales-automation'] })).toBe(true)
|
|
326
|
+
})
|
|
327
|
+
|
|
328
|
+
it('isApplicable: false when companies present (verify mode handles that)', () => {
|
|
329
|
+
// Same input as the positive case, plus companies.
expect(p().isApplicable!({ signal_type: 'intent', topics: ['sales-automation'], companies: ['ColdIQ'] })).toBe(false)
|
|
330
|
+
})
|
|
331
|
+
|
|
332
|
+
it('isApplicable: false when domains present', () => {
|
|
333
|
+
// Same input as the positive case, plus domains.
expect(p().isApplicable!({ signal_type: 'intent', topics: ['sales-automation'], domains: ['coldiq.com'] })).toBe(false)
|
|
334
|
+
})
|
|
335
|
+
|
|
336
|
+
it('isApplicable: false when no topics', () => {
|
|
337
|
+
expect(p().isApplicable!({ signal_type: 'intent' })).toBe(false)
|
|
338
|
+
})
|
|
339
|
+
|
|
340
|
+
it('isApplicable: false for other signal types', () => {
|
|
341
|
+
expect(p().isApplicable!({ signal_type: 'funding', topics: ['x'] })).toBe(false)
|
|
342
|
+
})
|
|
343
|
+
|
|
344
|
+
// mapParams cases: the assertions below inspect the `body` of the mapped request.
it('mapParams forwards topics as company_keyword_slug_or', () => {
|
|
345
|
+
const result = p().mapParams({ signal_type: 'intent', topics: ['sales-automation', 'lead-generation'], limit: 20 })
|
|
346
|
+
const body = result.body as Record<string, unknown>
|
|
347
|
+
expect(body.company_keyword_slug_or).toEqual(['sales-automation', 'lead-generation'])
|
|
348
|
+
expect(body.limit).toBe(20)
|
|
349
|
+
// include_total_results is not part of the input; the mapper is expected to add it.
expect(body.include_total_results).toBe(true)
|
|
350
|
+
})
|
|
351
|
+
|
|
352
|
+
it('mapParams forwards industries and countries when present', () => {
|
|
353
|
+
const result = p().mapParams({
|
|
354
|
+
signal_type: 'intent',
|
|
355
|
+
topics: ['sales-automation'],
|
|
356
|
+
industries: ['Software'],
|
|
357
|
+
countries: ['US', 'GB'],
|
|
358
|
+
})
|
|
359
|
+
const body = result.body as Record<string, unknown>
|
|
360
|
+
expect(body.industry_or).toEqual(['Software'])
|
|
361
|
+
expect(body.company_country_code_or).toEqual(['US', 'GB'])
|
|
362
|
+
})
|
|
363
|
+
|
|
364
|
+
it('mapParams caps limit at 100', () => {
|
|
365
|
+
// 999 is above the cap, so the mapped body should carry 100 instead.
const result = p().mapParams({ signal_type: 'intent', topics: ['x'], limit: 999 })
|
|
366
|
+
expect((result.body as Record<string, unknown>).limit).toBe(100)
|
|
367
|
+
})
|
|
368
|
+
|
|
369
|
+
// hasResult cases: expected to report whether the response's data array has any entries.
it('hasResult: true when data non-empty', () => {
|
|
370
|
+
expect(p().hasResult({ data: [{ name: 'ColdIQ' }] })).toBe(true)
|
|
371
|
+
})
|
|
372
|
+
|
|
373
|
+
it('hasResult: false on empty data', () => {
|
|
374
|
+
expect(p().hasResult({ data: [] })).toBe(false)
|
|
375
|
+
})
|
|
376
|
+
|
|
377
|
+
})
|
|
377
|
+
|
|
312
378
|
// ---------------------------------------------------------------------------
|
|
313
379
|
// theirstack-buying-intents
|
|
314
380
|
// ---------------------------------------------------------------------------
|