@coldiq/mcp 0.2.8 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/dist/index.js +4 -0
  2. package/dist/index.js.map +1 -1
  3. package/dist/registry.d.ts +1 -1
  4. package/dist/registry.d.ts.map +1 -1
  5. package/dist/registry.js +99 -15
  6. package/dist/registry.js.map +1 -1
  7. package/dist/tools/extract-post-engagement.d.ts +21 -0
  8. package/dist/tools/extract-post-engagement.d.ts.map +1 -0
  9. package/dist/tools/extract-post-engagement.js +117 -0
  10. package/dist/tools/extract-post-engagement.js.map +1 -0
  11. package/dist/tools/find-influencers.d.ts +1 -1
  12. package/dist/tools/find-influencers.d.ts.map +1 -1
  13. package/dist/tools/find-influencers.js +2 -1
  14. package/dist/tools/find-influencers.js.map +1 -1
  15. package/dist/tools/find-signals.d.ts.map +1 -1
  16. package/dist/tools/find-signals.js +27 -10
  17. package/dist/tools/find-signals.js.map +1 -1
  18. package/dist/tools/get-place-reviews.d.ts +24 -0
  19. package/dist/tools/get-place-reviews.d.ts.map +1 -0
  20. package/dist/tools/get-place-reviews.js +46 -0
  21. package/dist/tools/get-place-reviews.js.map +1 -0
  22. package/dist/tools/search-ads.d.ts +1 -1
  23. package/dist/tools/search-ads.d.ts.map +1 -1
  24. package/dist/tools/search-ads.js +1 -1
  25. package/dist/tools/search-ads.js.map +1 -1
  26. package/dist/tools/search-places.d.ts +1 -1
  27. package/dist/tools/search-places.d.ts.map +1 -1
  28. package/dist/tools/search-places.js +23 -3
  29. package/dist/tools/search-places.js.map +1 -1
  30. package/dist/tools/search-reddit.js +1 -1
  31. package/dist/tools/search-reddit.js.map +1 -1
  32. package/package.json +1 -1
  33. package/src/index.ts +16 -0
  34. package/src/registry.ts +90 -4
  35. package/src/tools/extract-post-engagement.ts +135 -0
  36. package/src/tools/find-influencers.ts +2 -1
  37. package/src/tools/find-signals.ts +28 -11
  38. package/src/tools/get-place-reviews.ts +50 -0
  39. package/src/tools/search-ads.ts +1 -1
  40. package/src/tools/search-places.ts +22 -3
  41. package/src/tools/search-reddit.ts +1 -1
  42. package/tests/registry-find-signals.test.ts +66 -0
  43. package/tests/tools/extract-post-engagement.test.ts +76 -0
  44. package/tests/tools/find-signals.test.ts +5 -2
  45. package/tests/tools/get-place-reviews.test.ts +73 -0
  46. package/tests/tools/search-reddit.test.ts +69 -0
package/src/registry.ts CHANGED
@@ -54,6 +54,7 @@ export type Capability =
54
54
  | 'search_jobs'
55
55
  | 'search_ads'
56
56
  | 'search_places'
57
+ | 'get_place_reviews'
57
58
  | 'find_influencers'
58
59
  | 'search_reddit'
59
60
  | 'search_seo'
@@ -2468,6 +2469,31 @@ const searchPlacesProviders: ProviderEntry[] = [
2468
2469
  },
2469
2470
  ]
2470
2471
 
2472
+ // ---------------------------------------------------------------------------
2473
+ // get_place_reviews
2474
+ // ---------------------------------------------------------------------------
2475
+
2476
const getPlaceReviewsProviders: ProviderEntry[] = [
  {
    id: 'google_maps_reviews',
    endpoint: '/google-maps/reviews',
    method: 'POST',
    priority: 1,
    // Translate the tool input into the request body: each place URL becomes a
    // `{ url }` entry in `startUrls`; the remaining options are passed through.
    mapParams: (input) => {
      const placeUrls = input.place_urls as string[]
      const startUrls = placeUrls.map((url) => ({ url }))
      return {
        body: {
          startUrls,
          maxReviews: input.max_reviews,
          reviewsSort: input.sort,
          language: input.language,
        },
      }
    },
    // Any job whose status is 'done' counts as a result, even when it carries no
    // reviews (a place can legitimately have none). Other statuses do not count.
    hasResult: (data) => {
      const { status } = data as { status?: string }
      return status === 'done'
    },
    // Reuse the shared places async settings, overriding only the poll endpoint.
    async: {
      ..._placesSharedAsync,
      pollEndpoint: (id) => `/google-maps/reviews/${id}`,
    },
  },
]
2496
+
2471
2497
  // ---------------------------------------------------------------------------
2472
2498
  // find_influencers
2473
2499
  // ---------------------------------------------------------------------------
@@ -2543,6 +2569,22 @@ const _redditSharedAsync = {
2543
2569
  },
2544
2570
  }
2545
2571
 
2572
// Matches a "bare" subreddit URL only — scheme, optional `www.`, `reddit.com/r/<name>`
// and an optional trailing slash, with nothing after it. Capture group 1 is the URL
// without the trailing slash. Search URLs, post URLs, etc. do not match.
//
// Why this exists: a bare subreddit URL (e.g. https://www.reddit.com/r/sales/) makes
// the Apify actor enumerate the subreddit feed and ignore the search keyword. When a
// query is supplied too, such URLs are rewritten into in-subreddit search URLs so the
// keyword is actually applied.
const _BARE_SUBREDDIT_RE = /^(https?:\/\/(?:www\.)?reddit\.com\/r\/[A-Za-z0-9_]+)\/?$/i

/**
 * Rewrites a bare subreddit URL into an in-subreddit search URL for `query`.
 *
 * @param url   Candidate Reddit URL.
 * @param query Keyword to search for inside the subreddit.
 * @param opts  Optional `sort` / `time`; each is forwarded (as `sort` / `t`) only when
 *              it is a non-empty string.
 * @returns The search URL, or `url` unchanged when it is not a bare subreddit URL.
 */
function _toRedditSearchUrl(url: string, query: string, opts: { sort?: unknown; time?: unknown }): string {
  const match = _BARE_SUBREDDIT_RE.exec(url)
  if (match === null) {
    return url
  }

  const { sort, time } = opts
  const search = new URLSearchParams()
  search.append('q', query)
  search.append('restrict_sr', '1')
  if (typeof sort === 'string' && sort.length > 0) {
    search.append('sort', sort)
  }
  if (typeof time === 'string' && time.length > 0) {
    search.append('t', time)
  }
  return `${match[1]}/search/?${search.toString()}`
}
2587
+
2546
2588
  const searchRedditProviders: ProviderEntry[] = [
2547
2589
  {
2548
2590
  id: 'reddit',
@@ -2550,10 +2592,26 @@ const searchRedditProviders: ProviderEntry[] = [
2550
2592
  method: 'POST',
2551
2593
  priority: 1,
2552
2594
  isApplicable: (input) => isNonEmptyArray(input.start_urls) || typeof input.query === 'string',
2553
- mapParams: (input) => ({
2595
+ mapParams: (input) => {
2596
+ const query = typeof input.query === 'string' && input.query ? input.query : undefined
2597
+ const rawStartUrls = input.start_urls as string[] | undefined
2598
+ let startUrls = rawStartUrls?.map((url) => ({ url }))
2599
+ let searchQueries = query ? [query] : undefined
2600
+ // If a query is provided alongside start_urls, embed it into any bare
2601
+ // subreddit URLs (which would otherwise ignore it). The keyword then lives
2602
+ // in the URL, so drop the top-level searchQueries to avoid a conflicting
2603
+ // global search.
2604
+ if (query && rawStartUrls && rawStartUrls.length > 0) {
2605
+ const rewritten = rawStartUrls.map((u) => _toRedditSearchUrl(u, query, { sort: input.sort, time: input.time }))
2606
+ if (rewritten.some((u, i) => u !== rawStartUrls[i])) {
2607
+ startUrls = rewritten.map((url) => ({ url }))
2608
+ searchQueries = undefined
2609
+ }
2610
+ }
2611
+ return {
2554
2612
  body: {
2555
- searchQueries: input.query ? [input.query] : undefined,
2556
- startUrls: (input.start_urls as string[] | undefined)?.map((url) => ({ url })),
2613
+ searchQueries,
2614
+ startUrls,
2557
2615
  searchType: input.search_type ?? 'posts',
2558
2616
  searchCommunityName: input.search_community_name,
2559
2617
  sort: input.sort,
@@ -2564,7 +2622,7 @@ const searchRedditProviders: ProviderEntry[] = [
2564
2622
  postDateLimit: input.post_date_limit,
2565
2623
  commentDateLimit: input.comment_date_limit,
2566
2624
  },
2567
- }),
2625
+ }},
2568
2626
  hasResult: (data) => isNonEmptyArray((data as { items?: unknown[] }).items),
2569
2627
  async: {
2570
2628
  ..._redditSharedAsync,
@@ -3164,6 +3222,33 @@ const findSignalsProviders: ProviderEntry[] = [
3164
3222
  hasResult: (data) => isNonEmptyArray((data as Record<string, unknown>).data),
3165
3223
  },
3166
3224
  {
3225
+ // Topic-based DISCOVERY: "which companies show intent on topic X" with no
3226
+ // company list known in advance. Routes to /theirstack/companies/search,
3227
+ // which returns a list of companies filtered by buying-intent keyword slugs.
3228
+ // This is the GTM-primary use case (find prospects by intent), distinct from
3229
+ // theirstack-buying-intents which verifies intent on companies you already have.
3230
+ id: 'theirstack-intent-discovery',
3231
+ endpoint: '/theirstack/companies/search',
3232
+ method: 'POST',
3233
+ priority: 5,
3234
+ isApplicable: (input) =>
3235
+ input.signal_type === 'intent' &&
3236
+ isNonEmptyArray(input.topics) &&
3237
+ !isNonEmptyArray(input.companies) &&
3238
+ !isNonEmptyArray(input.domains),
3239
+ mapParams: (input) => ({
3240
+ body: {
3241
+ company_keyword_slug_or: input.topics,
3242
+ ...(isNonEmptyArray(input.industries) && { industry_or: input.industries }),
3243
+ ...(isNonEmptyArray(input.countries) && { company_country_code_or: input.countries }),
3244
+ limit: Math.min((input.limit as number | undefined) ?? 25, 100),
3245
+ include_total_results: true,
3246
+ },
3247
+ }),
3248
+ hasResult: (data) => isNonEmptyArray((data as Record<string, unknown>).data),
3249
+ },
3250
+ {
3251
+ // Verify intent on KNOWN companies/domains.
3167
3252
  id: 'theirstack-buying-intents',
3168
3253
  endpoint: '/theirstack/companies/buying_intents',
3169
3254
  method: 'POST',
@@ -3262,6 +3347,7 @@ const registry: Record<Capability, ProviderEntry[]> = {
3262
3347
  search_jobs: searchJobsProviders,
3263
3348
  search_ads: searchAdsProviders,
3264
3349
  search_places: searchPlacesProviders,
3350
+ get_place_reviews: getPlaceReviewsProviders,
3265
3351
  find_influencers: findInfluencersProviders,
3266
3352
  search_reddit: searchRedditProviders,
3267
3353
  search_seo: searchSeoProviders,
@@ -0,0 +1,135 @@
1
+ import { z } from 'zod'
2
+ import { callApi } from '../client.js'
3
+
4
+ export const extractPostEngagementName = 'extract_post_engagement'
5
+
6
+ export const extractPostEngagementDescription =
7
+ 'Extract the people who engaged with a LinkedIn post — commenters and/or reactors — as a deduplicated list of contacts (name, profile URL, headline). ' +
8
+ 'Use this for social-signal prospecting: pull everyone who engaged with a viral post, then chain the results into enrich_person / find_email to get roles and work emails. ' +
9
+ 'Runs an async extraction job (typically ~30–120s) and returns once the people are ready. Costs 10 credits per post.'
10
+
11
+ export const extractPostEngagementSchema = {
12
+ post_url: z
13
+ .string()
14
+ .url()
15
+ .describe('LinkedIn post URL to extract engagement from (e.g. "https://www.linkedin.com/feed/update/urn:li:activity:7234567890123456789" or a /posts/ permalink).'),
16
+ type: z
17
+ .enum(['comments', 'reactions', 'both'])
18
+ .default('both')
19
+ .describe('Which engagement to extract: "comments" (people who commented), "reactions" (people who reacted), or "both" (default — deduplicated across both).'),
20
+ include_replies: z
21
+ .boolean()
22
+ .optional()
23
+ .describe('When extracting comments, also include people who replied to comments. Defaults to true upstream.'),
24
+ }
25
+
26
/** Returns a promise that resolves (with no value) once a `setTimeout` of `ms` fires. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms)
  })
}
29
+
30
/**
 * Builds an error-flagged tool result whose single text item is the JSON encoding
 * of `{ error, ...extra }`.
 *
 * @param error Error message placed under the `error` key.
 * @param extra Optional additional fields merged after `error` (so a key named
 *              `error` in `extra` overrides the message).
 */
function errorResult(error: string, extra?: Record<string, unknown>) {
  const payload: Record<string, unknown> = { error }
  Object.assign(payload, extra)
  const text = JSON.stringify(payload)
  return {
    content: [{ type: 'text' as const, text }],
    isError: true,
  }
}
36
+
37
/**
 * Pulls an error message out of an API response body.
 *
 * @param data Response body of unknown shape.
 * @returns The `error` field as a string (JSON-encoded when it is not already a
 *          string), or `undefined` when `data` is not an object, has no `error`
 *          key, or the `error` value is null/undefined/blank.
 */
function extractErrorMessage(data: unknown): string | undefined {
  if (!data || typeof data !== 'object' || !('error' in data)) {
    return undefined
  }
  const e = (data as Record<string, unknown>).error
  // A null/undefined/blank `error` carries no information. Returning undefined
  // (instead of the literal text "null" or an empty string) lets callers'
  // `?? fallback` produce a meaningful message.
  if (e === null || e === undefined) {
    return undefined
  }
  if (typeof e === 'string') {
    return e.trim().length > 0 ? e : undefined
  }
  return JSON.stringify(e)
}
44
+
45
/**
 * Reads a non-negative integer from an environment variable.
 * Falls back to `fallback` when the variable is unset, not a number, or negative,
 * so a malformed value can never turn into a NaN deadline or a NaN sleep interval.
 */
function readNonNegativeIntEnv(name: string, fallback: number): number {
  const parsed = parseInt(process.env[name] ?? '', 10)
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback
}

/**
 * Tool handler: extracts the people who engaged with a LinkedIn post.
 *
 * Flow: (1) create an extraction task, (2) poll the task status until it reports
 * success/failure or the timeout elapses, (3) fetch the resulting contacts.
 *
 * Tunables (environment): `COLDIQ_ENGAGEMENT_POLL_MS` (default 2000) and
 * `COLDIQ_ENGAGEMENT_TIMEOUT_MS` (default 180000). Invalid values fall back to
 * the defaults.
 *
 * @param input Tool input; reads `post_url`, `type` (defaults to 'both') and
 *              `include_replies`.
 * @returns A tool result with one JSON text item: `{ data, _meta }` on success, or
 *          an `isError: true` result carrying `{ error, ... }` on failure.
 */
export async function extractPostEngagementHandler(input: Record<string, unknown>) {
  const postUrl = input.post_url as string
  const type = (input.type as 'comments' | 'reactions' | 'both' | undefined) ?? 'both'
  const includeReplies = input.include_replies as boolean | undefined

  const dataTypes =
    type === 'comments' ? ['comment'] : type === 'reactions' ? ['reaction'] : ['comment', 'reaction']

  // Step 1 — create the extraction task. Billing (10 credits) happens here.
  const createRes = await callApi('POST', '/jungler/workbooks', {
    post_url: postUrl,
    data_types: dataTypes,
  })
  if (!createRes.ok) {
    return errorResult(extractErrorMessage(createRes.data) ?? `Failed to start extraction (status ${createRes.status})`)
  }
  const createData = (createRes.data ?? {}) as { task_id?: string }
  const taskId = createData.task_id
  if (!taskId) {
    return errorResult('Extraction job did not return a task id')
  }
  // Credit headers are emitted on the create call (the only billed step).
  const creditsCharged = Number(createRes.headers['x-coldiq-credits-charged'])
  const creditsRemaining = Number(createRes.headers['x-coldiq-credits-remaining'])

  // Step 2 — poll task status until it resolves. The status endpoint is free, so
  // polling does not bill; only the create call above charged credits.
  const pollIntervalMs = readNonNegativeIntEnv('COLDIQ_ENGAGEMENT_POLL_MS', 2000)
  const timeoutMs = readNonNegativeIntEnv('COLDIQ_ENGAGEMENT_TIMEOUT_MS', 180000)
  const maxPollErrors = 3
  const deadline = Date.now() + timeoutMs

  let workbookId: string | undefined
  let consecutivePollErrors = 0

  while (Date.now() < deadline) {
    await sleep(pollIntervalMs)
    const statusRes = await callApi('GET', `/jungler/tasks/${taskId}/status`)
    if (!statusRes.ok) {
      // Tolerate transient failures; give up only after several in a row.
      consecutivePollErrors++
      if (consecutivePollErrors >= maxPollErrors) {
        return errorResult('Could not read extraction status — please retry', { post_url: postUrl })
      }
      continue
    }
    consecutivePollErrors = 0
    // Guard against an empty body so a missing payload is treated as "still pending"
    // rather than throwing.
    const statusData = (statusRes.data ?? {}) as { status?: string; workbook_id?: string }
    if (statusData.status === 'success') {
      if (!statusData.workbook_id) {
        // Finished but unusable: report it as such instead of a misleading timeout.
        return errorResult('Engagement extraction finished but returned no workbook id — please retry', { post_url: postUrl })
      }
      workbookId = statusData.workbook_id
      break
    }
    if (statusData.status === 'failure') {
      return errorResult('Engagement extraction failed upstream — the post may be private, deleted, or have no engagement', { post_url: postUrl })
    }
  }

  if (!workbookId) {
    return errorResult(`Engagement extraction did not complete within ${Math.round(timeoutMs / 1000)}s — try again shortly`, { post_url: postUrl })
  }

  // Step 3 — fetch the deduplicated people. activity_filter narrows to commenters
  // or reactors; omitted for "both" so the upstream returns all unique contacts.
  const queryParams: Record<string, string> = {}
  if (type === 'comments') queryParams.activity_filter = 'commenters'
  else if (type === 'reactions') queryParams.activity_filter = 'reactors'
  if (includeReplies !== undefined) queryParams.include_replies = String(includeReplies)

  const contactsRes = await callApi(
    'GET',
    `/jungler/workbooks/${workbookId}/contacts`,
    undefined,
    Object.keys(queryParams).length > 0 ? queryParams : undefined,
  )
  if (!contactsRes.ok) {
    return errorResult(extractErrorMessage(contactsRes.data) ?? 'Failed to fetch extracted people', { post_url: postUrl })
  }

  // Only surface credit figures that parsed to real numbers.
  const meta: Record<string, unknown> = {}
  if (Number.isFinite(creditsCharged)) meta.credits_charged = creditsCharged
  if (Number.isFinite(creditsRemaining)) meta.credits_remaining = creditsRemaining

  return {
    content: [{
      type: 'text' as const,
      text: JSON.stringify({
        data: { post_url: postUrl, type, people: contactsRes.data },
        _meta: meta,
      }),
    }],
  }
}
@@ -5,7 +5,8 @@ import { resolvePreferredProviders, getProvidersForCapability } from '../utils/p
5
5
  export const findInfluencersName = 'find_influencers'
6
6
 
7
7
  export const findInfluencersDescription =
8
- 'Discover and find influencers/creators on Instagram, YouTube, TikTok, Twitch, Twitter, and OnlyFans via 2 providers (Influencers Club Similar, Influencers Club Discovery). Routes by input: handle set → lookalike search (influencers_similar) runs first; no handle → keyword/filter discovery. Filters: location, gender, type (creator/business), AI natural language search, sort. Cost: 1 credit per result returned.'
8
+ 'Discover and find influencers/creators on Instagram, YouTube, TikTok, Twitch, Twitter, and OnlyFans via 2 providers (Influencers Club Similar, Influencers Club Discovery). Routes by input: handle set → lookalike search (influencers_similar) runs first; no handle → keyword/filter discovery. Filters: location, gender, type (creator/business), AI natural language search, sort. Cost: 1 credit per result returned. ' +
9
+ 'LIMITATIONS: LinkedIn is not a supported platform (the underlying creator index has no LinkedIn coverage) — for B2B/LinkedIn prospecting use extract_post_engagement to pull engagers off a specific LinkedIn post instead. There is no follower-count range filter; to bias toward a follower tier, set sort_by="number_of_followers" and filter the returned list client-side.'
9
10
 
10
11
  export const findInfluencersSchema = {
11
12
  platform: z.enum(['instagram', 'youtube', 'tiktok', 'twitch', 'twitter', 'onlyfans'])
@@ -9,7 +9,7 @@ export const findSignalsDescription =
9
9
  'Each call targets one signal type. Two modes: ' +
10
10
  'Company-targeted (funding | acquisition | hiring | job_change | intent): accepts companies/domains/industries/countries/since filters. ' +
11
11
  'funding additionally accepts `round_type` (e.g. ["Series A", "Seed"]). ' +
12
- 'intent REQUIRES at least one of companies or domains and additionally accepts `topics` (e.g. ["sales-automation"]) to narrow by intent keyword. ' +
12
+ 'intent has two modes: (a) DISCOVERY pass `topics` (e.g. ["sales-automation"]) with no companies/domains to find companies showing intent on those topics; (b) VERIFY — pass companies/domains to check intent on known companies. Requires topics OR companies/domains. ' +
13
13
  'Feed-style (news | startup_post): country and since only — does NOT filter by company. Passing companies/domains for these types is rejected. ' +
14
14
  'hiring returns individual job postings with company context (title, location, descriptionText, company industries) — for richer job-board queries with description/seniority/easy-apply filters use search_jobs instead.'
15
15
 
@@ -19,7 +19,7 @@ export const findSignalsSchema = {
19
19
  .describe(
20
20
  'Signal type to retrieve. ' +
21
21
  'Company-targeted: "funding" (fundraising rounds), "acquisition" (M&A), "hiring" (individual job postings indexed by Signalbase, with company context), ' +
22
- '"job_change" (people who recently changed roles), "intent" (companies showing buying intent). ' +
22
+ '"job_change" (people who recently changed roles), "intent" (companies showing buying intent — discover by `topics` or verify on known companies/domains). ' +
23
23
  'Feed-style (country/date filter only — company filter not supported): "news" (company news events), "startup_post" (Product Hunt, Hacker News, etc.)'
24
24
  ),
25
25
  companies: z
@@ -29,15 +29,15 @@ export const findSignalsSchema = {
29
29
  domains: z
30
30
  .array(z.string())
31
31
  .optional()
32
- .describe('Company domains to filter signals for (e.g. ["coldiq.com"]). Only used by company-targeted types. Required for intent when companies is absent.'),
32
+ .describe('Company domains to filter signals for (e.g. ["coldiq.com"]). Only used by company-targeted types. For intent VERIFY mode: pass companies or domains. For intent DISCOVERY mode: omit both and pass topics instead.'),
33
33
  since: z
34
34
  .string()
35
35
  .optional()
36
- .describe('Return signals after this date. ISO date format, e.g. "2026-01-01".'),
36
+ .describe('Return signals after this date. ISO date format, e.g. "2026-01-01". Honored by funding, acquisition, hiring, job_change, and startup_post. NOT supported for intent (TheirStack has no date filter on intent) — passing it has no effect.'),
37
37
  industries: z
38
38
  .array(z.string())
39
39
  .optional()
40
- .describe('Industry names to filter by (e.g. ["Software", "SaaS"]). Forwarded to upstream for funding and acquisition. For hiring, filtered client-side against each row\'s `industries` field (case-insensitive substring match). Ignored for job_change, intent, news, startup_post (those signal types have no industry data to filter on).'),
40
+ .describe('Industry names to filter by (e.g. ["Software", "SaaS"]). Forwarded to upstream for funding and acquisition. For hiring, filtered client-side against each row\'s `industries` field (case-insensitive substring match); Signalbase uses coarse labels (e.g. "Financial Services"), so prefer those over narrow terms like "Fintech" — if nothing matches, rows are returned UNFILTERED with a `_industry_filter` note rather than an empty set. For intent DISCOVERY, forwarded to TheirStack as `industry_or`. Ignored for job_change, news, startup_post.'),
41
41
  countries: z
42
42
  .array(z.string())
43
43
  .optional()
@@ -49,7 +49,7 @@ export const findSignalsSchema = {
49
49
  topics: z
50
50
  .array(z.string())
51
51
  .optional()
52
- .describe('Intent topic / keyword slugs (e.g. ["sales-automation", "lead-generation"]). Only honored by signal_type=intent (forwarded to TheirStack as `keyword_slug_or`). Note: topics is supplemental TheirStack still requires at least one of `companies` or `domains`, so topics narrows an existing company-targeted search rather than enabling pure topic discovery.'),
52
+ .describe('Intent topic / keyword slugs (e.g. ["sales-automation", "lead-generation"]). Only honored by signal_type=intent. DISCOVERY mode: pass topics WITHOUT companies/domains to find companies showing intent on these topics (forwarded to TheirStack company search as `company_keyword_slug_or`, returns a company list). VERIFY mode: pass topics WITH companies/domains to narrow intent results for those known companies (forwarded as `keyword_slug_or`).'),
53
53
  limit: z
54
54
  .number()
55
55
  .int()
@@ -65,11 +65,13 @@ export async function findSignalsHandler(input: Record<string, unknown>) {
65
65
  const hasCompanies = Array.isArray(restInput.companies) && (restInput.companies as unknown[]).length > 0
66
66
  const hasDomains = Array.isArray(restInput.domains) && (restInput.domains as unknown[]).length > 0
67
67
 
68
- if (restInput.signal_type === 'intent' && !hasCompanies && !hasDomains) {
68
+ const hasTopics = Array.isArray(restInput.topics) && (restInput.topics as unknown[]).length > 0
69
+
70
+ if (restInput.signal_type === 'intent' && !hasCompanies && !hasDomains && !hasTopics) {
69
71
  return {
70
72
  content: [{
71
73
  type: 'text' as const,
72
- text: JSON.stringify({ error: 'intent signal_type requires at least one of: companies or domains' }),
74
+ text: JSON.stringify({ error: 'intent signal_type requires at least one of: topics (to discover companies by intent topic), or companies/domains (to verify intent on known companies)' }),
73
75
  }],
74
76
  isError: true,
75
77
  }
@@ -110,21 +112,36 @@ export async function findSignalsHandler(input: Record<string, unknown>) {
110
112
  // `industries` param would otherwise be silently dropped. Filter client-side:
111
113
  // each hiring row carries an `industries` string (e.g. "Law Practice and Legal
112
114
  // Services") which we substring-match against the user-supplied list.
115
+ //
116
+ // Non-destructive fallback: Signalbase has no industry facet and tags rows with
117
+ // coarse labels (e.g. "Financial Services"), so a user term like "Fintech" can
118
+ // match nothing even when relevant rows exist. Rather than return a misleading
119
+ // empty set (which reads as "no companies are hiring"), when the filter would
120
+ // drop every row we keep the unfiltered rows and attach a note explaining that
121
+ // the industry filter matched nothing.
113
122
  if (restInput.signal_type === 'hiring' && Array.isArray(restInput.industries) && restInput.industries.length > 0) {
114
123
  const wanted = (restInput.industries as unknown[])
115
124
  .map((s) => (typeof s === 'string' ? s.toLowerCase() : ''))
116
125
  .filter((s) => s.length > 0)
117
126
  if (wanted.length > 0) {
118
- const typed = result as { data?: { data?: unknown[] } }
127
+ const typed = result as { data?: { data?: unknown[]; _industry_filter?: string } }
119
128
  const rows = typed.data?.data
120
- if (Array.isArray(rows)) {
121
- typed.data!.data = rows.filter((row) => {
129
+ if (Array.isArray(rows) && rows.length > 0) {
130
+ const filtered = rows.filter((row) => {
122
131
  if (!row || typeof row !== 'object') return false
123
132
  const industriesField = (row as Record<string, unknown>).industries
124
133
  if (typeof industriesField !== 'string' || industriesField.length === 0) return false
125
134
  const haystack = industriesField.toLowerCase()
126
135
  return wanted.some((needle) => haystack.includes(needle))
127
136
  })
137
+ if (filtered.length > 0) {
138
+ typed.data!.data = filtered
139
+ } else {
140
+ typed.data!._industry_filter =
141
+ `No hiring rows matched industries [${(restInput.industries as string[]).join(', ')}]. ` +
142
+ 'Signalbase tags hiring rows with coarse industry labels (e.g. "Financial Services"), so a narrow term may match nothing — results are returned UNFILTERED. ' +
143
+ 'Narrow with countries or a broader/more exact industry label (e.g. "Financial Services" instead of "Fintech").'
144
+ }
128
145
  }
129
146
  }
130
147
  }
@@ -0,0 +1,50 @@
1
+ import { z } from 'zod'
2
+ import { executeWithFallback, isExecutionError } from '../executor.js'
3
+ import { resolvePreferredProviders, getProvidersForCapability } from '../utils/provider-resolver.js'
4
+
5
+ export const getPlaceReviewsName = 'get_place_reviews'
6
+
7
+ export const getPlaceReviewsDescription =
8
+ 'Fetch Google Maps reviews for one or more places. Pass the Google Maps place URLs (from search_places results, the `url` field) and get back each place\'s reviews — useful for reputation management, local-services prospecting, and surfacing negative-review signals. ' +
9
+ 'search_places returns place listings WITHOUT review text; use this tool to get the actual review content. ' +
10
+ 'Runs an async job (~30–120s). Cost: 1 credit per review returned.'
11
+
12
+ export const getPlaceReviewsSchema = {
13
+ place_urls: z
14
+ .array(z.string().url())
15
+ .min(1)
16
+ .max(10)
17
+ .describe('Google Maps place URLs to scrape reviews from (1–10). Use the `url` field from search_places results, or a maps.google.com place/search URL.'),
18
+ max_reviews: z
19
+ .number()
20
+ .int()
21
+ .min(1)
22
+ .max(300)
23
+ .optional()
24
+ .describe('Maximum reviews to fetch per place (default 5, max 300). Each returned review costs 1 credit, so keep this tight.'),
25
+ sort: z
26
+ .enum(['mostRelevant', 'newest', 'highestRanking', 'lowestRanking'])
27
+ .optional()
28
+ .describe('Review sort order. Use "newest" for recent reviews or "lowestRanking" to surface negative reviews first. Default "mostRelevant".'),
29
+ language: z
30
+ .string()
31
+ .optional()
32
+ .describe('ISO 639-1 language code to filter reviews by language (e.g. "en", "fr").'),
33
+ use_providers: z
34
+ .array(z.string())
35
+ .optional()
36
+ .describe(`Optional ordered list of providers to use. Leave empty to let ColdIQ automatically pick — recommended. Available providers: ${getProvidersForCapability('get_place_reviews').join(', ')}. Provider names are matched fuzzily.`),
37
+ }
38
+
39
/**
 * Tool handler for `get_place_reviews`.
 *
 * Splits `use_providers` off the input, resolves the provider preference, then runs
 * the capability through the fallback executor. Provider-resolution errors and
 * execution errors are returned as `isError: true` results; otherwise the executor
 * result is returned as a single JSON text item.
 */
export async function getPlaceReviewsHandler(input: Record<string, unknown>) {
  const asText = (payload: unknown) => [{ type: 'text' as const, text: JSON.stringify(payload) }]
  const { use_providers: rawUseProviders, ...restInput } = input

  const resolved = resolvePreferredProviders('get_place_reviews', restInput, rawUseProviders)
  if (!resolved.ok) {
    return { content: asText(resolved.error), isError: true }
  }

  const result = await executeWithFallback('get_place_reviews', restInput, {
    providers: resolved.providers,
    matchedFrom: resolved.matchedFrom,
  })
  return isExecutionError(result)
    ? { content: asText(result), isError: true }
    : { content: asText(result) }
}
@@ -5,7 +5,7 @@ import { resolvePreferredProviders, getProvidersForCapability } from '../utils/p
5
5
  export const searchAdsName = 'search_ads'
6
6
 
7
7
  export const searchAdsDescription =
8
- 'Search live ad creatives across 5 ad libraries (Google Ads Transparency, LinkedIn Ad Library, Meta Ads Library, Twitter/X Ads, Reddit Ads) — a high-signal GTM input for competitive intelligence, ICP refinement, and pitch personalization. Routes by input: domains/advertiser_ids → Google only; search_urls → LinkedIn only; bare query → Google → Meta → Twitter → Reddit waterfall. Use platform="google"|"linkedin"|"meta"|"twitter"|"reddit" to pin to one platform. All providers are async (~10–60s). Cost: ~5 credits per call (Twitter charges 1 credit per ad returned; Meta does not refund on failure).'
8
+ 'Search live ad creatives across 5 ad libraries (Google Ads Transparency, LinkedIn Ad Library, Meta Ads Library, Twitter/X Ads, Reddit Ads) — a high-signal GTM input for competitive intelligence, ICP refinement, and pitch personalization. Routes by input: domains/advertiser_ids → Google only; search_urls → LinkedIn only; bare query → Google → Meta → Twitter → Reddit waterfall. Use platform="google"|"linkedin"|"meta"|"twitter"|"reddit" to pin to one platform. All providers are async (~10–60s). Cost: ~5 credits per call (Twitter charges 1 credit per ad returned). Credits are fully refunded when a run returns zero ads. NOTE: Google Ads creatives return image URLs + creative IDs, not ad copy text — open the image URLs to read the ad. There is no "currently running only" filter; results can span past campaigns.'
9
9
 
10
10
  export const searchAdsSchema = {
11
11
  query: z.string().optional().describe('Advertiser/company name or keyword. Routes to Google→Meta→Twitter→Reddit when no platform-specific input is set.'),
@@ -5,7 +5,7 @@ import { resolvePreferredProviders, getProvidersForCapability } from '../utils/p
5
5
  export const searchPlacesName = 'search_places'
6
6
 
7
7
  export const searchPlacesDescription =
8
- 'Search local businesses and places via 2 providers (Openmart Search, Google Maps Scraper) — useful for territory mapping, local-services prospecting, restaurant/retail/vertical research. Routes by input: structured filters or country in {US,CA,AU,PR,NZ} → Openmart (sync, ~1s) first, then Google Maps Scraper (async, ~30–120s) as fallback or for global coverage. Use provider="openmart"|"google_maps" to pin to one. Cost: 1 credit per place returned (both providers).'
8
+ 'Search local businesses and places via 2 providers (Openmart Search, Google Maps Scraper) — useful for territory mapping, local-services prospecting, restaurant/retail/vertical research. Routes by input: structured filters or country in {US,CA,AU,PR,NZ} → Openmart (sync, ~1s) first, then Google Maps Scraper (async, ~30–120s) as fallback or for global coverage. Use provider="openmart"|"google_maps" to pin to one. Cost: 1 credit per place returned (both providers). Results do NOT include review text — to fetch a place\'s reviews, pass its `url` to get_place_reviews.'
9
9
 
10
10
  export const searchPlacesSchema = {
11
11
  query: z.string().optional().describe('Free-text query (e.g. "coffee shops in Brooklyn", "law firm New York"). Used by both providers.'),
@@ -90,11 +90,30 @@ export async function searchPlacesHandler(input: Record<string, unknown>) {
90
90
  if (isExecutionError(result)) {
91
91
  return { content: [{ type: 'text' as const, text: JSON.stringify(result) }], isError: true }
92
92
  }
93
- result.data = applyPlaceFilters(result.data, {
93
+ const filters = {
94
94
  minRating: asNumber(restInput.min_overall_rating),
95
95
  maxRating: asNumber(restInput.max_overall_rating),
96
96
  minReviews: asNumber(restInput.min_total_reviews),
97
97
  maxReviews: asNumber(restInput.max_total_reviews),
98
- })
98
+ }
99
+ const scraped = placesCount(result.data)
100
+ result.data = applyPlaceFilters(result.data, filters)
101
+ const matched = placesCount(result.data)
102
+ // Google Maps bills per place scraped upstream (what ColdIQ pays the provider),
103
+ // but rating/review filters are applied here client-side. When the filter trims
104
+ // the set, make the gap explicit so the credit charge isn't surprising.
105
+ if (scraped !== undefined && matched !== undefined && matched < scraped) {
106
+ ;(result._meta as Record<string, unknown>).filtered = {
107
+ scraped,
108
+ matched,
109
+ note: 'Rating/review filters are applied client-side. You are billed per place scraped upstream (scraped), not per matched place.',
110
+ }
111
+ }
99
112
  return { content: [{ type: 'text' as const, text: JSON.stringify(result) }] }
100
113
  }
114
+
115
+ function placesCount(data: unknown): number | undefined {
116
+ if (!data || typeof data !== 'object') return undefined
117
+ const places = (data as Record<string, unknown>).places
118
+ return Array.isArray(places) ? places.length : undefined
119
+ }
@@ -11,7 +11,7 @@ export const searchRedditSchema = {
11
11
  start_urls: z.array(z.string().url()).max(25).optional()
12
12
  .describe('Reddit URLs to scrape (subreddit, post, user, or search URL). Up to 25. Provide this and/or query. Example: ["https://www.reddit.com/r/sales/"]'),
13
13
  query: z.string().optional()
14
- .describe('Keyword search query run across Reddit e.g. "best CRM for startups". Provide this and/or start_urls.'),
14
+ .describe('Keyword search query e.g. "best CRM for startups". Provide this and/or start_urls. When combined with a bare subreddit start_url (e.g. ".../r/sales/"), the query is applied as an in-subreddit search so only matching posts are returned (a bare subreddit URL alone would otherwise return its whole feed, ignoring the keyword).'),
15
15
  search_type: z.enum(['posts', 'comments', 'communities', 'users']).default('posts')
16
16
  .describe('What the search query returns: posts, comments, communities, or users.'),
17
17
  search_community_name: z.string().optional()
@@ -309,6 +309,72 @@ describe('signalbase-job-change', () => {
309
309
  })
310
310
  })
311
311
 
312
+ // ---------------------------------------------------------------------------
313
+ // theirstack-intent-discovery
314
+ // ---------------------------------------------------------------------------
315
+
316
+ describe('theirstack-intent-discovery', () => {
317
+ const p = () => get('theirstack-intent-discovery')
318
+
319
+ it('routes to the company search endpoint', () => {
320
+ expect(p().endpoint).toBe('/theirstack/companies/search')
321
+ expect(p().method).toBe('POST')
322
+ })
323
+
324
+ it('isApplicable: true for intent with topics and no companies/domains', () => {
325
+ expect(p().isApplicable!({ signal_type: 'intent', topics: ['sales-automation'] })).toBe(true)
326
+ })
327
+
328
+ it('isApplicable: false when companies present (verify mode handles that)', () => {
329
+ expect(p().isApplicable!({ signal_type: 'intent', topics: ['sales-automation'], companies: ['ColdIQ'] })).toBe(false)
330
+ })
331
+
332
+ it('isApplicable: false when domains present', () => {
333
+ expect(p().isApplicable!({ signal_type: 'intent', topics: ['sales-automation'], domains: ['coldiq.com'] })).toBe(false)
334
+ })
335
+
336
+ it('isApplicable: false when no topics', () => {
337
+ expect(p().isApplicable!({ signal_type: 'intent' })).toBe(false)
338
+ })
339
+
340
+ it('isApplicable: false for other signal types', () => {
341
+ expect(p().isApplicable!({ signal_type: 'funding', topics: ['x'] })).toBe(false)
342
+ })
343
+
344
+ it('mapParams forwards topics as company_keyword_slug_or', () => {
345
+ const result = p().mapParams({ signal_type: 'intent', topics: ['sales-automation', 'lead-generation'], limit: 20 })
346
+ const body = result.body as Record<string, unknown>
347
+ expect(body.company_keyword_slug_or).toEqual(['sales-automation', 'lead-generation'])
348
+ expect(body.limit).toBe(20)
349
+ expect(body.include_total_results).toBe(true)
350
+ })
351
+
352
+ it('mapParams forwards industries and countries when present', () => {
353
+ const result = p().mapParams({
354
+ signal_type: 'intent',
355
+ topics: ['sales-automation'],
356
+ industries: ['Software'],
357
+ countries: ['US', 'GB'],
358
+ })
359
+ const body = result.body as Record<string, unknown>
360
+ expect(body.industry_or).toEqual(['Software'])
361
+ expect(body.company_country_code_or).toEqual(['US', 'GB'])
362
+ })
363
+
364
+ it('mapParams caps limit at 100', () => {
365
+ const result = p().mapParams({ signal_type: 'intent', topics: ['x'], limit: 999 })
366
+ expect((result.body as Record<string, unknown>).limit).toBe(100)
367
+ })
368
+
369
+ it('hasResult: true when data non-empty', () => {
370
+ expect(p().hasResult({ data: [{ name: 'ColdIQ' }] })).toBe(true)
371
+ })
372
+
373
+ it('hasResult: false on empty data', () => {
374
+ expect(p().hasResult({ data: [] })).toBe(false)
375
+ })
376
+ })
377
+
312
378
  // ---------------------------------------------------------------------------
313
379
  // theirstack-buying-intents
314
380
  // ---------------------------------------------------------------------------