@coldiq/mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. package/dist/client.d.ts +8 -0
  2. package/dist/client.d.ts.map +1 -0
  3. package/dist/client.js +47 -0
  4. package/dist/client.js.map +1 -0
  5. package/dist/executor.d.ts +21 -0
  6. package/dist/executor.d.ts.map +1 -0
  7. package/dist/executor.js +130 -0
  8. package/dist/executor.js.map +1 -0
  9. package/dist/index.d.ts +3 -0
  10. package/dist/index.d.ts.map +1 -0
  11. package/dist/index.js +49 -0
  12. package/dist/index.js.map +1 -0
  13. package/dist/registry.d.ts +49 -0
  14. package/dist/registry.d.ts.map +1 -0
  15. package/dist/registry.js +3104 -0
  16. package/dist/registry.js.map +1 -0
  17. package/dist/tools/enrich-company.d.ts +22 -0
  18. package/dist/tools/enrich-company.d.ts.map +1 -0
  19. package/dist/tools/enrich-company.js +21 -0
  20. package/dist/tools/enrich-company.js.map +1 -0
  21. package/dist/tools/enrich-email.d.ts +24 -0
  22. package/dist/tools/enrich-email.d.ts.map +1 -0
  23. package/dist/tools/enrich-email.js +19 -0
  24. package/dist/tools/enrich-email.js.map +1 -0
  25. package/dist/tools/enrich-emails.d.ts +31 -0
  26. package/dist/tools/enrich-emails.d.ts.map +1 -0
  27. package/dist/tools/enrich-emails.js +146 -0
  28. package/dist/tools/enrich-emails.js.map +1 -0
  29. package/dist/tools/enrich-person.d.ts +26 -0
  30. package/dist/tools/enrich-person.d.ts.map +1 -0
  31. package/dist/tools/enrich-person.js +23 -0
  32. package/dist/tools/enrich-person.js.map +1 -0
  33. package/dist/tools/fetch-page-content.d.ts +22 -0
  34. package/dist/tools/fetch-page-content.d.ts.map +1 -0
  35. package/dist/tools/fetch-page-content.js +32 -0
  36. package/dist/tools/fetch-page-content.js.map +1 -0
  37. package/dist/tools/find-email.d.ts +24 -0
  38. package/dist/tools/find-email.d.ts.map +1 -0
  39. package/dist/tools/find-email.js +19 -0
  40. package/dist/tools/find-email.js.map +1 -0
  41. package/dist/tools/find-emails.d.ts +31 -0
  42. package/dist/tools/find-emails.d.ts.map +1 -0
  43. package/dist/tools/find-emails.js +146 -0
  44. package/dist/tools/find-emails.js.map +1 -0
  45. package/dist/tools/find-influencers.d.ts +29 -0
  46. package/dist/tools/find-influencers.d.ts.map +1 -0
  47. package/dist/tools/find-influencers.js +30 -0
  48. package/dist/tools/find-influencers.js.map +1 -0
  49. package/dist/tools/find-people.d.ts +26 -0
  50. package/dist/tools/find-people.d.ts.map +1 -0
  51. package/dist/tools/find-people.js +61 -0
  52. package/dist/tools/find-people.js.map +1 -0
  53. package/dist/tools/find-phone.d.ts +24 -0
  54. package/dist/tools/find-phone.d.ts.map +1 -0
  55. package/dist/tools/find-phone.js +48 -0
  56. package/dist/tools/find-phone.js.map +1 -0
  57. package/dist/tools/find-signals.d.ts +26 -0
  58. package/dist/tools/find-signals.d.ts.map +1 -0
  59. package/dist/tools/find-signals.js +82 -0
  60. package/dist/tools/find-signals.js.map +1 -0
  61. package/dist/tools/search-ads.d.ts +33 -0
  62. package/dist/tools/search-ads.d.ts.map +1 -0
  63. package/dist/tools/search-ads.js +33 -0
  64. package/dist/tools/search-ads.js.map +1 -0
  65. package/dist/tools/search-companies.d.ts +42 -0
  66. package/dist/tools/search-companies.d.ts.map +1 -0
  67. package/dist/tools/search-companies.js +37 -0
  68. package/dist/tools/search-companies.js.map +1 -0
  69. package/dist/tools/search-jobs.d.ts +51 -0
  70. package/dist/tools/search-jobs.d.ts.map +1 -0
  71. package/dist/tools/search-jobs.js +64 -0
  72. package/dist/tools/search-jobs.js.map +1 -0
  73. package/dist/tools/search-places.d.ts +47 -0
  74. package/dist/tools/search-places.d.ts.map +1 -0
  75. package/dist/tools/search-places.js +42 -0
  76. package/dist/tools/search-places.js.map +1 -0
  77. package/dist/tools/search-reddit.d.ts +27 -0
  78. package/dist/tools/search-reddit.d.ts.map +1 -0
  79. package/dist/tools/search-reddit.js +30 -0
  80. package/dist/tools/search-reddit.js.map +1 -0
  81. package/dist/tools/search-seo.d.ts +37 -0
  82. package/dist/tools/search-seo.d.ts.map +1 -0
  83. package/dist/tools/search-seo.js +49 -0
  84. package/dist/tools/search-seo.js.map +1 -0
  85. package/dist/tools/search-web.d.ts +23 -0
  86. package/dist/tools/search-web.d.ts.map +1 -0
  87. package/dist/tools/search-web.js +20 -0
  88. package/dist/tools/search-web.js.map +1 -0
  89. package/dist/tools/verify-email.d.ts +20 -0
  90. package/dist/tools/verify-email.d.ts.map +1 -0
  91. package/dist/tools/verify-email.js +15 -0
  92. package/dist/tools/verify-email.js.map +1 -0
  93. package/package.json +28 -0
  94. package/src/client.ts +60 -0
  95. package/src/executor.ts +182 -0
  96. package/src/index.ts +155 -0
  97. package/src/registry.ts +3159 -0
  98. package/src/tools/enrich-company.ts +25 -0
  99. package/src/tools/enrich-person.ts +27 -0
  100. package/src/tools/fetch-page-content.ts +36 -0
  101. package/src/tools/find-email.ts +23 -0
  102. package/src/tools/find-emails.ts +190 -0
  103. package/src/tools/find-influencers.ts +34 -0
  104. package/src/tools/find-people.ts +69 -0
  105. package/src/tools/find-phone.ts +53 -0
  106. package/src/tools/find-signals.ts +93 -0
  107. package/src/tools/search-ads.ts +44 -0
  108. package/src/tools/search-companies.ts +41 -0
  109. package/src/tools/search-jobs.ts +73 -0
  110. package/src/tools/search-places.ts +52 -0
  111. package/src/tools/search-reddit.ts +34 -0
  112. package/src/tools/search-seo.ts +59 -0
  113. package/src/tools/search-web.ts +24 -0
  114. package/src/tools/verify-email.ts +19 -0
  115. package/test-ads-live.ts +77 -0
  116. package/test-company-live.ts +91 -0
  117. package/test-email-live.ts +171 -0
  118. package/test-influencers-live.ts +66 -0
  119. package/test-jobs-live.ts +69 -0
  120. package/test-linkupapi-live.ts +137 -0
  121. package/test-phone-live.ts +41 -0
  122. package/test-places-live.ts +89 -0
  123. package/test-reddit-live.ts +66 -0
  124. package/test-search-live.ts +79 -0
  125. package/test-seo-live.ts +68 -0
  126. package/test-web-live.ts +67 -0
  127. package/tests/client.test.ts +90 -0
  128. package/tests/executor.test.ts +83 -0
  129. package/tests/gtm/01-icp-to-emails.test.ts +43 -0
  130. package/tests/gtm/02-icp-bulk-emails.test.ts +38 -0
  131. package/tests/gtm/03-icp-to-phones.test.ts +39 -0
  132. package/tests/gtm/04-funding-signal-outreach.test.ts +42 -0
  133. package/tests/gtm/05-hiring-signal-decisionmakers.test.ts +41 -0
  134. package/tests/gtm/06-intent-signal-outreach.test.ts +44 -0
  135. package/tests/gtm/07-places-to-content.test.ts +50 -0
  136. package/tests/gtm/08-domain-to-account.test.ts +44 -0
  137. package/tests/gtm/09-linkedin-to-everything.test.ts +41 -0
  138. package/tests/gtm/10-jobs-vs-signals-routing.test.ts +38 -0
  139. package/tests/gtm/11-find-vs-enrich-routing.test.ts +39 -0
  140. package/tests/gtm/12-bogus-domain-graceful.test.ts +42 -0
  141. package/tests/gtm/13-private-linkedin-graceful.test.ts +44 -0
  142. package/tests/gtm/14-empty-handoff.test.ts +43 -0
  143. package/tests/gtm/15-seo-reddit-research.test.ts +38 -0
  144. package/tests/gtm/README.md +59 -0
  145. package/tests/gtm/harness.ts +217 -0
  146. package/tests/gtm/tools-bridge.ts +232 -0
  147. package/tests/gtm-scenarios.md +32 -0
  148. package/tests/live/smoke-report.ts +255 -0
  149. package/tests/live/smoke.test.ts +134 -0
  150. package/tests/registry-enrich-person.test.ts +447 -0
  151. package/tests/registry-fetch-page-content.test.ts +90 -0
  152. package/tests/registry-find-people.test.ts +467 -0
  153. package/tests/registry-find-signals.test.ts +470 -0
  154. package/tests/registry-linkupapi.test.ts +331 -0
  155. package/tests/registry-search-companies.test.ts +188 -0
  156. package/tests/registry-search-jobs.test.ts +116 -0
  157. package/tests/registry.test.ts +2210 -0
  158. package/tests/tools/enrich-company.test.ts +92 -0
  159. package/tests/tools/enrich-email.test.ts +94 -0
  160. package/tests/tools/enrich-emails.test.ts +271 -0
  161. package/tests/tools/enrich-person.test.ts +140 -0
  162. package/tests/tools/fetch-page-content.test.ts +108 -0
  163. package/tests/tools/find-influencers.test.ts +91 -0
  164. package/tests/tools/find-people.test.ts +344 -0
  165. package/tests/tools/find-phone.test.ts +100 -0
  166. package/tests/tools/find-signals.test.ts +110 -0
  167. package/tests/tools/search-ads.test.ts +182 -0
  168. package/tests/tools/search-companies.test.ts +58 -0
  169. package/tests/tools/search-jobs.test.ts +210 -0
  170. package/tests/tools/search-places.test.ts +114 -0
  171. package/tests/tools/search-reddit.test.ts +125 -0
  172. package/tests/tools/search-seo.test.ts +183 -0
  173. package/tests/tools/search-web.test.ts +79 -0
  174. package/tests/tools/verify-email.test.ts +68 -0
  175. package/tsconfig.json +17 -0
  176. package/vitest.config.ts +7 -0
@@ -0,0 +1,42 @@
1
+ /**
2
+ * GTM-12: Edge case — nonexistent domain, graceful no-result
3
+ *
4
+ * The email lookup for a clearly fake domain should:
5
+ * 1. Call find_email (not crash, not skip the call)
6
+ * 2. Return no result (not a server error on our side)
7
+ * 3. Agent reports "not found" gracefully — does NOT proceed to verify_email
8
+ *
9
+ * Expected: find_email called, isError falsy OR explicit no-result response
10
+ * Forbidden: verify_email (nothing to verify when email is not found), crash
11
+ * Est. credits: ~1
12
+ */
13
+
14
+ import { describe, it, expect, beforeAll } from 'vitest'
15
+ import { initClient } from '../../src/client.js'
16
+ import { runAgent, expectTool, expectNoTool, writeArtifact } from './harness.js'
17
+
18
+ const RUN_LIVE =
19
+ process.env.LIVE_TESTS === '1' && !!process.env.ANTHROPIC_API_KEY && !!process.env.COLDIQ_API_KEY
20
+
21
// GTM-12 live scenario: a lookup against a clearly fake domain must route to
// find_email, must not continue to verify_email, and must still end with a
// non-empty final answer from the agent.
describe.runIf(RUN_LIVE)('gtm-12: bogus domain → graceful no-result', () => {
  beforeAll(() => {
    initClient(process.env.COLDIQ_API_URL, process.env.COLDIQ_API_KEY)
  })

  it('calls find_email, gets no result, does not proceed to verify, reports gracefully', async () => {
    const result = await runAgent({
      prompt:
        'Find the professional email for Jane Smith at the company xyz-totally-fake-domain-9823746.io. ' +
        'Their domain is xyz-totally-fake-domain-9823746.io.',
      maxToolCalls: 4,
    })

    // Persist the transcript BEFORE asserting: the assertion helpers throw, and a
    // failing run is exactly when the artifact is needed for debugging.
    writeArtifact('12-bogus-domain-graceful', result)

    expectTool(result.transcript, 'find_email')
    expectNoTool(result.transcript, 'verify_email')

    // The agent must finish with some final text (only non-emptiness is checked here).
    expect(result.finalText.length, 'Agent should produce a final response').toBeGreaterThan(0)
  }, 3 * 60_000)
})
@@ -0,0 +1,44 @@
1
+ /**
2
+ * GTM-13: Edge case — private/nonexistent LinkedIn profile, graceful empty result
3
+ *
4
+ * find_phone on a profile with no public phone should:
5
+ * 1. Call find_phone (not skip, not enrich_person instead)
6
+ * 2. Return empty or no-result — not a thrown error
7
+ * 3. Agent acknowledges no phone was found
8
+ *
9
+ * Expected: find_phone called, result gracefully empty
10
+ * Forbidden: enrich_person (wrong tool for phone lookup), crash
11
+ * Est. credits: ~2
12
+ */
13
+
14
+ import { describe, it, expect, beforeAll } from 'vitest'
15
+ import { initClient } from '../../src/client.js'
16
+ import { runAgent, expectTool, expectNoTool, writeArtifact } from './harness.js'
17
+
18
+ const RUN_LIVE =
19
+ process.env.LIVE_TESTS === '1' && !!process.env.ANTHROPIC_API_KEY && !!process.env.COLDIQ_API_KEY
20
+
21
// GTM-13 live scenario: a phone lookup on a private/nonexistent LinkedIn profile
// must route to find_phone (not enrich_person), finish with a non-empty final
// answer, and record at most four tool calls.
describe.runIf(RUN_LIVE)('gtm-13: private/nonexistent LinkedIn → graceful phone result', () => {
  beforeAll(() => {
    initClient(process.env.COLDIQ_API_URL, process.env.COLDIQ_API_KEY)
  })

  it('calls find_phone on a profile that has no public number, reports gracefully', async () => {
    const result = await runAgent({
      prompt:
        'Get the phone number for the person at this LinkedIn URL: ' +
        'https://www.linkedin.com/in/john-doe-private-profile-xyz99887766. ' +
        "Just the phone number. If you can't find one, say so.",
      maxToolCalls: 4,
    })

    // Persist the transcript BEFORE asserting: the assertion helpers throw, and a
    // failing run is exactly when the artifact is needed for debugging.
    writeArtifact('13-private-linkedin-graceful', result)

    expectTool(result.transcript, 'find_phone')
    expectNoTool(result.transcript, 'enrich_person')

    // Agent must produce a final text — no crash, no infinite retry
    expect(result.finalText.length, 'Agent should produce a final response').toBeGreaterThan(0)
    expect(result.transcript.length, 'Should not retry excessively').toBeLessThanOrEqual(4)
  }, 4 * 60_000)
})
@@ -0,0 +1,43 @@
1
+ /**
2
+ * GTM-14: Edge case — nonexistent company domain → empty enrich → agent stops cleanly
3
+ *
4
+ * When enrich_company returns no data for a clearly fake domain, the agent must NOT
5
+ * call find_people (nothing to hand off). Tests the empty-handoff stop behaviour.
6
+ *
7
+ * Note: search_companies with impossible filters is NOT reliable for this test because
8
+ * all live providers use semantic/approximate matching and always return something.
9
+ * enrich_company on a fake domain is the stable zero-result signal.
10
+ *
11
+ * Expected: enrich_company called, returns empty/error, agent stops
12
+ * Forbidden: find_people called after empty enrich_company result
13
+ * Est. credits: ~1
14
+ */
15
+
16
+ import { describe, it, expect, beforeAll } from 'vitest'
17
+ import { initClient } from '../../src/client.js'
18
+ import { runAgent, expectTool, expectNoTool, writeArtifact } from './harness.js'
19
+
20
+ const RUN_LIVE =
21
+ process.env.LIVE_TESTS === '1' && !!process.env.ANTHROPIC_API_KEY && !!process.env.COLDIQ_API_KEY
22
+
23
// GTM-14 live scenario: after enrich_company on a fake domain the agent must not
// hand off to find_people, and must still end with a non-empty final answer.
describe.runIf(RUN_LIVE)('gtm-14: empty enrich result → agent stops chain cleanly', () => {
  beforeAll(() => {
    initClient(process.env.COLDIQ_API_URL, process.env.COLDIQ_API_KEY)
  })

  it('does not call find_people when enrich_company returns no data', async () => {
    const result = await runAgent({
      prompt:
        'Enrich the company at domain xxy--totally-fake-99823746.io and then find their CEO.',
      maxToolCalls: 6,
    })

    // Persist the transcript BEFORE asserting: the assertion helpers throw, and a
    // failing run is exactly when the artifact is needed for debugging.
    writeArtifact('14-empty-handoff', result)

    expectTool(result.transcript, 'enrich_company')
    expectNoTool(result.transcript, 'find_people')

    // The agent must finish with some final text (only non-emptiness is checked here).
    expect(result.finalText.length, 'Agent should produce a final response').toBeGreaterThan(0)
  }, 4 * 60_000)
})
@@ -0,0 +1,38 @@
1
+ /**
2
+ * GTM-15: Market research — SEO data + Reddit sentiment (parallel tools)
3
+ *
4
+ * Prompt: GTM engineer wants to understand market positioning and community sentiment.
5
+ * Expected flow: search_seo AND search_reddit (both must be called; order may vary)
6
+ * Forbidden: search_web (for SERP queries, use search_seo; for community, use search_reddit)
7
+ * Est. credits: ~4
8
+ */
9
+
10
+ import { describe, it, beforeAll } from 'vitest'
11
+ import { initClient } from '../../src/client.js'
12
+ import { runAgent, expectTool, expectNoTool, writeArtifact } from './harness.js'
13
+
14
+ const RUN_LIVE =
15
+ process.env.LIVE_TESTS === '1' && !!process.env.ANTHROPIC_API_KEY && !!process.env.COLDIQ_API_KEY
16
+
17
// GTM-15 live scenario: a market-research prompt must trigger both search_seo and
// search_reddit (in any order) and must not fall back to search_web.
describe.runIf(RUN_LIVE)('gtm-15: SEO SERP + Reddit sentiment research', () => {
  beforeAll(() => {
    initClient(process.env.COLDIQ_API_URL, process.env.COLDIQ_API_KEY)
  })

  it('calls both search_seo and search_reddit for market research', async () => {
    const result = await runAgent({
      prompt:
        "I want to understand the market for 'cold email tools'. " +
        "1) Show me the top Google search results for the keyword 'cold email tool' (SERP, limit 2). " +
        "2) Show me what people are saying about it on Reddit (limit 1 thread). " +
        'Give me both results separately. Do not use a generic web search for either.',
      maxToolCalls: 6,
    })

    // Persist the transcript BEFORE asserting: the assertion helpers throw, and a
    // failing run is exactly when the artifact is needed for debugging.
    writeArtifact('15-seo-reddit-research', result)

    expectTool(result.transcript, 'search_seo')
    expectTool(result.transcript, 'search_reddit')
    expectNoTool(result.transcript, 'search_web')
  }, 8 * 60_000)
})
@@ -0,0 +1,59 @@
1
+ # GTM Prompt-Based Eval Suite
2
+
3
+ End-to-end scenarios that verify the MCP server routes plain-English prompts to the correct tools and produces valid data. Each file is independent and runnable individually.
4
+
5
+ ## Required env vars
6
+
7
+ | Var | Purpose |
8
+ |---|---|
9
+ | `COLDIQ_API_KEY` | ColdIQ API key (live provider calls) |
10
+ | `COLDIQ_API_URL` | API base URL (default: `http://localhost:8787`) |
11
+ | `ANTHROPIC_API_KEY` | Anthropic API key (Claude agent loop) |
12
+ | `AGENT_MODEL` | Override Claude model (default: `claude-sonnet-4-6`) |
13
+
14
+ ## Run commands
15
+
16
+ ```bash
17
+ cd mcp
18
+
19
+ # Full suite (~50 ColdIQ credits, ~$1-3 Anthropic, 5–15 min)
20
+ LIVE_TESTS=1 ANTHROPIC_API_KEY=… COLDIQ_API_KEY=… COLDIQ_API_URL=https://api.coldiq.com npm run test:gtm
21
+
22
+ # Single scenario (cheap, ~1–4 credits)
23
+ LIVE_TESTS=1 ANTHROPIC_API_KEY=… COLDIQ_API_KEY=… COLDIQ_API_URL=https://api.coldiq.com \
24
+ npx vitest run tests/gtm/11-find-vs-enrich-routing.test.ts --reporter=verbose
25
+
26
+ # Without LIVE_TESTS=1, all GTM tests are silently skipped (safe for CI)
27
+ npm test
28
+ ```
29
+
30
+ ## Scenario index
31
+
32
+ | File | Scenario | Key routing assertion | Est. credits |
33
+ |---|---|---|---|
34
+ | 01 | ICP → emails (CSV) | search_companies → find_people → find_email → verify_email | 4 |
35
+ | 02 | Multi-company bulk emails | find_people → find_emails (batch, not per-person) | 4 |
36
+ | 03 | Person → phone number | find_people → find_phone (not enrich_person) | 3 |
37
+ | 04 | Funding signal → outreach | find_signals(funding) → find_people → find_email | 4 |
38
+ | 05 | Job posting → VP Sales | search_jobs (not find_signals) → find_people | 4 |
39
+ | 06 | Intent signal → outreach | find_signals(intent) → find_people → find_email | 4 |
40
+ | 07 | Place → website content | search_places → fetch_page_content | 3 |
41
+ | 08 | Domain → account intel | enrich_company → find_people → find_email | 4 |
42
+ | 09 | LinkedIn URL → full profile | enrich_person → enrich_company (not find_people) | 3 |
43
+ | 10 | Routing: jobs vs signals | search_jobs only (NOT find_signals) | 2 |
44
+ | 11 | Routing: find_email vs enrich | find_email only (NOT enrich_person) | 1 |
45
+ | 12 | Bogus domain graceful | find_email → no result → no verify_email | 1 |
46
+ | 13 | Private LinkedIn graceful | find_phone → empty → agent stops | 2 |
47
+ | 14 | Empty handoff | enrich_company → no data → no find_people | 1 |
48
+ | 15 | SEO + Reddit research | search_seo + search_reddit (not search_web) | 4 |
49
+
50
+ ## Artifacts
51
+
52
+ Each test run writes `tests/gtm/.artifacts/<slug>.json` with the full transcript: every tool called, its input params, the parsed output, elapsed time, and the `_meta.provider` field. Inspect these to debug failures or audit credit usage. The `.artifacts/` directory is gitignored.
53
+
54
+ ## What a failure means
55
+
56
+ - **Wrong tool called** or **expected tool not called** → routing bug in a tool description or schema. Fix the description, re-run.
57
+ - **Tool called with wrong params** → schema field description is unclear. Fix the `describe()` on the Zod field.
58
+ - **`isError: true` on every provider** → provider is down or API key is invalid. Not an MCP bug; re-run later.
59
+ - **Timeout** → async provider (search_jobs, search_places, search_reddit) is slow. Increase timeout or check provider health.
@@ -0,0 +1,217 @@
1
+ import Anthropic from '@anthropic-ai/sdk'
2
+ import { zodToJsonSchema } from 'zod-to-json-schema'
3
+ import * as fs from 'node:fs'
4
+ import * as path from 'node:path'
5
+ import * as url from 'node:url'
6
+
7
+ import { MCP_TOOLS } from './tools-bridge.js'
8
+
9
/**
 * One tool invocation as recorded in the transcript built by runAgent.
 *
 * - tool:     name taken from the model's tool_use block
 * - input:    the tool_use block's input, as sent by the model
 * - output:   the handler's first content text parsed as JSON, or `{ raw: text }`
 *             when that text is not valid JSON
 * - ms:       elapsed time of the handler call, in milliseconds
 * - provider: copied from `output._meta.provider` when present
 * - isError:  the handler result's `isError` flag; runAgent sets it to true when
 *             the handler throws
 */
+ export interface ToolCall {
10
+ tool: string
11
+ input: Record<string, unknown>
12
+ output: Record<string, unknown>
13
+ ms: number
14
+ provider?: string
15
+ isError?: boolean
16
+ }
17
+
18
/**
 * Result of one runAgent invocation.
 *
 * - transcript: recorded tool calls, in the order they were executed
 * - finalText:  text blocks of the first response whose stop_reason is not
 *               'tool_use', joined with newlines; stays '' if the loop ends
 *               without such a response
 * - totalMs:    elapsed time of the whole run, in milliseconds
 */
+ export interface AgentResult {
19
+ transcript: ToolCall[]
20
+ finalText: string
21
+ totalMs: number
22
+ }
23
+
24
/**
 * Options accepted by runAgent.
 *
 * - prompt:       sent as the single initial user message
 * - maxToolCalls: upper bound on agent loop iterations (default 8). NOTE: each
 *                 iteration is one model round-trip and may execute several
 *                 tool_use blocks, so this is a round limit rather than a strict
 *                 cap on individual tool calls.
 * - model:        model id; defaults to process.env.AGENT_MODEL, then
 *                 'claude-sonnet-4-6'
 */
+ export interface RunAgentOptions {
25
+ prompt: string
26
+ maxToolCalls?: number
27
+ model?: string
28
+ }
29
+
30
+ const SYSTEM_PROMPT = `You are a GTM engineer assistant with access to ColdIQ intelligence tools. Use them to answer the user's request.
31
+
32
+ Rules:
33
+ - Always use the minimum sample size: limit 1, max_profiles 1, or the smallest valid value for every size/count parameter.
34
+ - Never invent company names, domains, emails, LinkedIn URLs, or IDs. Only use values returned by tools.
35
+ - When chaining tools, pass actual values from the previous tool's output — never fabricate data.
36
+ - If a tool returns no results, report that clearly and stop the chain. Do not retry with fabricated inputs.
37
+ - Always call tools to answer the user's request — do not answer from your own training knowledge.`
38
+
39
/**
 * Parses a tool's text payload into the object stored in the transcript.
 * Anything that is not a JSON object (invalid JSON, `null`, primitives) is
 * wrapped as `{ raw: text }` so later property reads such as `_meta` are safe.
 */
function parseToolOutput(text: string): Record<string, unknown> {
  try {
    const value: unknown = JSON.parse(text)
    if (value !== null && typeof value === 'object') {
      return value as Record<string, unknown>
    }
  } catch {
    // Not JSON — deliberately fall through and keep the raw text instead.
  }
  return { raw: text }
}

/**
 * Runs a tool-use agent loop against the Anthropic Messages API, executing the
 * handlers from MCP_TOOLS in-process and recording every executed call.
 *
 * @param options.prompt       Sent as the single initial user message.
 * @param options.maxToolCalls Maximum number of loop iterations (default 8). Each
 *                             iteration is one model round-trip and may execute
 *                             several tool_use blocks, so the transcript can hold
 *                             more entries than this value.
 * @param options.model        Model id; defaults to process.env.AGENT_MODEL, then
 *                             'claude-sonnet-4-6'.
 * @returns The transcript of executed tool calls, the final text (empty string
 *          when the iteration budget runs out before the model stops calling
 *          tools) and the total elapsed milliseconds.
 */
export async function runAgent(options: RunAgentOptions): Promise<AgentResult> {
  const {
    prompt,
    maxToolCalls = 8,
    model = process.env.AGENT_MODEL ?? 'claude-sonnet-4-6',
  } = options

  const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY })

  const anthropicTools: Anthropic.Tool[] = MCP_TOOLS.map((t) => ({
    name: t.name,
    description: t.description,
    input_schema: zodToJsonSchema(t.schema, { target: 'openApi3' }) as Anthropic.Tool['input_schema'],
  }))

  const messages: Anthropic.MessageParam[] = [{ role: 'user', content: prompt }]
  const transcript: ToolCall[] = []
  const startMs = Date.now()
  let finalText = ''

  for (let round = 0; round < maxToolCalls; round++) {
    const response = await client.messages.create({
      model,
      max_tokens: 4096,
      system: SYSTEM_PROMPT,
      tools: anthropicTools,
      messages,
    })

    // Any stop reason other than tool_use ends the run; keep only the text blocks.
    if (response.stop_reason !== 'tool_use') {
      finalText = response.content
        .filter((b): b is Anthropic.TextBlock => b.type === 'text')
        .map((b) => b.text)
        .join('\n')
      break
    }

    const assistantContent = response.content
    messages.push({ role: 'assistant', content: assistantContent })

    const toolResults: Anthropic.ToolResultBlockParam[] = []

    for (const block of assistantContent) {
      if (block.type !== 'tool_use') continue

      const entry = MCP_TOOLS.find((t) => t.name === block.name)

      // Unknown tools are answered with an error result but are not added to the
      // transcript, which only lists calls that actually reached a handler.
      if (!entry) {
        toolResults.push({
          type: 'tool_result',
          tool_use_id: block.id,
          content: JSON.stringify({ error: `Unknown tool: ${block.name}` }),
          is_error: true,
        })
        continue
      }

      const t0 = Date.now()
      let rawResult: { content: { text: string }[]; isError?: boolean }

      try {
        rawResult = await entry.handler(block.input as Record<string, unknown>)
      } catch (err) {
        rawResult = {
          content: [{ text: JSON.stringify({ error: String(err) }) }],
          isError: true,
        }
      }

      const ms = Date.now() - t0

      // A handler may resolve with empty or missing content. Reading
      // content[0].text unguarded would throw and abort the whole run, so turn
      // that case into an explicit error result instead.
      let text = rawResult.content?.[0]?.text
      let isError = rawResult.isError
      if (typeof text !== 'string') {
        text = JSON.stringify({ error: 'Tool returned no text content' })
        isError = true
      }

      const parsed = parseToolOutput(text)
      const meta = parsed._meta as Record<string, unknown> | undefined

      transcript.push({
        tool: block.name,
        input: block.input as Record<string, unknown>,
        output: parsed,
        ms,
        provider: meta?.provider as string | undefined,
        isError,
      })

      toolResults.push({
        type: 'tool_result',
        tool_use_id: block.id,
        content: text,
        is_error: isError,
      })
    }

    messages.push({ role: 'user', content: toolResults })
  }

  return { transcript, finalText, totalMs: Date.now() - startMs }
}
141
+
142
+ // ---------------------------------------------------------------------------
143
+ // Assertion helpers — all throw with descriptive messages on failure
144
+ // ---------------------------------------------------------------------------
145
+
146
/**
 * Asserts that the transcript contains at least one call to `name`.
 *
 * @throws Error listing the tools that were actually called (or "(none)").
 */
export function expectTool(transcript: ToolCall[], name: string): void {
  const calledNames = transcript.map((call) => call.tool)
  if (calledNames.includes(name)) return

  const called = calledNames.join(', ') || '(none)'
  throw new Error(`Expected tool "${name}" to be called. Tools actually called: [${called}]`)
}
152
+
153
/**
 * Asserts that the transcript contains no call to `name`.
 *
 * @throws Error quoting the input of the first offending call.
 */
export function expectNoTool(transcript: ToolCall[], name: string): void {
  for (const call of transcript) {
    if (call.tool !== name) continue
    throw new Error(
      `Expected tool "${name}" NOT to be called, but it was called with: ${JSON.stringify(call.input)}`,
    )
  }
}
161
+
162
/**
 * Asserts that the tools named in `order` were called in that relative order,
 * i.e. that `order` appears as a (not necessarily contiguous) subsequence of the
 * transcript's tool names.
 *
 * Each name is matched against the first call that comes after the previous
 * match. This lets `order` contain the same tool more than once (e.g.
 * ['find_people', 'find_email', 'find_people']) and ignores stray earlier calls,
 * whereas comparing only the first occurrence of every name can never satisfy a
 * repeated name.
 *
 * @throws Error when a named tool was never called, or when no call to it exists
 *         after the preceding tool in `order`.
 */
export function expectOrder(transcript: ToolCall[], order: string[]): void {
  const called = transcript.map((c) => c.tool)

  // Report tools that are missing entirely before judging the ordering.
  for (const name of order) {
    if (!called.includes(name)) {
      throw new Error(`Cannot enforce ordering: tool "${name}" was never called. Transcript: [${called.join(', ')}]`)
    }
  }

  let prevName = ''
  let prevIdx = -1
  for (const name of order) {
    const idx = called.indexOf(name, prevIdx + 1)
    if (idx === -1) {
      // The tool was called, but only at or before the previous match.
      throw new Error(`Expected "${prevName}" (pos ${prevIdx}) before "${name}" (pos ${called.lastIndexOf(name)})`)
    }
    prevName = name
    prevIdx = idx
  }
}
178
+
179
/**
 * Asserts that at least one call to `toolName` passed an input value under `key`
 * that satisfies `predicate`.
 *
 * @throws Error when the tool was never called, or when no call's value for
 *         `key` satisfies the predicate (the observed values are listed).
 */
export function expectParam(
  transcript: ToolCall[],
  toolName: string,
  key: string,
  predicate: (value: unknown) => boolean,
): void {
  const matchingCalls = transcript.filter((call) => call.tool === toolName)

  if (matchingCalls.length === 0) {
    throw new Error(`Tool "${toolName}" was not called — cannot check param "${key}"`)
  }

  for (const call of matchingCalls) {
    if (predicate(call.input[key])) return
  }

  const seenValues = matchingCalls.map((call) => call.input[key])
  throw new Error(
    `Tool "${toolName}" was called but param "${key}" did not satisfy predicate. Values: ${JSON.stringify(seenValues)}`,
  )
}
193
+
194
/**
 * Asserts that `finalText` contains every entry of `substrings`, compared
 * case-insensitively.
 *
 * @throws Error naming the first missing substring together with the first
 *         400 characters of the text.
 */
export function expectFinalContains(finalText: string, substrings: string[]): void {
  const missing = substrings.find(
    (needle) => !finalText.toLowerCase().includes(needle.toLowerCase()),
  )
  if (missing === undefined) return

  const preview = finalText.slice(0, 400)
  throw new Error(`Expected final text to contain "${missing}". Got (first 400 chars): ${preview}`)
}
202
+
203
+ // ---------------------------------------------------------------------------
204
+ // Artifact writer — writes JSON transcript to tests/gtm/.artifacts/<slug>.json
205
+ // ---------------------------------------------------------------------------
206
+
207
+ const __dirname = path.dirname(url.fileURLToPath(import.meta.url))
208
+
209
/**
 * Writes the run result as pretty-printed JSON to `.artifacts/<slug>.json` next
 * to this module, creating the directory first when it does not exist. The file
 * contains the scenario slug, an ISO timestamp and every field of `result`.
 */
export function writeArtifact(slug: string, result: AgentResult): void {
  const artifactsDir = path.join(__dirname, '.artifacts')
  if (!fs.existsSync(artifactsDir)) {
    fs.mkdirSync(artifactsDir, { recursive: true })
  }

  const target = path.join(artifactsDir, `${slug}.json`)
  const payload = { scenario: slug, timestamp: new Date().toISOString(), ...result }
  fs.writeFileSync(target, JSON.stringify(payload, null, 2))
}
@@ -0,0 +1,232 @@
1
+ import { z } from 'zod'
2
+
3
+ import {
4
+ searchCompaniesName,
5
+ searchCompaniesDescription,
6
+ searchCompaniesSchema,
7
+ searchCompaniesHandler,
8
+ } from '../../src/tools/search-companies.js'
9
+
10
+ import {
11
+ findPeopleName,
12
+ findPeopleDescription,
13
+ findPeopleSchema,
14
+ findPeopleHandler,
15
+ } from '../../src/tools/find-people.js'
16
+
17
+ import {
18
+ findEmailName,
19
+ findEmailDescription,
20
+ findEmailSchema,
21
+ findEmailHandler,
22
+ } from '../../src/tools/find-email.js'
23
+
24
+ import {
25
+ findEmailsName,
26
+ findEmailsDescription,
27
+ findEmailsSchema,
28
+ findEmailsHandler,
29
+ } from '../../src/tools/find-emails.js'
30
+
31
+ import {
32
+ verifyEmailName,
33
+ verifyEmailDescription,
34
+ verifyEmailSchema,
35
+ verifyEmailHandler,
36
+ } from '../../src/tools/verify-email.js'
37
+
38
+ import {
39
+ findPhoneName,
40
+ findPhoneDescription,
41
+ findPhoneSchema,
42
+ findPhoneHandler,
43
+ } from '../../src/tools/find-phone.js'
44
+
45
+ import {
46
+ enrichCompanyName,
47
+ enrichCompanyDescription,
48
+ enrichCompanySchema,
49
+ enrichCompanyHandler,
50
+ } from '../../src/tools/enrich-company.js'
51
+
52
+ import {
53
+ enrichPersonName,
54
+ enrichPersonDescription,
55
+ enrichPersonSchema,
56
+ enrichPersonHandler,
57
+ } from '../../src/tools/enrich-person.js'
58
+
59
+ import {
60
+ searchWebName,
61
+ searchWebDescription,
62
+ searchWebSchema,
63
+ searchWebHandler,
64
+ } from '../../src/tools/search-web.js'
65
+
66
+ import {
67
+ searchJobsName,
68
+ searchJobsDescription,
69
+ searchJobsSchema,
70
+ searchJobsHandler,
71
+ } from '../../src/tools/search-jobs.js'
72
+
73
+ import {
74
+ searchAdsName,
75
+ searchAdsDescription,
76
+ searchAdsSchema,
77
+ searchAdsHandler,
78
+ } from '../../src/tools/search-ads.js'
79
+
80
+ import {
81
+ searchPlacesName,
82
+ searchPlacesDescription,
83
+ searchPlacesSchema,
84
+ searchPlacesHandler,
85
+ } from '../../src/tools/search-places.js'
86
+
87
+ import {
88
+ findInfluencersName,
89
+ findInfluencersDescription,
90
+ findInfluencersSchema,
91
+ findInfluencersHandler,
92
+ } from '../../src/tools/find-influencers.js'
93
+
94
+ import {
95
+ searchRedditName,
96
+ searchRedditDescription,
97
+ searchRedditSchema,
98
+ searchRedditHandler,
99
+ } from '../../src/tools/search-reddit.js'
100
+
101
+ import {
102
+ searchSeoName,
103
+ searchSeoDescription,
104
+ searchSeoSchema,
105
+ searchSeoHandler,
106
+ } from '../../src/tools/search-seo.js'
107
+
108
+ import {
109
+ findSignalsName,
110
+ findSignalsDescription,
111
+ findSignalsSchema,
112
+ findSignalsHandler,
113
+ } from '../../src/tools/find-signals.js'
114
+
115
+ import {
116
+ fetchPageContentName,
117
+ fetchPageContentDescription,
118
+ fetchPageContentSchema,
119
+ fetchPageContentHandler,
120
+ } from '../../src/tools/fetch-page-content.js'
121
+
122
/**
 * One tool exposed to the eval harness through MCP_TOOLS.
 *
 * - name / description: the tool's exported name and description strings
 * - schema:  a Zod object built from the tool's exported raw schema shape
 * - handler: the tool's exported handler; takes the tool input and resolves to a
 *            list of text content items plus an optional isError flag
 */
+ export interface McpToolEntry {
123
+ name: string
124
+ description: string
125
+ schema: z.ZodObject<z.ZodRawShape>
126
+ handler: (input: Record<string, unknown>) => Promise<{ content: { text: string }[]; isError?: boolean }>
127
+ }
128
+
129
/**
 * Builds one registry entry, wrapping the tool's raw Zod shape in `z.object`.
 */
function defineTool(
  name: string,
  description: string,
  shape: z.ZodRawShape,
  handler: McpToolEntry['handler'],
): McpToolEntry {
  return { name, description, schema: z.object(shape), handler }
}

/**
 * Every tool imported from src/tools, in a fixed order, paired with its
 * description, Zod schema and handler.
 */
export const MCP_TOOLS: McpToolEntry[] = [
  defineTool(searchCompaniesName, searchCompaniesDescription, searchCompaniesSchema, searchCompaniesHandler),
  defineTool(findPeopleName, findPeopleDescription, findPeopleSchema, findPeopleHandler),
  defineTool(findEmailName, findEmailDescription, findEmailSchema, findEmailHandler),
  defineTool(findEmailsName, findEmailsDescription, findEmailsSchema, findEmailsHandler),
  defineTool(verifyEmailName, verifyEmailDescription, verifyEmailSchema, verifyEmailHandler),
  defineTool(findPhoneName, findPhoneDescription, findPhoneSchema, findPhoneHandler),
  defineTool(enrichCompanyName, enrichCompanyDescription, enrichCompanySchema, enrichCompanyHandler),
  defineTool(enrichPersonName, enrichPersonDescription, enrichPersonSchema, enrichPersonHandler),
  defineTool(searchWebName, searchWebDescription, searchWebSchema, searchWebHandler),
  defineTool(searchJobsName, searchJobsDescription, searchJobsSchema, searchJobsHandler),
  defineTool(searchAdsName, searchAdsDescription, searchAdsSchema, searchAdsHandler),
  defineTool(searchPlacesName, searchPlacesDescription, searchPlacesSchema, searchPlacesHandler),
  defineTool(findInfluencersName, findInfluencersDescription, findInfluencersSchema, findInfluencersHandler),
  defineTool(searchRedditName, searchRedditDescription, searchRedditSchema, searchRedditHandler),
  defineTool(searchSeoName, searchSeoDescription, searchSeoSchema, searchSeoHandler),
  defineTool(findSignalsName, findSignalsDescription, findSignalsSchema, findSignalsHandler),
  defineTool(fetchPageContentName, fetchPageContentDescription, fetchPageContentSchema, fetchPageContentHandler),
]