@coldiq/mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client.d.ts +8 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +47 -0
- package/dist/client.js.map +1 -0
- package/dist/executor.d.ts +21 -0
- package/dist/executor.d.ts.map +1 -0
- package/dist/executor.js +130 -0
- package/dist/executor.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +49 -0
- package/dist/index.js.map +1 -0
- package/dist/registry.d.ts +49 -0
- package/dist/registry.d.ts.map +1 -0
- package/dist/registry.js +3104 -0
- package/dist/registry.js.map +1 -0
- package/dist/tools/enrich-company.d.ts +22 -0
- package/dist/tools/enrich-company.d.ts.map +1 -0
- package/dist/tools/enrich-company.js +21 -0
- package/dist/tools/enrich-company.js.map +1 -0
- package/dist/tools/enrich-email.d.ts +24 -0
- package/dist/tools/enrich-email.d.ts.map +1 -0
- package/dist/tools/enrich-email.js +19 -0
- package/dist/tools/enrich-email.js.map +1 -0
- package/dist/tools/enrich-emails.d.ts +31 -0
- package/dist/tools/enrich-emails.d.ts.map +1 -0
- package/dist/tools/enrich-emails.js +146 -0
- package/dist/tools/enrich-emails.js.map +1 -0
- package/dist/tools/enrich-person.d.ts +26 -0
- package/dist/tools/enrich-person.d.ts.map +1 -0
- package/dist/tools/enrich-person.js +23 -0
- package/dist/tools/enrich-person.js.map +1 -0
- package/dist/tools/fetch-page-content.d.ts +22 -0
- package/dist/tools/fetch-page-content.d.ts.map +1 -0
- package/dist/tools/fetch-page-content.js +32 -0
- package/dist/tools/fetch-page-content.js.map +1 -0
- package/dist/tools/find-email.d.ts +24 -0
- package/dist/tools/find-email.d.ts.map +1 -0
- package/dist/tools/find-email.js +19 -0
- package/dist/tools/find-email.js.map +1 -0
- package/dist/tools/find-emails.d.ts +31 -0
- package/dist/tools/find-emails.d.ts.map +1 -0
- package/dist/tools/find-emails.js +146 -0
- package/dist/tools/find-emails.js.map +1 -0
- package/dist/tools/find-influencers.d.ts +29 -0
- package/dist/tools/find-influencers.d.ts.map +1 -0
- package/dist/tools/find-influencers.js +30 -0
- package/dist/tools/find-influencers.js.map +1 -0
- package/dist/tools/find-people.d.ts +26 -0
- package/dist/tools/find-people.d.ts.map +1 -0
- package/dist/tools/find-people.js +61 -0
- package/dist/tools/find-people.js.map +1 -0
- package/dist/tools/find-phone.d.ts +24 -0
- package/dist/tools/find-phone.d.ts.map +1 -0
- package/dist/tools/find-phone.js +48 -0
- package/dist/tools/find-phone.js.map +1 -0
- package/dist/tools/find-signals.d.ts +26 -0
- package/dist/tools/find-signals.d.ts.map +1 -0
- package/dist/tools/find-signals.js +82 -0
- package/dist/tools/find-signals.js.map +1 -0
- package/dist/tools/search-ads.d.ts +33 -0
- package/dist/tools/search-ads.d.ts.map +1 -0
- package/dist/tools/search-ads.js +33 -0
- package/dist/tools/search-ads.js.map +1 -0
- package/dist/tools/search-companies.d.ts +42 -0
- package/dist/tools/search-companies.d.ts.map +1 -0
- package/dist/tools/search-companies.js +37 -0
- package/dist/tools/search-companies.js.map +1 -0
- package/dist/tools/search-jobs.d.ts +51 -0
- package/dist/tools/search-jobs.d.ts.map +1 -0
- package/dist/tools/search-jobs.js +64 -0
- package/dist/tools/search-jobs.js.map +1 -0
- package/dist/tools/search-places.d.ts +47 -0
- package/dist/tools/search-places.d.ts.map +1 -0
- package/dist/tools/search-places.js +42 -0
- package/dist/tools/search-places.js.map +1 -0
- package/dist/tools/search-reddit.d.ts +27 -0
- package/dist/tools/search-reddit.d.ts.map +1 -0
- package/dist/tools/search-reddit.js +30 -0
- package/dist/tools/search-reddit.js.map +1 -0
- package/dist/tools/search-seo.d.ts +37 -0
- package/dist/tools/search-seo.d.ts.map +1 -0
- package/dist/tools/search-seo.js +49 -0
- package/dist/tools/search-seo.js.map +1 -0
- package/dist/tools/search-web.d.ts +23 -0
- package/dist/tools/search-web.d.ts.map +1 -0
- package/dist/tools/search-web.js +20 -0
- package/dist/tools/search-web.js.map +1 -0
- package/dist/tools/verify-email.d.ts +20 -0
- package/dist/tools/verify-email.d.ts.map +1 -0
- package/dist/tools/verify-email.js +15 -0
- package/dist/tools/verify-email.js.map +1 -0
- package/package.json +28 -0
- package/src/client.ts +60 -0
- package/src/executor.ts +182 -0
- package/src/index.ts +155 -0
- package/src/registry.ts +3159 -0
- package/src/tools/enrich-company.ts +25 -0
- package/src/tools/enrich-person.ts +27 -0
- package/src/tools/fetch-page-content.ts +36 -0
- package/src/tools/find-email.ts +23 -0
- package/src/tools/find-emails.ts +190 -0
- package/src/tools/find-influencers.ts +34 -0
- package/src/tools/find-people.ts +69 -0
- package/src/tools/find-phone.ts +53 -0
- package/src/tools/find-signals.ts +93 -0
- package/src/tools/search-ads.ts +44 -0
- package/src/tools/search-companies.ts +41 -0
- package/src/tools/search-jobs.ts +73 -0
- package/src/tools/search-places.ts +52 -0
- package/src/tools/search-reddit.ts +34 -0
- package/src/tools/search-seo.ts +59 -0
- package/src/tools/search-web.ts +24 -0
- package/src/tools/verify-email.ts +19 -0
- package/test-ads-live.ts +77 -0
- package/test-company-live.ts +91 -0
- package/test-email-live.ts +171 -0
- package/test-influencers-live.ts +66 -0
- package/test-jobs-live.ts +69 -0
- package/test-linkupapi-live.ts +137 -0
- package/test-phone-live.ts +41 -0
- package/test-places-live.ts +89 -0
- package/test-reddit-live.ts +66 -0
- package/test-search-live.ts +79 -0
- package/test-seo-live.ts +68 -0
- package/test-web-live.ts +67 -0
- package/tests/client.test.ts +90 -0
- package/tests/executor.test.ts +83 -0
- package/tests/gtm/01-icp-to-emails.test.ts +43 -0
- package/tests/gtm/02-icp-bulk-emails.test.ts +38 -0
- package/tests/gtm/03-icp-to-phones.test.ts +39 -0
- package/tests/gtm/04-funding-signal-outreach.test.ts +42 -0
- package/tests/gtm/05-hiring-signal-decisionmakers.test.ts +41 -0
- package/tests/gtm/06-intent-signal-outreach.test.ts +44 -0
- package/tests/gtm/07-places-to-content.test.ts +50 -0
- package/tests/gtm/08-domain-to-account.test.ts +44 -0
- package/tests/gtm/09-linkedin-to-everything.test.ts +41 -0
- package/tests/gtm/10-jobs-vs-signals-routing.test.ts +38 -0
- package/tests/gtm/11-find-vs-enrich-routing.test.ts +39 -0
- package/tests/gtm/12-bogus-domain-graceful.test.ts +42 -0
- package/tests/gtm/13-private-linkedin-graceful.test.ts +44 -0
- package/tests/gtm/14-empty-handoff.test.ts +43 -0
- package/tests/gtm/15-seo-reddit-research.test.ts +38 -0
- package/tests/gtm/README.md +59 -0
- package/tests/gtm/harness.ts +217 -0
- package/tests/gtm/tools-bridge.ts +232 -0
- package/tests/gtm-scenarios.md +32 -0
- package/tests/live/smoke-report.ts +255 -0
- package/tests/live/smoke.test.ts +134 -0
- package/tests/registry-enrich-person.test.ts +447 -0
- package/tests/registry-fetch-page-content.test.ts +90 -0
- package/tests/registry-find-people.test.ts +467 -0
- package/tests/registry-find-signals.test.ts +470 -0
- package/tests/registry-linkupapi.test.ts +331 -0
- package/tests/registry-search-companies.test.ts +188 -0
- package/tests/registry-search-jobs.test.ts +116 -0
- package/tests/registry.test.ts +2210 -0
- package/tests/tools/enrich-company.test.ts +92 -0
- package/tests/tools/enrich-email.test.ts +94 -0
- package/tests/tools/enrich-emails.test.ts +271 -0
- package/tests/tools/enrich-person.test.ts +140 -0
- package/tests/tools/fetch-page-content.test.ts +108 -0
- package/tests/tools/find-influencers.test.ts +91 -0
- package/tests/tools/find-people.test.ts +344 -0
- package/tests/tools/find-phone.test.ts +100 -0
- package/tests/tools/find-signals.test.ts +110 -0
- package/tests/tools/search-ads.test.ts +182 -0
- package/tests/tools/search-companies.test.ts +58 -0
- package/tests/tools/search-jobs.test.ts +210 -0
- package/tests/tools/search-places.test.ts +114 -0
- package/tests/tools/search-reddit.test.ts +125 -0
- package/tests/tools/search-seo.test.ts +183 -0
- package/tests/tools/search-web.test.ts +79 -0
- package/tests/tools/verify-email.test.ts +68 -0
- package/tsconfig.json +17 -0
- package/vitest.config.ts +7 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GTM-12: Edge case — nonexistent domain, graceful no-result
|
|
3
|
+
*
|
|
4
|
+
* The email lookup for a clearly fake domain should:
|
|
5
|
+
* 1. Call find_email (not crash, not skip the call)
|
|
6
|
+
* 2. Return no result (not a server error on our side)
|
|
7
|
+
* 3. Agent reports "not found" gracefully — does NOT proceed to verify_email
|
|
8
|
+
*
|
|
9
|
+
* Expected: find_email called, isError falsy OR explicit no-result response
|
|
10
|
+
* Forbidden: verify_email (nothing to verify when email is not found), crash
|
|
11
|
+
* Est. credits: ~1
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { describe, it, expect, beforeAll } from 'vitest'
|
|
15
|
+
import { initClient } from '../../src/client.js'
|
|
16
|
+
import { runAgent, expectTool, expectNoTool, writeArtifact } from './harness.js'
|
|
17
|
+
|
|
18
|
+
const RUN_LIVE =
|
|
19
|
+
process.env.LIVE_TESTS === '1' && !!process.env.ANTHROPIC_API_KEY && !!process.env.COLDIQ_API_KEY
|
|
20
|
+
|
|
21
|
+
describe.runIf(RUN_LIVE)('gtm-12: bogus domain → graceful no-result', () => {
|
|
22
|
+
beforeAll(() => {
|
|
23
|
+
initClient(process.env.COLDIQ_API_URL, process.env.COLDIQ_API_KEY)
|
|
24
|
+
})
|
|
25
|
+
|
|
26
|
+
it('calls find_email, gets no result, does not proceed to verify, reports gracefully', async () => {
|
|
27
|
+
const result = await runAgent({
|
|
28
|
+
prompt:
|
|
29
|
+
'Find the professional email for Jane Smith at the company xyz-totally-fake-domain-9823746.io. ' +
|
|
30
|
+
'Their domain is xyz-totally-fake-domain-9823746.io.',
|
|
31
|
+
maxToolCalls: 4,
|
|
32
|
+
})
|
|
33
|
+
|
|
34
|
+
expectTool(result.transcript, 'find_email')
|
|
35
|
+
expectNoTool(result.transcript, 'verify_email')
|
|
36
|
+
|
|
37
|
+
// Smoke check only: the agent produced a non-empty final response. Graceful "not found" wording is not asserted here — inspect the artifact.
|
|
38
|
+
expect(result.finalText.length, 'Agent should produce a final response').toBeGreaterThan(0)
|
|
39
|
+
|
|
40
|
+
writeArtifact('12-bogus-domain-graceful', result)
|
|
41
|
+
}, 3 * 60_000)
|
|
42
|
+
})
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GTM-13: Edge case — private/nonexistent LinkedIn profile, graceful empty result
|
|
3
|
+
*
|
|
4
|
+
* find_phone on a profile with no public phone should:
|
|
5
|
+
* 1. Call find_phone (not skip, not enrich_person instead)
|
|
6
|
+
* 2. Return empty or no-result — not a thrown error
|
|
7
|
+
* 3. Agent acknowledges no phone was found
|
|
8
|
+
*
|
|
9
|
+
* Expected: find_phone called, result gracefully empty
|
|
10
|
+
* Forbidden: enrich_person (wrong tool for phone lookup), crash
|
|
11
|
+
* Est. credits: ~2
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { describe, it, expect, beforeAll } from 'vitest'
|
|
15
|
+
import { initClient } from '../../src/client.js'
|
|
16
|
+
import { runAgent, expectTool, expectNoTool, writeArtifact } from './harness.js'
|
|
17
|
+
|
|
18
|
+
const RUN_LIVE =
|
|
19
|
+
process.env.LIVE_TESTS === '1' && !!process.env.ANTHROPIC_API_KEY && !!process.env.COLDIQ_API_KEY
|
|
20
|
+
|
|
21
|
+
describe.runIf(RUN_LIVE)('gtm-13: private/nonexistent LinkedIn → graceful phone result', () => {
|
|
22
|
+
beforeAll(() => {
|
|
23
|
+
initClient(process.env.COLDIQ_API_URL, process.env.COLDIQ_API_KEY)
|
|
24
|
+
})
|
|
25
|
+
|
|
26
|
+
it('calls find_phone on a profile that has no public number, reports gracefully', async () => {
|
|
27
|
+
const result = await runAgent({
|
|
28
|
+
prompt:
|
|
29
|
+
'Get the phone number for the person at this LinkedIn URL: ' +
|
|
30
|
+
'https://www.linkedin.com/in/john-doe-private-profile-xyz99887766. ' +
|
|
31
|
+
"Just the phone number. If you can't find one, say so.",
|
|
32
|
+
maxToolCalls: 4,
|
|
33
|
+
})
|
|
34
|
+
|
|
35
|
+
expectTool(result.transcript, 'find_phone')
|
|
36
|
+
expectNoTool(result.transcript, 'enrich_person')
|
|
37
|
+
|
|
38
|
+
// Agent must produce a final text — no crash, no infinite retry
|
|
39
|
+
expect(result.finalText.length, 'Agent should produce a final response').toBeGreaterThan(0)
|
|
40
|
+
expect(result.transcript.length, 'Should not retry excessively').toBeLessThanOrEqual(4)
|
|
41
|
+
|
|
42
|
+
writeArtifact('13-private-linkedin-graceful', result)
|
|
43
|
+
}, 4 * 60_000)
|
|
44
|
+
})
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GTM-14: Edge case — nonexistent company domain → empty enrich → agent stops cleanly
|
|
3
|
+
*
|
|
4
|
+
* When enrich_company returns no data for a clearly fake domain, the agent must NOT
|
|
5
|
+
* call find_people (nothing to hand off). Tests the empty-handoff stop behaviour.
|
|
6
|
+
*
|
|
7
|
+
* Note: search_companies with impossible filters is NOT reliable for this test because
|
|
8
|
+
* all live providers use semantic/approximate matching and always return something.
|
|
9
|
+
* enrich_company on a fake domain is the stable zero-result signal.
|
|
10
|
+
*
|
|
11
|
+
* Expected: enrich_company called, returns empty/error, agent stops
|
|
12
|
+
* Forbidden: find_people called after empty enrich_company result
|
|
13
|
+
* Est. credits: ~1
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { describe, it, expect, beforeAll } from 'vitest'
|
|
17
|
+
import { initClient } from '../../src/client.js'
|
|
18
|
+
import { runAgent, expectTool, expectNoTool, writeArtifact } from './harness.js'
|
|
19
|
+
|
|
20
|
+
const RUN_LIVE =
|
|
21
|
+
process.env.LIVE_TESTS === '1' && !!process.env.ANTHROPIC_API_KEY && !!process.env.COLDIQ_API_KEY
|
|
22
|
+
|
|
23
|
+
describe.runIf(RUN_LIVE)('gtm-14: empty enrich result → agent stops chain cleanly', () => {
|
|
24
|
+
beforeAll(() => {
|
|
25
|
+
initClient(process.env.COLDIQ_API_URL, process.env.COLDIQ_API_KEY)
|
|
26
|
+
})
|
|
27
|
+
|
|
28
|
+
it('does not call find_people when enrich_company returns no data', async () => {
|
|
29
|
+
const result = await runAgent({
|
|
30
|
+
prompt:
|
|
31
|
+
'Enrich the company at domain xxy--totally-fake-99823746.io and then find their CEO.',
|
|
32
|
+
maxToolCalls: 6,
|
|
33
|
+
})
|
|
34
|
+
|
|
35
|
+
expectTool(result.transcript, 'enrich_company')
|
|
36
|
+
expectNoTool(result.transcript, 'find_people')
|
|
37
|
+
|
|
38
|
+
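// Smoke check only: the agent produced a non-empty final response. Whether it explains the empty result (rather than inventing a company) is not asserted — inspect the artifact.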
|
|
39
|
+
expect(result.finalText.length, 'Agent should produce a final response').toBeGreaterThan(0)
|
|
40
|
+
|
|
41
|
+
writeArtifact('14-empty-handoff', result)
|
|
42
|
+
}, 4 * 60_000)
|
|
43
|
+
})
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GTM-15: Market research — SEO data + Reddit sentiment (parallel tools)
|
|
3
|
+
*
|
|
4
|
+
* Prompt: GTM engineer wants to understand market positioning and community sentiment.
|
|
5
|
+
* Expected flow: search_seo AND search_reddit (both must be called; order may vary)
|
|
6
|
+
* Forbidden: search_web (for SERP queries, use search_seo; for community, use search_reddit)
|
|
7
|
+
* Est. credits: ~4
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { describe, it, beforeAll } from 'vitest'
|
|
11
|
+
import { initClient } from '../../src/client.js'
|
|
12
|
+
import { runAgent, expectTool, expectNoTool, writeArtifact } from './harness.js'
|
|
13
|
+
|
|
14
|
+
const RUN_LIVE =
|
|
15
|
+
process.env.LIVE_TESTS === '1' && !!process.env.ANTHROPIC_API_KEY && !!process.env.COLDIQ_API_KEY
|
|
16
|
+
|
|
17
|
+
describe.runIf(RUN_LIVE)('gtm-15: SEO SERP + Reddit sentiment research', () => {
|
|
18
|
+
beforeAll(() => {
|
|
19
|
+
initClient(process.env.COLDIQ_API_URL, process.env.COLDIQ_API_KEY)
|
|
20
|
+
})
|
|
21
|
+
|
|
22
|
+
it('calls both search_seo and search_reddit for market research', async () => {
|
|
23
|
+
const result = await runAgent({
|
|
24
|
+
prompt:
|
|
25
|
+
"I want to understand the market for 'cold email tools'. " +
|
|
26
|
+
"1) Show me the top Google search results for the keyword 'cold email tool' (SERP, limit 2). " +
|
|
27
|
+
"2) Show me what people are saying about it on Reddit (limit 1 thread). " +
|
|
28
|
+
'Give me both results separately. Do not use a generic web search for either.',
|
|
29
|
+
maxToolCalls: 6,
|
|
30
|
+
})
|
|
31
|
+
|
|
32
|
+
expectTool(result.transcript, 'search_seo')
|
|
33
|
+
expectTool(result.transcript, 'search_reddit')
|
|
34
|
+
expectNoTool(result.transcript, 'search_web')
|
|
35
|
+
|
|
36
|
+
writeArtifact('15-seo-reddit-research', result)
|
|
37
|
+
}, 8 * 60_000)
|
|
38
|
+
})
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# GTM Prompt-Based Eval Suite
|
|
2
|
+
|
|
3
|
+
End-to-end scenarios that verify the MCP server routes plain-English prompts to the correct tools and produces valid data. Each file is independent and runnable individually.
|
|
4
|
+
|
|
5
|
+
## Required env vars
|
|
6
|
+
|
|
7
|
+
| Var | Purpose |
|
|
8
|
+
|---|---|
|
|
9
|
+
| `COLDIQ_API_KEY` | ColdIQ API key (live provider calls) |
|
|
10
|
+
| `COLDIQ_API_URL` | API base URL (default: `http://localhost:8787`) |
|
|
11
|
+
| `ANTHROPIC_API_KEY` | Anthropic API key (Claude agent loop) |
|
|
12
|
+
| `AGENT_MODEL` | Override Claude model (default: `claude-sonnet-4-6`) |
|
|
13
|
+
|
|
14
|
+
## Run commands
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
cd mcp
|
|
18
|
+
|
|
19
|
+
# Full suite (~50 ColdIQ credits, ~$1-3 Anthropic, 5–15 min)
|
|
20
|
+
LIVE_TESTS=1 ANTHROPIC_API_KEY=… COLDIQ_API_KEY=… COLDIQ_API_URL=https://api.coldiq.com npm run test:gtm
|
|
21
|
+
|
|
22
|
+
# Single scenario (cheap, ~1–4 credits)
|
|
23
|
+
LIVE_TESTS=1 ANTHROPIC_API_KEY=… COLDIQ_API_KEY=… COLDIQ_API_URL=https://api.coldiq.com \
|
|
24
|
+
npx vitest run tests/gtm/11-find-vs-enrich-routing.test.ts --reporter=verbose
|
|
25
|
+
|
|
26
|
+
# Without LIVE_TESTS=1, all GTM tests are silently skipped (safe for CI)
|
|
27
|
+
npm test
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Scenario index
|
|
31
|
+
|
|
32
|
+
| File | Scenario | Key routing assertion | Est. credits |
|
|
33
|
+
|---|---|---|---|
|
|
34
|
+
| 01 | ICP → emails (CSV) | search_companies → find_people → find_email → verify_email | 4 |
|
|
35
|
+
| 02 | Multi-company bulk emails | find_people → find_emails (batch, not per-person) | 4 |
|
|
36
|
+
| 03 | Person → phone number | find_people → find_phone (not enrich_person) | 3 |
|
|
37
|
+
| 04 | Funding signal → outreach | find_signals(funding) → find_people → find_email | 4 |
|
|
38
|
+
| 05 | Job posting → VP Sales | search_jobs (not find_signals) → find_people | 4 |
|
|
39
|
+
| 06 | Intent signal → outreach | find_signals(intent) → find_people → find_email | 4 |
|
|
40
|
+
| 07 | Place → website content | search_places → fetch_page_content | 3 |
|
|
41
|
+
| 08 | Domain → account intel | enrich_company → find_people → find_email | 4 |
|
|
42
|
+
| 09 | LinkedIn URL → full profile | enrich_person → enrich_company (not find_people) | 3 |
|
|
43
|
+
| 10 | Routing: jobs vs signals | search_jobs only (NOT find_signals) | 2 |
|
|
44
|
+
| 11 | Routing: find_email vs enrich | find_email only (NOT enrich_person) | 1 |
|
|
45
|
+
| 12 | Bogus domain graceful | find_email → no result → no verify_email | 1 |
|
|
46
|
+
| 13 | Private LinkedIn graceful | find_phone → empty → agent stops | 2 |
|
|
47
|
+
| 14 | Empty handoff | enrich_company → no data → no find_people | 1 |
|
|
48
|
+
| 15 | SEO + Reddit research | search_seo + search_reddit (not search_web) | 4 |
|
|
49
|
+
|
|
50
|
+
## Artifacts
|
|
51
|
+
|
|
52
|
+
Each test run writes `tests/gtm/.artifacts/<slug>.json` with the full transcript: every tool called, its input params, the parsed output, elapsed time, and the `_meta.provider` field. Inspect these to debug failures or audit credit usage. The `.artifacts/` directory is gitignored.
|
|
53
|
+
|
|
54
|
+
## What a failure means
|
|
55
|
+
|
|
56
|
+
- **Wrong tool called** or **expected tool not called** → routing bug in a tool description or schema. Fix the description, re-run.
|
|
57
|
+
- **Tool called with wrong params** → schema field description is unclear. Fix the `describe()` on the Zod field.
|
|
58
|
+
- **`isError: true` on every provider** → provider is down or API key is invalid. Not an MCP bug; re-run later.
|
|
59
|
+
- **Timeout** → async provider (search_jobs, search_places, search_reddit) is slow. Increase timeout or check provider health.
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import Anthropic from '@anthropic-ai/sdk'
|
|
2
|
+
import { zodToJsonSchema } from 'zod-to-json-schema'
|
|
3
|
+
import * as fs from 'node:fs'
|
|
4
|
+
import * as path from 'node:path'
|
|
5
|
+
import * as url from 'node:url'
|
|
6
|
+
|
|
7
|
+
import { MCP_TOOLS } from './tools-bridge.js'
|
|
8
|
+
|
|
9
|
+
/** One tool invocation recorded by `runAgent`, in the order it was issued by the model. */
export interface ToolCall {
  /** Tool name as requested by the model (`tool_use.name`). */
  tool: string
  /** Arguments the model supplied for the call. */
  input: Record<string, unknown>
  /** Handler output parsed as JSON; `{ raw: <text> }` when the text is not valid JSON. */
  output: Record<string, unknown>
  /** Wall-clock milliseconds spent awaiting the handler. */
  ms: number
  /** Value of `_meta.provider` in the parsed output, when present. */
  provider?: string
  /** Error flag reported by the handler, or `true` when the handler threw. */
  isError?: boolean
}
|
|
17
|
+
|
|
18
|
+
/** Outcome of a single `runAgent` run. */
export interface AgentResult {
  /** Every recorded tool call, in call order. */
  transcript: ToolCall[]
  /** Text blocks of the model's closing (non-tool_use) response joined with newlines; `''` if none was produced. */
  finalText: string
  /** Wall-clock milliseconds for the whole run. */
  totalMs: number
}
|
|
23
|
+
|
|
24
|
+
/** Inputs accepted by `runAgent`. */
export interface RunAgentOptions {
  /** Plain-English user request sent as the first message. */
  prompt: string
  /** Budget for the agent loop; `runAgent` defaults it to 8. */
  maxToolCalls?: number
  /** Claude model id; `runAgent` falls back to `AGENT_MODEL`, then `claude-sonnet-4-6`. */
  model?: string
}
|
|
29
|
+
|
|
30
|
+
const SYSTEM_PROMPT = `You are a GTM engineer assistant with access to ColdIQ intelligence tools. Use them to answer the user's request.
|
|
31
|
+
|
|
32
|
+
Rules:
|
|
33
|
+
- Always use the minimum sample size: limit 1, max_profiles 1, or the smallest valid value for every size/count parameter.
|
|
34
|
+
- Never invent company names, domains, emails, LinkedIn URLs, or IDs. Only use values returned by tools.
|
|
35
|
+
- When chaining tools, pass actual values from the previous tool's output — never fabricate data.
|
|
36
|
+
- If a tool returns no results, report that clearly and stop the chain. Do not retry with fabricated inputs.
|
|
37
|
+
- Always call tools to answer the user's request — do not answer from your own training knowledge.`
|
|
38
|
+
|
|
39
|
+
/**
 * Drives a Claude tool-use loop over the ColdIQ MCP tool handlers and records every call.
 *
 * Each round sends the conversation to the model. If the model stops for any reason other
 * than `tool_use`, its text blocks become `finalText` and the loop ends. Otherwise every
 * `tool_use` block in the response is dispatched to the matching `MCP_TOOLS` handler and
 * the results are fed back as the next user message.
 *
 * `maxToolCalls` bounds both the number of model rounds and the number of handlers that are
 * actually executed. Calls requested after the budget is spent are answered with an error
 * result instead of being run, so a single round with many parallel tool calls cannot spend
 * more provider credits than the caller allowed. Such refused calls are still recorded in the
 * transcript (with `isError: true`) so routing assertions see what the model attempted.
 *
 * @param options Prompt plus optional `maxToolCalls` (default 8) and `model`
 *   (default `AGENT_MODEL` env var, then `claude-sonnet-4-6`).
 * @returns The recorded transcript, the final text (`''` when the loop ended without a
 *   non-tool_use response), and the total elapsed milliseconds.
 * @throws Whatever `client.messages.create` rejects with; handler exceptions are caught and
 *   reported to the model as error results.
 */
export async function runAgent(options: RunAgentOptions): Promise<AgentResult> {
  const {
    prompt,
    maxToolCalls = 8,
    model = process.env.AGENT_MODEL ?? 'claude-sonnet-4-6',
  } = options

  const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY })

  const anthropicTools: Anthropic.Tool[] = MCP_TOOLS.map((t) => ({
    name: t.name,
    description: t.description,
    input_schema: zodToJsonSchema(t.schema, { target: 'openApi3' }) as Anthropic.Tool['input_schema'],
  }))

  const messages: Anthropic.MessageParam[] = [{ role: 'user', content: prompt }]
  const transcript: ToolCall[] = []
  const startMs = Date.now()
  let finalText = ''
  let executedCalls = 0

  for (let round = 0; round < maxToolCalls; round++) {
    const response = await client.messages.create({
      model,
      max_tokens: 4096,
      system: SYSTEM_PROMPT,
      tools: anthropicTools,
      messages,
    })

    if (response.stop_reason !== 'tool_use') {
      finalText = response.content
        .filter((b): b is Anthropic.TextBlock => b.type === 'text')
        .map((b) => b.text)
        .join('\n')
      break
    }

    const assistantContent = response.content
    messages.push({ role: 'assistant', content: assistantContent })

    const toolResults: Anthropic.ToolResultBlockParam[] = []

    for (const block of assistantContent) {
      if (block.type !== 'tool_use') continue

      const entry = MCP_TOOLS.find((t) => t.name === block.name)

      if (!entry) {
        // Unknown tools are reported back to the model but not recorded in the transcript.
        toolResults.push({
          type: 'tool_result',
          tool_use_id: block.id,
          content: JSON.stringify({ error: `Unknown tool: ${block.name}` }),
          is_error: true,
        })
        continue
      }

      const input = block.input as Record<string, unknown>
      const t0 = Date.now()
      let rawResult: { content: { text: string }[]; isError?: boolean }

      if (executedCalls >= maxToolCalls) {
        // Budget spent: answer the request without running the handler so the
        // conversation stays well-formed and no further credits are used.
        rawResult = {
          content: [
            {
              text: JSON.stringify({
                error: `Tool call budget of ${maxToolCalls} exhausted; "${block.name}" was not executed`,
              }),
            },
          ],
          isError: true,
        }
      } else {
        executedCalls++
        try {
          rawResult = await entry.handler(input)
        } catch (err) {
          rawResult = {
            content: [{ text: JSON.stringify({ error: String(err) }) }],
            isError: true,
          }
        }
      }

      const ms = Date.now() - t0

      // A handler that returns no content block is treated as an error result
      // rather than crashing the whole run on `content[0].text`.
      const firstBlock = rawResult.content[0]
      const text = firstBlock?.text ?? JSON.stringify({ error: 'Tool returned no content' })
      const isError = firstBlock ? rawResult.isError : true

      let parsed: Record<string, unknown> = {}
      try {
        parsed = JSON.parse(text) as Record<string, unknown>
      } catch {
        // Non-JSON output is preserved verbatim under `raw`.
        parsed = { raw: text }
      }

      const meta = parsed._meta as Record<string, unknown> | undefined

      transcript.push({
        tool: block.name,
        input,
        output: parsed,
        ms,
        provider: meta?.provider as string | undefined,
        isError,
      })

      toolResults.push({
        type: 'tool_result',
        tool_use_id: block.id,
        content: text,
        is_error: isError,
      })
    }

    messages.push({ role: 'user', content: toolResults })
  }

  return { transcript, finalText, totalMs: Date.now() - startMs }
}
|
|
141
|
+
|
|
142
|
+
// ---------------------------------------------------------------------------
|
|
143
|
+
// Assertion helpers — all throw with descriptive messages on failure
|
|
144
|
+
// ---------------------------------------------------------------------------
|
|
145
|
+
|
|
146
|
+
/**
 * Asserts that `name` was called at least once.
 *
 * @throws Error naming the expected tool and listing the tools that were actually called.
 */
export function expectTool(transcript: ToolCall[], name: string): void {
  const calledNames = transcript.map((call) => call.tool)
  if (calledNames.includes(name)) return

  const called = calledNames.join(', ') || '(none)'
  throw new Error(`Expected tool "${name}" to be called. Tools actually called: [${called}]`)
}
|
|
152
|
+
|
|
153
|
+
/**
 * Asserts that `name` was never called.
 *
 * @throws Error quoting the input of the first offending call.
 */
export function expectNoTool(transcript: ToolCall[], name: string): void {
  for (const call of transcript) {
    if (call.tool !== name) continue

    throw new Error(
      `Expected tool "${name}" NOT to be called, but it was called with: ${JSON.stringify(call.input)}`,
    )
  }
}
|
|
161
|
+
|
|
162
|
+
/**
 * Asserts that the FIRST call of each tool in `order` happens strictly after the first call
 * of the tool listed before it.
 *
 * @throws Error when a listed tool was never called, or when two neighbours are out of order.
 */
export function expectOrder(transcript: ToolCall[], order: string[]): void {
  const calledNames = transcript.map((call) => call.tool)

  // Resolve every name up front so a missing tool is reported before any ordering violation.
  const firstCalls = order.map((name) => {
    const idx = calledNames.indexOf(name)
    if (idx === -1) {
      throw new Error(`Cannot enforce ordering: tool "${name}" was never called. Transcript: [${calledNames.join(', ')}]`)
    }
    return { name, idx }
  })

  let prev: { name: string; idx: number } | undefined
  for (const curr of firstCalls) {
    if (prev !== undefined && curr.idx <= prev.idx) {
      throw new Error(`Expected "${prev.name}" (pos ${prev.idx}) before "${curr.name}" (pos ${curr.idx})`)
    }
    prev = curr
  }
}
|
|
178
|
+
|
|
179
|
+
/**
 * Asserts that at least one call to `toolName` passed a value for `key` that satisfies
 * `predicate`.
 *
 * @throws Error when the tool was never called, or when no call's value satisfied the
 *   predicate (the message lists every value seen).
 */
export function expectParam(
  transcript: ToolCall[],
  toolName: string,
  key: string,
  predicate: (value: unknown) => boolean,
): void {
  const values = transcript
    .filter((call) => call.tool === toolName)
    .map((call) => call.input[key])

  if (values.length === 0) {
    throw new Error(`Tool "${toolName}" was not called — cannot check param "${key}"`)
  }

  const satisfied = values.some((value) => predicate(value))
  if (satisfied) return

  throw new Error(
    `Tool "${toolName}" was called but param "${key}" did not satisfy predicate. Values: ${JSON.stringify(values)}`,
  )
}
|
|
193
|
+
|
|
194
|
+
/**
 * Asserts that `finalText` contains every entry of `substrings`, ignoring case.
 *
 * @throws Error for the first missing substring, quoting the first 400 characters of the text.
 */
export function expectFinalContains(finalText: string, substrings: string[]): void {
  const missing = substrings.find(
    (sub) => !finalText.toLowerCase().includes(sub.toLowerCase()),
  )
  if (missing === undefined) return

  const preview = finalText.slice(0, 400)
  throw new Error(`Expected final text to contain "${missing}". Got (first 400 chars): ${preview}`)
}
|
|
202
|
+
|
|
203
|
+
// ---------------------------------------------------------------------------
|
|
204
|
+
// Artifact writer — writes JSON transcript to tests/gtm/.artifacts/<slug>.json
|
|
205
|
+
// ---------------------------------------------------------------------------
|
|
206
|
+
|
|
207
|
+
const __dirname = path.dirname(url.fileURLToPath(import.meta.url))
|
|
208
|
+
|
|
209
|
+
/**
 * Writes the run result to `.artifacts/<slug>.json` next to this file.
 *
 * The JSON holds `scenario`, an ISO `timestamp`, and every field of `result`, pretty-printed
 * with two-space indentation. An existing file for the same slug is overwritten.
 *
 * @param slug Scenario identifier, used as the file name.
 * @param result Agent run to persist.
 */
export function writeArtifact(slug: string, result: AgentResult): void {
  const dir = path.join(__dirname, '.artifacts')
  // `recursive: true` makes mkdirSync a no-op when the directory already exists,
  // so no separate existsSync check is needed.
  fs.mkdirSync(dir, { recursive: true })

  const payload = { scenario: slug, timestamp: new Date().toISOString(), ...result }
  fs.writeFileSync(path.join(dir, `${slug}.json`), JSON.stringify(payload, null, 2))
}
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
import { z } from 'zod'
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
searchCompaniesName,
|
|
5
|
+
searchCompaniesDescription,
|
|
6
|
+
searchCompaniesSchema,
|
|
7
|
+
searchCompaniesHandler,
|
|
8
|
+
} from '../../src/tools/search-companies.js'
|
|
9
|
+
|
|
10
|
+
import {
|
|
11
|
+
findPeopleName,
|
|
12
|
+
findPeopleDescription,
|
|
13
|
+
findPeopleSchema,
|
|
14
|
+
findPeopleHandler,
|
|
15
|
+
} from '../../src/tools/find-people.js'
|
|
16
|
+
|
|
17
|
+
import {
|
|
18
|
+
findEmailName,
|
|
19
|
+
findEmailDescription,
|
|
20
|
+
findEmailSchema,
|
|
21
|
+
findEmailHandler,
|
|
22
|
+
} from '../../src/tools/find-email.js'
|
|
23
|
+
|
|
24
|
+
import {
|
|
25
|
+
findEmailsName,
|
|
26
|
+
findEmailsDescription,
|
|
27
|
+
findEmailsSchema,
|
|
28
|
+
findEmailsHandler,
|
|
29
|
+
} from '../../src/tools/find-emails.js'
|
|
30
|
+
|
|
31
|
+
import {
|
|
32
|
+
verifyEmailName,
|
|
33
|
+
verifyEmailDescription,
|
|
34
|
+
verifyEmailSchema,
|
|
35
|
+
verifyEmailHandler,
|
|
36
|
+
} from '../../src/tools/verify-email.js'
|
|
37
|
+
|
|
38
|
+
import {
|
|
39
|
+
findPhoneName,
|
|
40
|
+
findPhoneDescription,
|
|
41
|
+
findPhoneSchema,
|
|
42
|
+
findPhoneHandler,
|
|
43
|
+
} from '../../src/tools/find-phone.js'
|
|
44
|
+
|
|
45
|
+
import {
|
|
46
|
+
enrichCompanyName,
|
|
47
|
+
enrichCompanyDescription,
|
|
48
|
+
enrichCompanySchema,
|
|
49
|
+
enrichCompanyHandler,
|
|
50
|
+
} from '../../src/tools/enrich-company.js'
|
|
51
|
+
|
|
52
|
+
import {
|
|
53
|
+
enrichPersonName,
|
|
54
|
+
enrichPersonDescription,
|
|
55
|
+
enrichPersonSchema,
|
|
56
|
+
enrichPersonHandler,
|
|
57
|
+
} from '../../src/tools/enrich-person.js'
|
|
58
|
+
|
|
59
|
+
import {
|
|
60
|
+
searchWebName,
|
|
61
|
+
searchWebDescription,
|
|
62
|
+
searchWebSchema,
|
|
63
|
+
searchWebHandler,
|
|
64
|
+
} from '../../src/tools/search-web.js'
|
|
65
|
+
|
|
66
|
+
import {
|
|
67
|
+
searchJobsName,
|
|
68
|
+
searchJobsDescription,
|
|
69
|
+
searchJobsSchema,
|
|
70
|
+
searchJobsHandler,
|
|
71
|
+
} from '../../src/tools/search-jobs.js'
|
|
72
|
+
|
|
73
|
+
import {
|
|
74
|
+
searchAdsName,
|
|
75
|
+
searchAdsDescription,
|
|
76
|
+
searchAdsSchema,
|
|
77
|
+
searchAdsHandler,
|
|
78
|
+
} from '../../src/tools/search-ads.js'
|
|
79
|
+
|
|
80
|
+
import {
|
|
81
|
+
searchPlacesName,
|
|
82
|
+
searchPlacesDescription,
|
|
83
|
+
searchPlacesSchema,
|
|
84
|
+
searchPlacesHandler,
|
|
85
|
+
} from '../../src/tools/search-places.js'
|
|
86
|
+
|
|
87
|
+
import {
|
|
88
|
+
findInfluencersName,
|
|
89
|
+
findInfluencersDescription,
|
|
90
|
+
findInfluencersSchema,
|
|
91
|
+
findInfluencersHandler,
|
|
92
|
+
} from '../../src/tools/find-influencers.js'
|
|
93
|
+
|
|
94
|
+
import {
|
|
95
|
+
searchRedditName,
|
|
96
|
+
searchRedditDescription,
|
|
97
|
+
searchRedditSchema,
|
|
98
|
+
searchRedditHandler,
|
|
99
|
+
} from '../../src/tools/search-reddit.js'
|
|
100
|
+
|
|
101
|
+
import {
|
|
102
|
+
searchSeoName,
|
|
103
|
+
searchSeoDescription,
|
|
104
|
+
searchSeoSchema,
|
|
105
|
+
searchSeoHandler,
|
|
106
|
+
} from '../../src/tools/search-seo.js'
|
|
107
|
+
|
|
108
|
+
import {
|
|
109
|
+
findSignalsName,
|
|
110
|
+
findSignalsDescription,
|
|
111
|
+
findSignalsSchema,
|
|
112
|
+
findSignalsHandler,
|
|
113
|
+
} from '../../src/tools/find-signals.js'
|
|
114
|
+
|
|
115
|
+
import {
|
|
116
|
+
fetchPageContentName,
|
|
117
|
+
fetchPageContentDescription,
|
|
118
|
+
fetchPageContentSchema,
|
|
119
|
+
fetchPageContentHandler,
|
|
120
|
+
} from '../../src/tools/fetch-page-content.js'
|
|
121
|
+
|
|
122
|
+
/**
 * Shape of a single element of the `MCP_TOOLS` array.
 */
export interface McpToolEntry {
  /** Identifier of the tool. */
  name: string
  /** Free-text description of the tool. */
  description: string
  /** Zod object schema attached to the tool. */
  schema: z.ZodObject<z.ZodRawShape>
  /**
   * Async callback for the tool. Receives a string-keyed record and resolves
   * to an object holding a list of `{ text }` items plus an optional
   * `isError` flag.
   */
  handler: (
    input: Record<string, unknown>,
  ) => Promise<{ content: Array<{ text: string }>; isError?: boolean }>
}
|
|
128
|
+
|
|
129
|
+
/**
 * One row per tool: [name, description, raw Zod shape, handler].
 * The row order here is the order of the resulting `MCP_TOOLS` array.
 */
const TOOL_DEFINITIONS: Array<
  [string, string, z.ZodRawShape, McpToolEntry['handler']]
> = [
  [searchCompaniesName, searchCompaniesDescription, searchCompaniesSchema, searchCompaniesHandler],
  [findPeopleName, findPeopleDescription, findPeopleSchema, findPeopleHandler],
  [findEmailName, findEmailDescription, findEmailSchema, findEmailHandler],
  [findEmailsName, findEmailsDescription, findEmailsSchema, findEmailsHandler],
  [verifyEmailName, verifyEmailDescription, verifyEmailSchema, verifyEmailHandler],
  [findPhoneName, findPhoneDescription, findPhoneSchema, findPhoneHandler],
  [enrichCompanyName, enrichCompanyDescription, enrichCompanySchema, enrichCompanyHandler],
  [enrichPersonName, enrichPersonDescription, enrichPersonSchema, enrichPersonHandler],
  [searchWebName, searchWebDescription, searchWebSchema, searchWebHandler],
  [searchJobsName, searchJobsDescription, searchJobsSchema, searchJobsHandler],
  [searchAdsName, searchAdsDescription, searchAdsSchema, searchAdsHandler],
  [searchPlacesName, searchPlacesDescription, searchPlacesSchema, searchPlacesHandler],
  [findInfluencersName, findInfluencersDescription, findInfluencersSchema, findInfluencersHandler],
  [searchRedditName, searchRedditDescription, searchRedditSchema, searchRedditHandler],
  [searchSeoName, searchSeoDescription, searchSeoSchema, searchSeoHandler],
  [findSignalsName, findSignalsDescription, findSignalsSchema, findSignalsHandler],
  [fetchPageContentName, fetchPageContentDescription, fetchPageContentSchema, fetchPageContentHandler],
]

/**
 * List of tool entries, each pairing a tool's name and description with a
 * Zod object schema (built by wrapping the tool's raw shape in `z.object`)
 * and its handler.
 */
export const MCP_TOOLS: McpToolEntry[] = TOOL_DEFINITIONS.map(
  ([name, description, shape, handler]) => ({
    name,
    description,
    schema: z.object(shape),
    handler,
  }),
)
|