gitlab-mcp 0.1.4 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +7 -0
- package/.editorconfig +9 -0
- package/.env.example +75 -0
- package/.github/workflows/nodejs.yml +31 -0
- package/.github/workflows/npm-publish.yml +31 -0
- package/.husky/pre-commit +1 -0
- package/.nvmrc +1 -0
- package/.prettierrc.json +6 -0
- package/Dockerfile +20 -0
- package/README.md +416 -251
- package/docker-compose.yml +10 -0
- package/docs/architecture.md +310 -0
- package/docs/authentication.md +299 -0
- package/docs/configuration.md +149 -0
- package/docs/deployment.md +336 -0
- package/docs/tools.md +294 -0
- package/eslint.config.js +23 -0
- package/package.json +70 -32
- package/scripts/get-oauth-token.example.sh +15 -0
- package/src/config/env.ts +171 -0
- package/src/http.ts +605 -0
- package/src/index.ts +77 -0
- package/src/lib/auth-context.ts +19 -0
- package/src/lib/gitlab-client.ts +1810 -0
- package/src/lib/logger.ts +17 -0
- package/src/lib/network.ts +45 -0
- package/src/lib/oauth.ts +287 -0
- package/src/lib/output.ts +51 -0
- package/src/lib/policy.ts +78 -0
- package/src/lib/request-runtime.ts +376 -0
- package/src/lib/sanitize.ts +25 -0
- package/src/server/build-server.ts +17 -0
- package/src/tools/gitlab.ts +3128 -0
- package/src/tools/health.ts +27 -0
- package/src/tools/mr-code-context.ts +473 -0
- package/src/types/context.ts +13 -0
- package/tests/auth-context.test.ts +102 -0
- package/tests/gitlab-client.test.ts +674 -0
- package/tests/graphql-guard.test.ts +121 -0
- package/tests/integration/agent-loop.integration.test.ts +552 -0
- package/tests/integration/server.integration.test.ts +543 -0
- package/tests/mr-code-context.test.ts +600 -0
- package/tests/oauth.test.ts +43 -0
- package/tests/output.test.ts +186 -0
- package/tests/policy.test.ts +324 -0
- package/tests/request-runtime.test.ts +252 -0
- package/tests/sanitize.test.ts +123 -0
- package/tests/upload-reference.test.ts +84 -0
- package/tsconfig.build.json +11 -0
- package/tsconfig.json +21 -0
- package/vitest.config.ts +12 -0
- package/LICENSE +0 -21
- package/build/index.js +0 -1641
- package/build/schemas.js +0 -684
- package/build/test-note.js +0 -54
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
|
|
3
|
+
import { containsGraphqlMutation, shouldDisableGraphqlTools } from "../src/tools/gitlab.js";
|
|
4
|
+
|
|
5
|
+
describe("containsGraphqlMutation", () => {
  // Multi-line documents are kept as named constants so the case table stays readable.
  const mutationAfterFragment = `
fragment SharedFields on Issue {
id
}

mutation CreateIssue($title: String!) {
createIssue(input: { title: $title }) {
issue {
...SharedFields
}
}
}
`;

  const mutationInsideBlockString = `query {
search(query: """mutation { createIssue(input:{}) { id } }""") {
blobs { id }
}
}`;

  const mutationInsideComment = `
# mutation CreateIssue { ... }
query { project { id } }
`;

  /** One entry per test: every document must be classified as `isMutation`. */
  const cases: Array<{ title: string; documents: string[]; isMutation: boolean }> = [
    {
      title: "detects mutation operations",
      documents: ["mutation { createIssue(input: {}) { id } }"],
      isMutation: true
    },
    {
      title: "detects named mutation operations",
      documents: ["mutation CreateIssue { createIssue(input: {}) { id } }"],
      isMutation: true
    },
    {
      title: "detects mutation with variables",
      documents: [
        "mutation CreateIssue($input: CreateIssueInput!) { createIssue(input: $input) { id } }"
      ],
      isMutation: true
    },
    {
      title: "detects mutation operations not at the beginning of a document",
      documents: [mutationAfterFragment],
      isMutation: true
    },
    {
      title: "does not flag query operations",
      documents: ['query { project(fullPath: "group/app") { id } }'],
      isMutation: false
    },
    {
      title: "does not flag named query operations",
      documents: ['query GetProject { project(fullPath: "group/app") { id } }'],
      isMutation: false
    },
    {
      title: "ignores mutation keyword inside string literals",
      documents: [
        'query { search(query: "mutation { createIssue(input:{}) { id } }") { blobs { id } } }'
      ],
      isMutation: false
    },
    {
      title: "ignores mutation keyword inside triple-quoted strings",
      documents: [mutationInsideBlockString],
      isMutation: false
    },
    {
      title: "ignores mutation keyword in comments",
      documents: [mutationInsideComment],
      isMutation: false
    },
    { title: "returns false for empty string", documents: [""], isMutation: false },
    { title: "returns false for whitespace only", documents: [" \n "], isMutation: false },
    {
      title: "detects case-insensitive mutation keyword",
      documents: ["MUTATION { createIssue { id } }", "Mutation { createIssue { id } }"],
      isMutation: true
    },
    {
      title: "handles subscription operations (not mutation)",
      documents: ["subscription { issueUpdated { id status } }"],
      isMutation: false
    },
    {
      title: "detects mutation with leading whitespace/newlines",
      documents: ["\n\n mutation { deleteIssue { id } }"],
      isMutation: true
    }
  ];

  for (const { title, documents, isMutation } of cases) {
    it(title, () => {
      for (const document of documents) {
        const verdict = expect(containsGraphqlMutation(document));
        if (isMutation) {
          verdict.toBeTruthy();
        } else {
          verdict.toBeFalsy();
        }
      }
    });
  }
});
|
|
96
|
+
|
|
97
|
+
describe("shouldDisableGraphqlTools", () => {
  /** One entry per test: allowed project IDs, the explicit override flag, and the expected verdict. */
  const cases: Array<{
    title: string;
    projectIds: string[];
    override: boolean;
    disabled: boolean;
  }> = [
    {
      title: "disables graphql tools by default when project scope restrictions are active",
      projectIds: ["123"],
      override: false,
      disabled: true
    },
    {
      title: "keeps graphql tools enabled when explicit override is set",
      projectIds: ["123"],
      override: true,
      disabled: false
    },
    {
      title: "keeps graphql tools enabled without project scope restrictions",
      projectIds: [],
      override: false,
      disabled: false
    },
    {
      title: "keeps graphql tools enabled with empty project IDs and override",
      projectIds: [],
      override: true,
      disabled: false
    },
    {
      title: "disables with multiple project IDs",
      projectIds: ["1", "2", "3"],
      override: false,
      disabled: true
    },
    {
      title: "enables with multiple project IDs and override",
      projectIds: ["1", "2", "3"],
      override: true,
      disabled: false
    }
  ];

  for (const { title, projectIds, override, disabled } of cases) {
    it(title, () => {
      const verdict = expect(shouldDisableGraphqlTools(projectIds, override));
      if (disabled) {
        verdict.toBeTruthy();
      } else {
        verdict.toBeFalsy();
      }
    });
  }
});
|
|
@@ -0,0 +1,552 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration tests simulating a full agent loop:
|
|
3
|
+
* LLM (scripted) → MCP client → MCP server (InMemoryTransport)
|
|
4
|
+
*
|
|
5
|
+
* This validates the entire closed-loop flow:
|
|
6
|
+
* 1. Client calls tools/list to discover available tools
|
|
7
|
+
* 2. ScriptedLLM decides which tool to call
|
|
8
|
+
* 3. Client calls tools/call on the MCP server
|
|
9
|
+
* 4. Tool result is returned to the LLM
|
|
10
|
+
* 5. LLM produces final text output
|
|
11
|
+
*
|
|
12
|
+
* The GitLabClient is stubbed so no real network calls are made.
|
|
13
|
+
*/
|
|
14
|
+
import { describe, expect, it, vi } from "vitest";
|
|
15
|
+
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
|
|
16
|
+
import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js";
|
|
17
|
+
import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
18
|
+
|
|
19
|
+
import { createMcpServer } from "../../src/server/build-server.js";
|
|
20
|
+
import { OutputFormatter } from "../../src/lib/output.js";
|
|
21
|
+
import { ToolPolicyEngine } from "../../src/lib/policy.js";
|
|
22
|
+
import type { AppContext } from "../../src/types/context.js";
|
|
23
|
+
|
|
24
|
+
/* ------------------------------------------------------------------ */
|
|
25
|
+
/* LLM abstraction */
|
|
26
|
+
/* ------------------------------------------------------------------ */
|
|
27
|
+
|
|
28
|
+
/** Plain text emitted by the model. */
interface LLMTextContent {
  type: "text";
  text: string;
}

/** A request from the model to invoke a named tool with the given input. */
interface LLMToolUseContent {
  type: "tool_use";
  id: string;
  name: string;
  input: Record<string, unknown>;
}

/** A single content block of a model response, discriminated by `type`. */
type LLMContent = LLMTextContent | LLMToolUseContent;

/** One model turn: an ordered list of content blocks. */
interface LLMResponse {
  content: LLMContent[];
}

/** Minimal model interface used by the agent loop in this file. */
interface LLM {
  /**
   * Produces the next model turn.
   *
   * @param args.messages conversation history so far
   * @param args.tools tool descriptors offered to the model
   */
  create(args: {
    messages: unknown[];
    tools: { name: string; description?: string; input_schema?: unknown }[];
  }): Promise<LLMResponse>;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Deterministic LLM stand-in that replays a fixed list of responses.
 *
 * Every `create()` call returns the response at the current position and
 * advances by one; the script array itself is never modified. Calling
 * `create()` after the last response has been returned rejects with an error.
 */
class ScriptedLLM implements LLM {
  private callIndex = 0;

  constructor(private script: LLMResponse[]) {}

  async create(): Promise<LLMResponse> {
    const position = this.callIndex;

    if (position < this.script.length) {
      this.callIndex = position + 1;
      return this.script[position];
    }

    throw new Error(`ScriptedLLM: script exhausted after ${this.callIndex} calls`);
  }
}
|
|
59
|
+
|
|
60
|
+
/* ------------------------------------------------------------------ */
|
|
61
|
+
/* Agent loop implementation */
|
|
62
|
+
/* ------------------------------------------------------------------ */
|
|
63
|
+
|
|
64
|
+
/** Everything the agent loop observed while answering one query. */
interface AgentResult {
  /** Text blocks emitted by the LLM, in the order they were seen. */
  finalText: string[];
  /** Tool invocations requested by the LLM, in call order. */
  toolCalls: { name: string; arguments: Record<string, unknown> }[];
  /** Raw results returned by the MCP client, index-aligned with `toolCalls`. */
  toolResults: unknown[];
}
|
|
69
|
+
|
|
70
|
+
/**
 * Runs a minimal agent loop against an MCP client.
 *
 * Flow: list the server's tools once, then repeatedly ask the LLM for a turn.
 * Text blocks are collected; every `tool_use` block in the turn is executed
 * through `client.callTool` in order. The assistant turn plus one
 * `tool_result` per executed `tool_use` is appended to the conversation
 * before the next turn, so every tool-use id in the history has a matching
 * result.
 *
 * The loop ends when a turn contains no `tool_use` block or after
 * `maxIterations` LLM turns, whichever comes first.
 *
 * @param params.client connected MCP client used for `listTools` / `callTool`
 * @param params.llm model abstraction that produces each turn
 * @param params.query initial user message
 * @param params.maxIterations upper bound on LLM turns (default 10)
 * @returns collected text, requested tool calls and their raw results
 */
async function runAgentLoop(params: {
  client: Client;
  llm: LLM;
  query: string;
  maxIterations?: number;
}): Promise<AgentResult> {
  const { client, llm, query, maxIterations = 10 } = params;

  // 1. Discover tools and map them to the shape handed to the LLM.
  const toolsResponse = await client.listTools();
  const tools = toolsResponse.tools.map((t) => ({
    name: t.name,
    description: t.description,
    input_schema: t.inputSchema
  }));

  // 2. Conversation loop
  const messages: unknown[] = [{ role: "user", content: query }];
  const finalText: string[] = [];
  const toolCalls: AgentResult["toolCalls"] = [];
  const toolResults: unknown[] = [];

  for (let i = 0; i < maxIterations; i++) {
    const response = await llm.create({ messages, tools });

    // tool_result blocks produced during this turn, one per executed tool_use.
    const turnResults: unknown[] = [];

    for (const content of response.content) {
      if (content.type === "text") {
        finalText.push(content.text);
        continue;
      }

      if (content.type === "tool_use") {
        toolCalls.push({ name: content.name, arguments: content.input });

        // Call the actual MCP server tool
        const toolResult = await client.callTool({
          name: content.name,
          arguments: content.input
        });
        toolResults.push(toolResult);

        turnResults.push({
          type: "tool_result",
          tool_use_id: content.id,
          content: [
            {
              type: "text",
              text: JSON.stringify(toolResult.content)
            }
          ]
        });
      }
    }

    // No tool was requested: the model is done.
    if (turnResults.length === 0) break;

    // Build conversation history (Anthropic-style): the full assistant turn
    // followed by a single user message carrying every tool result.
    messages.push({ role: "assistant", content: response.content });
    messages.push({ role: "user", content: turnResults });
  }

  return { finalText, toolCalls, toolResults };
}
|
|
140
|
+
|
|
141
|
+
/* ------------------------------------------------------------------ */
|
|
142
|
+
/* Test helpers */
|
|
143
|
+
/* ------------------------------------------------------------------ */
|
|
144
|
+
|
|
145
|
+
/**
 * Assembles the AppContext used by the tests in this file.
 *
 * The environment is a fixed literal, the logger is made of `vi.fn()` spies,
 * and the GitLab client is a stub exposing `listProjects`, `getProject` and
 * `listIssues` with canned data.
 *
 * @param gitlabStub optional members spread over the default GitLab stub,
 *   replacing any default with the same name
 */
function buildTestContext(gitlabStub?: Partial<AppContext["gitlab"]>): AppContext {
  const env = {
    NODE_ENV: "test",
    LOG_LEVEL: "silent",
    MCP_SERVER_NAME: "agent-test-server",
    MCP_SERVER_VERSION: "0.0.1",
    GITLAB_API_URL: "https://gitlab.example.com/api/v4",
    GITLAB_API_URLS: ["https://gitlab.example.com/api/v4"],
    GITLAB_PERSONAL_ACCESS_TOKEN: "test-token",
    GITLAB_USE_OAUTH: false,
    GITLAB_OAUTH_AUTO_OPEN_BROWSER: false,
    GITLAB_OAUTH_SCOPES: "api",
    GITLAB_READ_ONLY_MODE: false,
    GITLAB_ALLOWED_PROJECT_IDS: [],
    GITLAB_ALLOWED_TOOLS: [],
    GITLAB_ALLOW_GRAPHQL_WITH_PROJECT_SCOPE: false,
    GITLAB_RESPONSE_MODE: "json",
    GITLAB_MAX_RESPONSE_BYTES: 200_000,
    GITLAB_HTTP_TIMEOUT_MS: 20_000,
    GITLAB_ERROR_DETAIL_MODE: "full",
    GITLAB_CLOUDFLARE_BYPASS: false,
    GITLAB_ALLOW_INSECURE_TOKEN_FILE: false,
    GITLAB_ALLOW_INSECURE_TLS: false,
    GITLAB_COOKIE_WARMUP_PATH: "/user",
    USE_GITLAB_WIKI: true,
    USE_MILESTONE: true,
    USE_PIPELINE: true,
    USE_RELEASE: true,
    REMOTE_AUTHORIZATION: false,
    ENABLE_DYNAMIC_API_URL: false,
    HTTP_JSON_ONLY: false,
    SSE: false,
    SESSION_TIMEOUT_SECONDS: 3600,
    MAX_SESSIONS: 1000,
    MAX_REQUESTS_PER_MINUTE: 300,
    HTTP_HOST: "127.0.0.1",
    HTTP_PORT: 3333,
    GITLAB_TOKEN_CACHE_SECONDS: 300,
    GITLAB_TOKEN_SCRIPT_TIMEOUT_MS: 10_000,
    GITLAB_OAUTH_GITLAB_URL: undefined,
    GITLAB_OAUTH_CLIENT_ID: undefined,
    GITLAB_OAUTH_CLIENT_SECRET: undefined,
    GITLAB_OAUTH_REDIRECT_URI: undefined,
    GITLAB_OAUTH_TOKEN_PATH: undefined,
    GITLAB_AUTH_COOKIE_PATH: undefined,
    GITLAB_USER_AGENT: undefined,
    GITLAB_ACCEPT_LANGUAGE: undefined,
    GITLAB_TOKEN_SCRIPT: undefined,
    GITLAB_TOKEN_FILE: undefined,
    GITLAB_CA_CERT_PATH: undefined,
    GITLAB_DENIED_TOOLS_REGEX: undefined,
    NODE_TLS_REJECT_UNAUTHORIZED: undefined,
    HTTP_PROXY: undefined,
    HTTPS_PROXY: undefined
  } as AppContext["env"];

  // Spy logger; child() hands back an empty object cast to the expected type.
  const logger = {
    info: vi.fn(),
    warn: vi.fn(),
    error: vi.fn(),
    debug: vi.fn(),
    trace: vi.fn(),
    fatal: vi.fn(),
    child: () => ({}) as never
  } as unknown as AppContext["logger"];

  // Default stubbed GitLab client; caller-supplied members take precedence.
  const gitlab = {
    listProjects: vi.fn().mockResolvedValue([
      { id: 1, name: "project-alpha", path_with_namespace: "group/project-alpha" },
      { id: 2, name: "project-beta", path_with_namespace: "group/project-beta" }
    ]),
    getProject: vi.fn().mockResolvedValue({
      id: 1,
      name: "project-alpha",
      path_with_namespace: "group/project-alpha",
      description: "Test project",
      default_branch: "main",
      web_url: "https://gitlab.example.com/group/project-alpha"
    }),
    listIssues: vi.fn().mockResolvedValue([
      { iid: 1, title: "Fix bug", state: "opened" },
      { iid: 2, title: "Add feature", state: "opened" }
    ]),
    ...gitlabStub
  } as unknown as AppContext["gitlab"];

  const policy = new ToolPolicyEngine({
    readOnlyMode: false,
    allowedTools: [],
    enabledFeatures: {
      wiki: true,
      milestone: true,
      pipeline: true,
      release: true
    }
  });

  const formatter = new OutputFormatter({
    responseMode: "json",
    maxBytes: 200_000
  });

  return { env, logger, gitlab, policy, formatter };
}
|
|
247
|
+
|
|
248
|
+
/**
 * Builds a test context, creates an MCP server from it, and connects the
 * server and a fresh client to the two ends of a linked in-memory transport.
 * The server is connected before the client.
 *
 * @param gitlabStub optional GitLab client members forwarded to buildTestContext
 * @returns the connected client and server, both transports, and the context
 */
async function createAgentTestPair(gitlabStub?: Partial<AppContext["gitlab"]>): Promise<{
  client: Client;
  server: McpServer;
  clientTransport: InMemoryTransport;
  serverTransport: InMemoryTransport;
  context: AppContext;
}> {
  const context = buildTestContext(gitlabStub);
  const server = createMcpServer(context);

  const linkedPair = InMemoryTransport.createLinkedPair();
  const clientTransport = linkedPair[0];
  const serverTransport = linkedPair[1];

  await server.connect(serverTransport);

  const clientInfo = { name: "agent-test-client", version: "0.0.1" };
  const client = new Client(clientInfo, { capabilities: {} });
  await client.connect(clientTransport);

  return { client, server, clientTransport, serverTransport, context };
}
|
|
266
|
+
|
|
267
|
+
/* ------------------------------------------------------------------ */
|
|
268
|
+
/* Agent loop tests */
|
|
269
|
+
/* ------------------------------------------------------------------ */
|
|
270
|
+
|
|
271
|
+
describe("Agent Loop Integration (ScriptedLLM + MCP server)", () => {
  type AgentTestPair = Awaited<ReturnType<typeof createAgentTestPair>>;

  /**
   * Creates a connected client/server pair, runs `body` against it, and then
   * closes both transports. The server transport is closed even when closing
   * the client transport throws; errors from `body` or from either close
   * still fail the test.
   *
   * @param body test logic receiving the connected pair
   * @param gitlabStub optional GitLab client members forwarded to createAgentTestPair
   */
  async function withAgentTestPair(
    body: (pair: AgentTestPair) => Promise<void>,
    gitlabStub?: Partial<AppContext["gitlab"]>
  ): Promise<void> {
    const pair = await createAgentTestPair(gitlabStub);
    try {
      await body(pair);
    } finally {
      try {
        await pair.clientTransport.close();
      } finally {
        await pair.serverTransport.close();
      }
    }
  }

  describe("single tool call flow", () => {
    it("completes a health_check tool call cycle", () =>
      withAgentTestPair(async ({ client }) => {
        const llm = new ScriptedLLM([
          // Turn 1: LLM decides to call health_check
          {
            content: [{ type: "tool_use", id: "call-1", name: "health_check", input: {} }]
          },
          // Turn 2: LLM sees tool result, produces final text
          {
            content: [{ type: "text", text: "Server is healthy!" }]
          }
        ]);

        const result = await runAgentLoop({
          client,
          llm,
          query: "Is the server healthy?"
        });

        // Verify tool was called
        expect(result.toolCalls).toHaveLength(1);
        expect(result.toolCalls[0].name).toBe("health_check");

        // Verify tool result was successful
        const toolResult = result.toolResults[0] as { isError?: boolean };
        expect(toolResult.isError).toBeFalsy();

        // Verify final output
        expect(result.finalText).toContain("Server is healthy!");
      }));

    it("completes a gitlab_list_projects tool call with mocked data", () =>
      withAgentTestPair(async ({ client, context }) => {
        const llm = new ScriptedLLM([
          {
            content: [{ type: "tool_use", id: "call-1", name: "gitlab_list_projects", input: {} }]
          },
          {
            content: [{ type: "text", text: "Found 2 projects." }]
          }
        ]);

        const result = await runAgentLoop({
          client,
          llm,
          query: "List all projects"
        });

        expect(result.toolCalls).toHaveLength(1);
        expect(result.toolCalls[0].name).toBe("gitlab_list_projects");

        // Verify the mocked gitlab client was called
        expect(context.gitlab.listProjects).toHaveBeenCalled();

        // Verify tool result contains data
        const toolResult = result.toolResults[0] as {
          isError?: boolean;
          content: Array<{ type: string; text: string }>;
        };
        expect(toolResult.isError).toBeFalsy();

        // Fail with a clear assertion (not a TypeError) if no text block came back.
        const textBlock = toolResult.content.find((c) => c.type === "text");
        expect(textBlock).toBeDefined();
        expect(textBlock?.text).toContain("project-alpha");
        expect(textBlock?.text).toContain("project-beta");

        expect(result.finalText).toContain("Found 2 projects.");
      }));
  });

  describe("multi-turn tool call flow", () => {
    it("handles sequential tool calls across multiple turns", () =>
      withAgentTestPair(async ({ client, context }) => {
        const llm = new ScriptedLLM([
          // Turn 1: List projects
          {
            content: [{ type: "tool_use", id: "call-1", name: "gitlab_list_projects", input: {} }]
          },
          // Turn 2: Get details on a specific project
          {
            content: [
              {
                type: "tool_use",
                id: "call-2",
                name: "gitlab_get_project",
                input: { project_id: "group/project-alpha" }
              }
            ]
          },
          // Turn 3: Final answer
          {
            content: [
              {
                type: "text",
                text: "project-alpha has default branch main."
              }
            ]
          }
        ]);

        const result = await runAgentLoop({
          client,
          llm,
          query: "Tell me about the first project"
        });

        expect(result.toolCalls).toHaveLength(2);
        expect(result.toolCalls[0].name).toBe("gitlab_list_projects");
        expect(result.toolCalls[1].name).toBe("gitlab_get_project");
        expect(result.toolCalls[1].arguments).toEqual({ project_id: "group/project-alpha" });

        expect(context.gitlab.listProjects).toHaveBeenCalled();
        expect(context.gitlab.getProject).toHaveBeenCalledWith("group/project-alpha");

        expect(result.finalText).toContain("project-alpha has default branch main.");
      }));
  });

  describe("text-only response (no tool call)", () => {
    it("handles LLM responding with text only", () =>
      withAgentTestPair(async ({ client }) => {
        const llm = new ScriptedLLM([
          {
            content: [{ type: "text", text: "I don't need any tools for this." }]
          }
        ]);

        const result = await runAgentLoop({
          client,
          llm,
          query: "Hello, how are you?"
        });

        expect(result.toolCalls).toHaveLength(0);
        expect(result.toolResults).toHaveLength(0);
        expect(result.finalText).toContain("I don't need any tools for this.");
      }));
  });

  describe("tool error handling in agent loop", () => {
    it("propagates tool errors back to LLM gracefully", () =>
      withAgentTestPair(
        async ({ client }) => {
          const llm = new ScriptedLLM([
            {
              content: [
                {
                  type: "tool_use",
                  id: "call-1",
                  name: "gitlab_get_project",
                  input: { project_id: "broken/project" }
                }
              ]
            },
            // LLM sees the error and produces a helpful message
            {
              content: [
                {
                  type: "text",
                  text: "Sorry, I couldn't fetch the project due to a network error."
                }
              ]
            }
          ]);

          const result = await runAgentLoop({
            client,
            llm,
            query: "Get project details"
          });

          expect(result.toolCalls).toHaveLength(1);

          // Tool result should be an error
          const toolResult = result.toolResults[0] as { isError?: boolean };
          expect(toolResult.isError).toBe(true);

          // Agent still completes with final text from the scripted LLM
          expect(result.finalText.join("\n")).toContain("couldn't fetch the project");
        },
        { getProject: vi.fn().mockRejectedValue(new Error("Network timeout")) }
      ));
  });

  describe("tool discovery", () => {
    it("agent discovers tools and LLM receives tool list", () =>
      withAgentTestPair(async ({ client }) => {
        let receivedTools: Array<{ name: string }> = [];

        const llm: LLM = {
          async create({ tools }) {
            receivedTools = tools;
            return { content: [{ type: "text", text: "Done" }] };
          }
        };

        await runAgentLoop({ client, llm, query: "test" });

        expect(receivedTools.length).toBeGreaterThan(0);

        const names = receivedTools.map((t) => t.name);
        expect(names).toContain("health_check");
        expect(names).toContain("gitlab_get_project");
        expect(names).toContain("gitlab_list_projects");
      }));
  });

  describe("iteration safety", () => {
    it("respects maxIterations to prevent infinite loops", () =>
      withAgentTestPair(async ({ client }) => {
        // LLM always requests another tool call, never stops.
        // The counter lives in a closure, so no `this` casts are needed.
        let callCount = 0;
        const infiniteLLM: LLM = {
          async create() {
            callCount++;
            return {
              content: [
                {
                  type: "tool_use",
                  id: `call-${callCount}`,
                  name: "health_check",
                  input: {}
                }
              ]
            };
          }
        };

        const result = await runAgentLoop({
          client,
          llm: infiniteLLM,
          query: "loop forever",
          maxIterations: 3
        });

        // Should stop after maxIterations
        expect(result.toolCalls).toHaveLength(3);
        expect(callCount).toBe(3);
      }));
  });
});
|