agentgit-mcp 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,203 @@
1
+ /**
2
+ * Evaluation tools: submit_evaluation, get_evaluations, get_latest_evaluation
3
+ *
4
+ * Agents run evaluations locally and submit results to the backend.
5
+ */
6
+
7
+ import { z } from "zod";
8
+ import { AgentGitHubClient } from "../client.js";
9
+ import { Evaluation, EvaluationList, EvalStatus } from "../types.js";
10
+
11
+ // Schema definitions
12
/** Builds a counter field: an integer >= 0 that defaults to 0 when omitted. */
const evaluationCount = (label: string) => z.number().int().min(0).default(0).describe(label);

/** Builds a score field: a number in the closed range [0, 1] that defaults to 0 when omitted. */
const evaluationScore = (label: string) => z.number().min(0).max(1).default(0).describe(label);

/**
 * Zod schema for the `submit_evaluation` tool input.
 *
 * Only `change_id`, `agent_id` and `status` have no default; every counter and
 * score falls back to 0 (and `tests_passed` to `false`) when the caller omits it.
 * `details` is an optional free-form string.
 */
export const submitEvaluationSchema = z.object({
  change_id: z.string().uuid().describe("The UUID of the change/PR to submit evaluation for"),
  agent_id: z.string().min(1).max(255).describe("Your agent identifier"),
  status: z
    .enum(["passed", "failed", "error"])
    .describe("Evaluation result: passed, failed, or error"),
  tests_passed: z.boolean().default(false).describe("Whether all tests passed"),
  tests_total: evaluationCount("Total number of tests"),
  tests_failed: evaluationCount("Number of failed tests"),
  lint_errors: evaluationCount("Number of lint errors"),
  lint_warnings: evaluationCount("Number of lint warnings"),
  type_errors: evaluationCount("Number of type errors"),
  correctness_score: evaluationScore("Correctness score (0-1) based on test results"),
  performance_score: evaluationScore("Performance score (0-1) based on benchmarks"),
  quality_score: evaluationScore("Quality score (0-1) based on lint/type checks"),
  overall_score: evaluationScore("Overall weighted score (0-1)"),
  details: z.string().optional().describe("Optional detailed report"),
});
50
+
51
/** `change_id` field shared by the two read-only evaluation lookups: a UUID string. */
const evaluationChangeId = z.string().uuid().describe("The UUID of the change/PR");

/** Zod schema for the `get_evaluations` tool input: a single UUID `change_id`. */
export const getEvaluationsSchema = z.object({
  change_id: evaluationChangeId,
});

/** Zod schema for the `get_latest_evaluation` tool input: a single UUID `change_id`. */
export const getLatestEvaluationSchema = z.object({
  change_id: evaluationChangeId,
});
58
+
59
+ // Tool implementations
60
/**
 * Sends an evaluation report for a change to the backend.
 *
 * The `change_id` is passed as the first argument of `client.submitEvaluation`;
 * every other field of `input` is forwarded unchanged in the request payload.
 *
 * @param client - API client used to perform the call.
 * @param input - Tool input shaped like `submitEvaluationSchema`.
 * @returns The value resolved by `client.submitEvaluation`.
 */
export async function submitEvaluation(
  client: AgentGitHubClient,
  input: z.infer<typeof submitEvaluationSchema>
): Promise<Evaluation> {
  const {
    change_id: changeId,
    agent_id,
    status,
    tests_passed,
    tests_total,
    tests_failed,
    lint_errors,
    lint_warnings,
    type_errors,
    correctness_score,
    performance_score,
    quality_score,
    overall_score,
    details,
  } = input;

  const report = {
    agent_id,
    // The schema's string literals are the same values as the EvalStatus enum members.
    status: status as EvalStatus,
    tests_passed,
    tests_total,
    tests_failed,
    lint_errors,
    lint_warnings,
    type_errors,
    correctness_score,
    performance_score,
    quality_score,
    overall_score,
    details,
  };

  return client.submitEvaluation(changeId, report);
}
80
+
81
/**
 * Fetches the evaluations recorded for a change.
 *
 * @param client - API client used to perform the call.
 * @param input - Tool input carrying the `change_id` to look up.
 * @returns The value resolved by `client.getEvaluations`.
 */
export async function getEvaluations(
  client: AgentGitHubClient,
  input: z.infer<typeof getEvaluationsSchema>
): Promise<EvaluationList> {
  const { change_id: changeId } = input;
  return client.getEvaluations(changeId);
}
87
+
88
/**
 * Fetches the latest evaluation for a change.
 *
 * @param client - API client used to perform the call.
 * @param input - Tool input carrying the `change_id` to look up.
 * @returns The value resolved by `client.getLatestEvaluation`.
 */
export async function getLatestEvaluation(
  client: AgentGitHubClient,
  input: z.infer<typeof getLatestEvaluationSchema>
): Promise<Evaluation> {
  const { change_id: changeId } = input;
  return client.getLatestEvaluation(changeId);
}
94
+
95
+ // Tool definitions for MCP registration
96
/** JSON Schema fragment for a `change_id` argument: a string in UUID format. */
const uuidProperty = (description: string) => ({
  type: "string",
  format: "uuid",
  description,
});

/** JSON Schema fragment for a counter argument: an integer that cannot be negative. */
const countProperty = (description: string) => ({
  type: "integer",
  minimum: 0,
  description,
});

/** JSON Schema fragment for a score argument: a number in the closed range [0, 1]. */
const scoreProperty = (description: string) => ({
  type: "number",
  minimum: 0,
  maximum: 1,
  description,
});

/**
 * Tool definitions for MCP registration.
 *
 * Each entry exposes a tool `name`, a human-readable `description` and a JSON
 * Schema `inputSchema`. The schemas advertise the same constraints that the
 * Zod schemas in this module enforce (UUID change ids, bounded agent id length,
 * non-negative integer counters, scores between 0 and 1), so a client can catch
 * an invalid argument before the call is made instead of after it is rejected.
 */
export const evaluationTools = [
  {
    name: "submit_evaluation",
    description:
      "Submit evaluation results for a PR/change. Run tests, lint, and benchmarks locally, " +
      "then submit the results here. The backend stores results for tracking. " +
      "Before calling this, you should:\n" +
      "1. Run tests: `pytest` or `npm test`\n" +
      "2. Run linting: `ruff check .` or `eslint .`\n" +
      "3. Run type checks: `mypy .` or `tsc --noEmit`\n" +
      "Then calculate scores and submit the results.",
    inputSchema: {
      type: "object" as const,
      properties: {
        change_id: uuidProperty("The UUID of the change/PR to submit evaluation for"),
        agent_id: {
          type: "string",
          minLength: 1,
          maxLength: 255,
          description: "Your agent identifier",
        },
        status: {
          type: "string",
          enum: ["passed", "failed", "error"],
          description: "Evaluation result: passed (all checks pass), failed (issues found), error (couldn't run)",
        },
        tests_passed: {
          type: "boolean",
          description: "Whether all tests passed",
        },
        tests_total: countProperty("Total number of tests"),
        tests_failed: countProperty("Number of failed tests"),
        lint_errors: countProperty("Number of lint errors"),
        lint_warnings: countProperty("Number of lint warnings"),
        type_errors: countProperty("Number of type errors"),
        correctness_score: scoreProperty(
          "Correctness score (0-1): 1.0 if all tests pass, lower based on failures"
        ),
        performance_score: scoreProperty(
          "Performance score (0-1): based on benchmark results, 0.5 if no benchmarks"
        ),
        quality_score: scoreProperty("Quality score (0-1): deduct for lint errors and type errors"),
        overall_score: scoreProperty(
          "Overall score (0-1): typically 0.5*correctness + 0.3*performance + 0.2*quality"
        ),
        details: {
          type: "string",
          description: "Optional detailed report of the evaluation",
        },
      },
      required: ["change_id", "agent_id", "status"],
    },
  },
  {
    name: "get_evaluations",
    description:
      "Get all evaluations submitted for a change/PR. Returns a list of all " +
      "evaluation results ordered by most recent first.",
    inputSchema: {
      type: "object" as const,
      properties: {
        change_id: uuidProperty("The UUID of the change/PR"),
      },
      required: ["change_id"],
    },
  },
  {
    name: "get_latest_evaluation",
    description:
      "Get the most recent evaluation for a change/PR. Use this to check " +
      "the current evaluation status before reviewing or voting.",
    inputSchema: {
      type: "object" as const,
      properties: {
        change_id: uuidProperty("The UUID of the change/PR"),
      },
      required: ["change_id"],
    },
  },
];
@@ -1,5 +1,7 @@
1
1
  /**
2
2
  * Lifecycle tools: acquire_task, release_task, submit_pr, revise_pr
3
+ *
4
+ * Task ID = GitHub issue number (integer, not UUID).
3
5
  */
4
6
 
5
7
  import { z } from "zod";
@@ -8,17 +10,17 @@ import { Task, Change } from "../types.js";
8
10
 
9
11
  // Schema definitions for tool inputs
10
12
  export const acquireTaskSchema = z.object({
11
- task_id: z.string().uuid().describe("The UUID of the task to acquire"),
13
+ issue_number: z.number().int().positive().describe("The GitHub issue number to acquire"),
12
14
  agent_id: z.string().min(1).max(255).describe("The unique identifier of the acquiring agent"),
13
15
  });
14
16
 
15
17
  export const releaseTaskSchema = z.object({
16
- task_id: z.string().uuid().describe("The UUID of the task to release"),
18
+ issue_number: z.number().int().positive().describe("The GitHub issue number to release"),
17
19
  agent_id: z.string().min(1).max(255).describe("The agent ID (must match the acquirer)"),
18
20
  });
19
21
 
20
22
  export const submitPrSchema = z.object({
21
- task_id: z.string().uuid().describe("The UUID of the task this PR addresses"),
23
+ task_id: z.number().int().positive().describe("The GitHub issue number this PR addresses"),
22
24
  agent_id: z.string().min(1).max(255).describe("The agent submitting the PR"),
23
25
  pr_url: z.string().url().max(500).describe("Full URL of the GitHub pull request"),
24
26
  pr_number: z.number().int().positive().describe("The PR number on GitHub"),
@@ -27,6 +29,7 @@ export const submitPrSchema = z.object({
27
29
  .length(40)
28
30
  .describe("The 40-character SHA of the commit being submitted for review"),
29
31
  tee_attestation: z.string().optional().describe("Optional TEE attestation for verified execution"),
32
+ improvement_notes: z.string().max(5000).optional().describe("Explanation of your approach and improvements made"),
30
33
  });
31
34
 
32
35
  export const revisePrSchema = z.object({
@@ -44,14 +47,14 @@ export async function acquireTask(
44
47
  client: AgentGitHubClient,
45
48
  input: z.infer<typeof acquireTaskSchema>
46
49
  ): Promise<Task> {
47
- return client.acquireTask(input.task_id, input.agent_id);
50
+ return client.acquireTask(input.issue_number, input.agent_id);
48
51
  }
49
52
 
50
53
  export async function releaseTask(
51
54
  client: AgentGitHubClient,
52
55
  input: z.infer<typeof releaseTaskSchema>
53
56
  ): Promise<Task> {
54
- return client.releaseTask(input.task_id, input.agent_id);
57
+ return client.releaseTask(input.issue_number, input.agent_id);
55
58
  }
56
59
 
57
60
  export async function submitPr(
@@ -65,6 +68,7 @@ export async function submitPr(
65
68
  pr_number: input.pr_number,
66
69
  commit_sha: input.commit_sha,
67
70
  tee_attestation: input.tee_attestation,
71
+ improvement_notes: input.improvement_notes,
68
72
  });
69
73
  }
70
74
 
@@ -84,42 +88,42 @@ export const lifecycleTools = [
84
88
  {
85
89
  name: "acquire_task",
86
90
  description:
87
- "Acquire/lock a task for your agent. This claims the task so no other agent can work on it. " +
88
- "The task must be in 'open' status. Returns the updated task with your agent_id set.",
91
+ "Acquire a GitHub issue as a task. This fetches the issue from GitHub and creates " +
92
+ "a task record in the database. The task is claimed so no other agent can work on it.",
89
93
  inputSchema: {
90
94
  type: "object" as const,
91
95
  properties: {
92
- task_id: {
93
- type: "string",
94
- description: "The UUID of the task to acquire",
96
+ issue_number: {
97
+ type: "number",
98
+ description: "The GitHub issue number to acquire",
95
99
  },
96
100
  agent_id: {
97
101
  type: "string",
98
102
  description: "Your unique agent identifier",
99
103
  },
100
104
  },
101
- required: ["task_id", "agent_id"],
105
+ required: ["issue_number", "agent_id"],
102
106
  },
103
107
  },
104
108
  {
105
109
  name: "release_task",
106
110
  description:
107
- "Release a previously acquired task back to open status. " +
111
+ "Release an acquired task back to available. " +
108
112
  "Only the agent that acquired the task can release it. " +
109
113
  "Use this if you cannot complete the task.",
110
114
  inputSchema: {
111
115
  type: "object" as const,
112
116
  properties: {
113
- task_id: {
114
- type: "string",
115
- description: "The UUID of the task to release",
117
+ issue_number: {
118
+ type: "number",
119
+ description: "The GitHub issue number to release",
116
120
  },
117
121
  agent_id: {
118
122
  type: "string",
119
123
  description: "Your agent ID (must match the original acquirer)",
120
124
  },
121
125
  },
122
- required: ["task_id", "agent_id"],
126
+ required: ["issue_number", "agent_id"],
123
127
  },
124
128
  },
125
129
  {
@@ -127,13 +131,13 @@ export const lifecycleTools = [
127
131
  description:
128
132
  "Submit a pull request for review. After completing your work and creating a PR on GitHub, " +
129
133
  "use this to register it for the consensus review process. The task must be acquired by your agent. " +
130
- "Other agents will then review and vote on your PR.",
134
+ "Other agents will then review and vote on your PR. Include improvement_notes to explain your approach.",
131
135
  inputSchema: {
132
136
  type: "object" as const,
133
137
  properties: {
134
138
  task_id: {
135
- type: "string",
136
- description: "The UUID of the task this PR addresses",
139
+ type: "number",
140
+ description: "The GitHub issue number this PR addresses",
137
141
  },
138
142
  agent_id: {
139
143
  type: "string",
@@ -155,6 +159,10 @@ export const lifecycleTools = [
155
159
  type: "string",
156
160
  description: "Optional TEE attestation for verified execution",
157
161
  },
162
+ improvement_notes: {
163
+ type: "string",
164
+ description: "Explain your approach: what you changed, why, and how it improves the code",
165
+ },
158
166
  },
159
167
  required: ["task_id", "agent_id", "pr_url", "pr_number", "commit_sha"],
160
168
  },
@@ -1,10 +1,10 @@
1
1
  /**
2
- * Review tools: list_pending_reviews, submit_review, get_consensus_status
2
+ * Review tools: list_pending_reviews, submit_review, submit_vote, get_consensus_status
3
3
  */
4
4
 
5
5
  import { z } from "zod";
6
6
  import { AgentGitHubClient } from "../client.js";
7
- import { ChangeList, Review, ConsensusStatus, ReviewDecision } from "../types.js";
7
+ import { ChangeList, Review, Vote, ConsensusStatus, ReviewDecision, VoteDecision } from "../types.js";
8
8
 
9
9
  // Schema definitions for tool inputs
10
10
  export const listPendingReviewsSchema = z.object({});
@@ -30,6 +30,18 @@ export const getConsensusStatusSchema = z.object({
30
30
  change_id: z.string().uuid().describe("The UUID of the change/PR"),
31
31
  });
32
32
 
33
+ // New decoupled voting schema
34
/** Field validators for a vote: target change, voter, decision and a non-empty reason. */
const voteFields = {
  change_id: z.string().uuid().describe("The UUID of the change/PR to vote on"),
  agent_id: z.string().min(1).max(255).describe("Your agent identifier"),
  decision: z
    .enum(["approve", "reject"])
    .describe("Your vote decision: approve or reject (use gh pr review for comments)"),
  reason: z
    .string()
    .min(1)
    .describe("Reason for your vote. For rejections, reference specific issues found on GitHub."),
};

/**
 * Zod schema for the `submit_vote` tool input.
 *
 * All four fields are required; `decision` is limited to "approve" or "reject".
 */
export const submitVoteSchema = z.object(voteFields);
44
+
33
45
  // Tool implementations
34
46
  export async function listPendingReviews(client: AgentGitHubClient): Promise<ChangeList> {
35
47
  return client.listPendingChanges();
@@ -72,6 +84,18 @@ export async function getConsensusStatus(
72
84
  return client.getConsensusStatus(input.change_id);
73
85
  }
74
86
 
87
+ // New decoupled voting function
88
/**
 * Registers a vote on a change with the backend.
 *
 * The `change_id` is passed as the first argument of `client.submitVote`; the
 * voter, decision and reason make up the request payload.
 *
 * @param client - API client used to perform the call.
 * @param input - Tool input shaped like `submitVoteSchema`.
 * @returns The value resolved by `client.submitVote`.
 */
export async function submitVote(
  client: AgentGitHubClient,
  input: z.infer<typeof submitVoteSchema>
): Promise<Vote> {
  const { change_id: changeId, agent_id, decision, reason } = input;
  const ballot = {
    agent_id,
    // The schema's string literals are the same values as the VoteDecision enum members.
    decision: decision as VoteDecision,
    reason,
  };
  return client.submitVote(changeId, ballot);
}
98
+
75
99
  // Tool definitions for MCP registration
76
100
  export const reviewTools = [
77
101
  {
@@ -103,10 +127,10 @@ export const reviewTools = [
103
127
  {
104
128
  name: "submit_review",
105
129
  description:
106
- "Submit your review for a pending PR. You can approve, reject, or comment. " +
107
- "You cannot review your own PRs. Each agent can only submit one review per change. " +
108
- "Once enough approvals are received (usually 2), the PR is automatically merged. " +
109
- "If rejected, the author can revise and resubmit.",
130
+ "[DEPRECATED - Use submit_vote instead] " +
131
+ "Submit your review for a pending PR. " +
132
+ "The new workflow is: 1) Post feedback on GitHub with `gh pr review`, " +
133
+ "2) Register vote with submit_vote. This tool is kept for backward compatibility.",
110
134
  inputSchema: {
111
135
  type: "object" as const,
112
136
  properties: {
@@ -151,4 +175,36 @@ export const reviewTools = [
151
175
  required: ["change_id"],
152
176
  },
153
177
  },
178
+ {
179
+ name: "submit_vote",
180
+ description:
181
+ "Register your vote for backend consensus coordination. " +
182
+ "IMPORTANT: Before using this tool, first post detailed feedback on GitHub using " +
183
+ "`gh pr review <PR_NUMBER> --approve` or `gh pr review <PR_NUMBER> --request-changes --body '...'`. " +
184
+ "This tool only tracks votes for automated merge/close decisions. " +
185
+ "Use 'approve' or 'reject' - comments should go on GitHub directly.",
186
+ inputSchema: {
187
+ type: "object" as const,
188
+ properties: {
189
+ change_id: {
190
+ type: "string",
191
+ description: "The UUID of the change/PR to vote on",
192
+ },
193
+ agent_id: {
194
+ type: "string",
195
+ description: "Your agent identifier",
196
+ },
197
+ decision: {
198
+ type: "string",
199
+ enum: ["approve", "reject"],
200
+ description: "Your vote decision (approve or reject)",
201
+ },
202
+ reason: {
203
+ type: "string",
204
+ description: "Reason for your vote. For rejections, reference issues from your GitHub review.",
205
+ },
206
+ },
207
+ required: ["change_id", "agent_id", "decision", "reason"],
208
+ },
209
+ },
154
210
  ];
package/src/types.ts CHANGED
@@ -23,17 +23,41 @@ export enum ReviewDecision {
23
23
  COMMENT = "comment",
24
24
  }
25
25
 
26
- // Task interfaces
26
/** Possible outcomes of a vote; the values are the strings used on the wire. */
export enum VoteDecision {
  /** Vote in favour of the change. */
  APPROVE = "approve",
  /** Vote against the change. */
  REJECT = "reject",
}
30
+
31
+ // GitHub Issue interface (for available tasks from GitHub API)
32
/** A GitHub issue as returned for available tasks. */
export interface GitHubIssue {
  /** Issue number. */
  number: number;
  /** Issue title. */
  title: string;
  /** Issue body text, or `null` when there is none. */
  body: string | null;
  /** URL of the issue (presumably its GitHub web page; confirm against the API). */
  html_url: string;
  /** Labels attached to the issue, as plain strings. */
  labels: string[];
  /** Issue state as a free-form string. */
  state: string;
}
40
+
41
/** A list of GitHub issues together with a total count. */
export interface GitHubIssueList {
  /** The issues in this response. */
  issues: GitHubIssue[];
  /** Total count reported alongside the issues. */
  total: number;
}
45
+
46
+ // Task interfaces (for acquired tasks from DB)
27
47
  export interface Task {
28
- id: string;
48
+ id: number; // GitHub issue number
29
49
  title: string;
30
50
  description: string;
31
51
  github_issue_url: string | null;
32
- github_issue_number: number | null;
33
52
  status: TaskStatus;
34
53
  acquired_by: string | null;
35
54
  acquired_at: string | null;
36
55
  extra_data: Record<string, unknown> | null;
56
+ // GSO ground truth fields
57
+ baseline_commit: string | null;
58
+ ground_truth_commit: string | null;
59
+ ground_truth_pr_url: string | null;
60
+ has_ground_truth: boolean;
37
61
  created_at: string;
38
62
  updated_at: string;
39
63
  }
@@ -52,7 +76,7 @@ export interface TaskAcquire {
52
76
  // Change interfaces
53
77
  export interface Change {
54
78
  id: string;
55
- task_id: string;
79
+ task_id: number; // GitHub issue number
56
80
  author_agent_id: string;
57
81
  pr_url: string;
58
82
  pr_number: number;
@@ -61,6 +85,7 @@ export interface Change {
61
85
  turn: number;
62
86
  max_turns: number;
63
87
  tee_attestation: string | null;
88
+ improvement_notes: string | null; // Agent's explanation of their approach
64
89
  created_at: string;
65
90
  updated_at: string;
66
91
  }
@@ -71,12 +96,13 @@ export interface ChangeList {
71
96
  }
72
97
 
73
98
  export interface ChangeRegister {
74
- task_id: string;
99
+ task_id: number; // GitHub issue number
75
100
  author_agent_id: string;
76
101
  pr_url: string;
77
102
  pr_number: number;
78
103
  commit_sha: string;
79
104
  tee_attestation?: string;
105
+ improvement_notes?: string; // Agent's explanation of their approach
80
106
  }
81
107
 
82
108
  export interface ChangeRevise {
@@ -108,6 +134,27 @@ export interface ReviewSubmit {
108
134
  line_comments?: Record<string, unknown>;
109
135
  }
110
136
 
137
+ // Vote interfaces (new decoupled voting system)
138
/** A recorded vote in the decoupled voting system. */
export interface Vote {
  /** Identifier of the vote record. */
  id: string;
  /** Identifier of the change the vote applies to. */
  change_id: string;
  /** Identifier of the agent that cast the vote. */
  agent_id: string;
  /** Approve or reject. */
  decision: VoteDecision;
  /** Reason given for the vote. */
  reason: string;
  /** Creation timestamp, carried as a string. */
  created_at: string;
}
146
+
147
/** A list of votes together with a total count. */
export interface VoteList {
  /** The votes in this response. */
  votes: Vote[];
  /** Total count reported alongside the votes. */
  total: number;
}
151
+
152
/** Request payload for submitting a vote. */
export interface VoteSubmit {
  /** Identifier of the voting agent. */
  agent_id: string;
  /** Approve or reject. */
  decision: VoteDecision;
  /** Reason given for the vote. */
  reason: string;
}
157
+
111
158
  // Consensus interfaces
112
159
  export interface ConsensusStatus {
113
160
  change_id: string;
@@ -120,6 +167,61 @@ export interface ConsensusStatus {
120
167
  message: string;
121
168
  }
122
169
 
170
+ // Evaluation interfaces (agent-submitted results)
171
/** Result of an agent-submitted evaluation; the values are the strings used on the wire. */
export enum EvalStatus {
  /** The evaluation passed. */
  PASSED = "passed",
  /** The evaluation failed. */
  FAILED = "failed",
  /** The evaluation ended in an error. */
  ERROR = "error",
}
176
+
177
/** Request payload for submitting an evaluation. */
export interface EvaluationSubmit {
  /** Identifier of the submitting agent. */
  agent_id: string;
  /** Evaluation result. */
  status: EvalStatus;

  /** Test results: whether all tests passed. */
  tests_passed: boolean;
  /** Test results: total number of tests. */
  tests_total: number;
  /** Test results: number of failed tests. */
  tests_failed: number;

  /** Code quality: number of lint errors. */
  lint_errors: number;
  /** Code quality: number of lint warnings. */
  lint_warnings: number;
  /** Code quality: number of type errors. */
  type_errors: number;

  /** Correctness score (0-1). */
  correctness_score: number;
  /** Performance score (0-1). */
  performance_score: number;
  /** Quality score (0-1). */
  quality_score: number;
  /** Overall score (0-1). */
  overall_score: number;

  /** Optional detailed report. */
  details?: string;
}
196
+
197
/** A stored evaluation record. */
export interface Evaluation {
  /** Identifier of the evaluation record. */
  id: string;
  /** Identifier of the change that was evaluated. */
  change_id: string;
  /** Identifier of the agent that submitted the evaluation. */
  agent_id: string;
  /** Evaluation result. */
  status: EvalStatus;

  /** Test results: whether all tests passed. */
  tests_passed: boolean;
  /** Test results: total number of tests. */
  tests_total: number;
  /** Test results: number of failed tests. */
  tests_failed: number;

  /** Code quality: number of lint errors. */
  lint_errors: number;
  /** Code quality: number of lint warnings. */
  lint_warnings: number;
  /** Code quality: number of type errors. */
  type_errors: number;

  /** Correctness score. */
  correctness_score: number;
  /** Performance score. */
  performance_score: number;
  /** Quality score. */
  quality_score: number;
  /** Overall score. */
  overall_score: number;

  /** Detailed report, or `null` when there is none. */
  details: string | null;
  /** Creation timestamp, carried as a string. */
  created_at: string;
}
219
+
220
/** A list of evaluations together with a total count. */
export interface EvaluationList {
  /** The evaluations in this response. */
  evaluations: Evaluation[];
  /** Total count reported alongside the evaluations. */
  total: number;
}
224
+
123
225
  // API error response
124
226
  export interface ApiError {
125
227
  detail: string;