glide-mcp 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- glide_mcp-0.1.1.dist-info/METADATA +67 -0
- glide_mcp-0.1.1.dist-info/RECORD +9 -0
- glide_mcp-0.1.1.dist-info/WHEEL +4 -0
- glide_mcp-0.1.1.dist-info/entry_points.txt +2 -0
- glide_mcp-0.1.1.dist-info/licenses/LICENSE +21 -0
- src/core/LLM/cerebras_inference.py +111 -0
- src/kite_exclusive/commit_splitter/prompts/prompt1.md +11 -0
- src/kite_exclusive/commit_splitter/services/voyage_service.py +49 -0
- src/mcp/app.py +492 -0
glide_mcp-0.1.1.dist-info/METADATA
ADDED
@@ -0,0 +1,67 @@
Metadata-Version: 2.4
Name: glide-mcp
Version: 0.1.1
Summary: mcp server that tries to save you from git troubles
License-File: LICENSE
Requires-Python: >=3.13
Requires-Dist: black>=25.9.0
Requires-Dist: cerebras-cloud-sdk>=1.56.1
Requires-Dist: fastmcp>=2.12.5
Requires-Dist: helix-py>=0.2.30
Requires-Dist: numpy>=2.3.4
Requires-Dist: pytest-asyncio>=1.2.0
Requires-Dist: pytest>=8.4.2
Requires-Dist: python-dotenv>=1.1.1
Description-Content-Type: text/markdown

## How to Use

### 1. Clone the repository

```bash
git clone https://github.com/SoarAILabs/glide.git
```

### 2. Navigate to the project directory

```bash
cd glide
```

### 3. Start the server

```bash
uv run python -m src.mcp.app
```

> **Note:** Currently, only [Cursor](https://www.cursor.so/) is supported as the MCP Client.

### 4. Configure Cursor to use your local MCP server

**One-Click Install:**

[](https://cursor.com/en-US/install-mcp?name=glide&config=eyJjb21tYW5kIjoidXYgcnVuIC0tZGlyZWN0b3J5IFBBVEhcXHRvXFx5b3VyXFxnbGlkZVxcZGlyZWN0b3J5IHB5dGhvbiAtbSBzcmMubWNwLmFwcCJ9)

**Manual Installation:**

Add the following to your `mcp.json` configuration in Cursor:

```json
{
  "mcpServers": {
    "glide": {
      "url": "http://127.0.0.1:8000/mcp"
    }
  }
}
```

> **Note:** The port (`8000` above) is just an example.
> To use a different port, open `src/mcp/app.py` and update the following lines accordingly:

```python
if __name__ == "__main__":
    mcp.run(transport="streamable-http", host="127.0.0.1", port=8000)
```

Replace `8000` with your desired port number.
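For reference, the base64 `config` parameter in the one-click install link above decodes to the following Cursor MCP entry. The `PATH\\to\\your\\glide\\directory` placeholder stands in for the local clone path; note that this variant launches the server as a local process via `uv run`, whereas the manual configuration above points Cursor at an HTTP URL.

```json
{"command":"uv run --directory PATH\\to\\your\\glide\\directory python -m src.mcp.app"}
```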
glide_mcp-0.1.1.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
src/core/LLM/cerebras_inference.py,sha256=zO4ff4Urhk195gQvZouXYWhtz8P-gSJJAzWzdUjvkTQ,3481
src/kite_exclusive/commit_splitter/prompts/prompt1.md,sha256=eOTX0H_n0vOuyfanEvbC8MAj4pF7W7FMiCdJw3zSL5g,1255
src/kite_exclusive/commit_splitter/services/voyage_service.py,sha256=dgAMRypj2uHhDJOJx-dE4bKvRawiApHKbuHOvbNBTLE,1639
src/mcp/app.py,sha256=JeRldSYEroigMKdLZTAY9Sz-Q1nHgryT26mrvyGzi60,21131
glide_mcp-0.1.1.dist-info/METADATA,sha256=fLblWuESXH_7xznuEfTczj5HGoPPScwIcCXChpR45aY,1622
glide_mcp-0.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
glide_mcp-0.1.1.dist-info/entry_points.txt,sha256=o7BktmIS8150BSx3cwAOaVL6ntLoEv3mCxyDkcoeUhc,47
glide_mcp-0.1.1.dist-info/licenses/LICENSE,sha256=bqVuW787bFw2uBL31Xlee7ydibnr_8TkCWlHdi7LXEM,1067
glide_mcp-0.1.1.dist-info/RECORD,,
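Each RECORD row follows the standard wheel format `path,sha256=<urlsafe-base64 digest without padding>,<size in bytes>`. A small sketch of how such an entry could be recomputed for a file from this package (the path below is illustrative):

```python
import base64
import hashlib
from pathlib import Path


def record_entry(path: str) -> str:
    # Hash the file, base64url-encode the digest without '=' padding, append the size.
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    return f"{path},sha256={digest},{len(data)}"


print(record_entry("src/mcp/app.py"))
```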
glide_mcp-0.1.1.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 SoarAILabs

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
src/core/LLM/cerebras_inference.py
ADDED
@@ -0,0 +1,111 @@
import os
import asyncio
from typing import Any, Dict, List, Optional
from dotenv import load_dotenv
from cerebras.cloud.sdk import AsyncCerebras

load_dotenv()

# Default model; override per-call via the `model` argument
DEFAULT_MODEL_ID: str = os.getenv("CEREBRAS_MODEL_ID", "qwen-3-32b")

_async_client: Optional[AsyncCerebras] = None


def _get_api_key(explicit_api_key: Optional[str] = None) -> str:
    api_key = explicit_api_key or os.getenv("CEREBRAS_API_KEY", "")
    if not api_key:
        raise RuntimeError(
            "CEREBRAS_API_KEY is not set. Set it in the environment or pass api_key explicitly."
        )
    return api_key


def init_cerebras_async_client(api_key: Optional[str] = None) -> AsyncCerebras:
    """
    Initialize and cache a global AsyncCerebras client.

    Safe to call multiple times; subsequent calls return the cached instance.
    """
    global _async_client
    if _async_client is None:
        _async_client = AsyncCerebras(api_key=_get_api_key(api_key))
    return _async_client


def get_cerebras_async_client() -> AsyncCerebras:
    """Return the cached AsyncCerebras client, initializing it if needed."""
    return init_cerebras_async_client()


async def chat(
    messages: List[Dict[str, str]],
    *,
    model: Optional[str] = None,
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None,
    api_key: Optional[str] = None,
    extra_params: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Send a structured chat to Cerebras and return the assistant's message content.

    messages: List of {"role": "user"|"system"|"assistant", "content": str}
    model: Model name; defaults to DEFAULT_MODEL_ID
    temperature, max_tokens: Optional generation controls
    api_key: Optional override for API key (avoids relying on env)
    extra_params: Additional keyword arguments passed through to the API
    """
    client = init_cerebras_async_client(api_key)
    response = await client.chat.completions.create(
        messages=messages,
        model=model or DEFAULT_MODEL_ID,
        **({"temperature": temperature} if temperature is not None else {}),
        **({"max_tokens": max_tokens} if max_tokens is not None else {}),
        **(extra_params or {}),
    )
    return response.choices[0].message.content


async def complete(
    prompt: str,
    *,
    system: Optional[str] = None,
    model: Optional[str] = None,
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None,
    api_key: Optional[str] = None,
    extra_params: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Convenience wrapper for single-turn prompts. Builds messages from `system` and `prompt`.
    """
    messages: List[Dict[str, str]] = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})
    return await chat(
        messages,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        api_key=api_key,
        extra_params=extra_params,
    )


__all__ = [
    "init_cerebras_async_client",
    "get_cerebras_async_client",
    "chat",
    "complete",
    "DEFAULT_MODEL_ID",
]


if __name__ == "__main__":
    async def _demo() -> None:
        reply = await complete("Why is fast inference important?")
        print(reply)

    asyncio.run(_demo())
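A minimal usage sketch for this module, assuming the package is importable as `src.core.LLM.cerebras_inference` and that `CEREBRAS_API_KEY` (and optionally `CEREBRAS_MODEL_ID`) is set in the environment or a `.env` file; the prompts are illustrative:

```python
import asyncio

from src.core.LLM.cerebras_inference import chat, complete


async def main() -> None:
    # Single-turn helper: builds the system/user messages and returns the reply text.
    title = await complete(
        "Write a one-line commit title for: renamed util.py to helpers.py",
        system="You are a senior engineer writing conventional commit messages.",
        max_tokens=64,
    )
    print(title)

    # Multi-turn form: pass the message list directly; model defaults to DEFAULT_MODEL_ID.
    reply = await chat(
        [
            {"role": "system", "content": "Answer briefly."},
            {"role": "user", "content": "Why is fast inference important?"},
        ],
        temperature=0.2,
    )
    print(reply)


asyncio.run(main())
```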
src/kite_exclusive/commit_splitter/prompts/prompt1.md
ADDED
@@ -0,0 +1,11 @@
You are the assistant of Glide by the Interaction Company of California. You are the "execution engine" of Glide, helping split large commits for Glide-MCP, while the MCP client talks to the user. Your job is to execute and accomplish a goal, and you do not have direct access to the user.

Your final output is directed to Glide MCP Client, which handles user conversations and presents your results to the user. Focus on providing Glide with adequate contextual information; you are not responsible for framing responses in a user-friendly way.

If it needs more data from Glide or the user, you should also include it in your final output message.

If you ever need to send a message to the user, you should tell Glide to forward that message to the user.

You should seek to accomplish tasks with as much parallelism as possible. If tasks don't need to be sequential, launch them in parallel. This includes spawning multiple subagents simultaneously for both search operations and MCP integrations when the information could be found in multiple sources.

EXTREMELY IMPORTANT: Never make up information if you can't find it. If you can't find something or you aren't sure about something, relay this to the inbound agent instead of guessing.
src/kite_exclusive/commit_splitter/services/voyage_service.py
ADDED
@@ -0,0 +1,49 @@
from helix.embedding.voyageai_client import VoyageAIEmbedder
from helix import Chunk
import os

voyage_embedder = VoyageAIEmbedder()


def embed_code(code: str, file_path: str = None):

    # For diffs, use token_chunk instead of code_chunk since diffs are text format
    # and code_chunk has API compatibility issues
    try:
        # Try code_chunk first if we have a valid language
        if file_path:
            ext = os.path.splitext(file_path)[1].lstrip(".")
            lang_map = {
                "py": "python",
                "js": "javascript",
                "ts": "typescript",
                "jsx": "javascript",
                "tsx": "typescript",
                "java": "java",
                "cpp": "cpp",
                "c": "c",
                "cs": "csharp",
                "go": "go",
                "rs": "rust",
                "rb": "ruby",
                "php": "php",
                "swift": "swift",
                "kt": "kotlin",
                "scala": "scala",
                "sh": "bash",
                "hx": "python",
            }
            language = lang_map.get(ext.lower())
            if language:
                code_chunks = Chunk.code_chunk(code, language=language)
            else:
                code_chunks = Chunk.token_chunk(code)
        else:
            code_chunks = Chunk.token_chunk(code)
    except Exception:
        # Fallback to token_chunk if code_chunk fails
        code_chunks = Chunk.token_chunk(code)

    code_embeddings = voyage_embedder.embed_batch([f"{code_chunks}"])

    return code_embeddings
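A short sketch of how `embed_code` is intended to be called (this mirrors the call in `src/mcp/app.py`); the diff text is made up for illustration, and the call assumes whatever credentials helix's `VoyageAIEmbedder` expects (a Voyage AI API key) are already configured:

```python
from src.kite_exclusive.commit_splitter.services.voyage_service import embed_code

# Hypothetical unified diff for a single file.
diff_text = """\
diff --git a/greet.py b/greet.py
--- a/greet.py
+++ b/greet.py
@@ -1,2 +1,2 @@
 def greet(name):
-    return "hi"
+    return f"hello, {name}"
"""

# file_path only selects the chunking strategy (code_chunk for known extensions,
# token_chunk otherwise); the result is a batch of embeddings, one per input string.
embeddings = embed_code(diff_text, file_path="greet.py")
print(len(embeddings))
```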
src/mcp/app.py
ADDED
@@ -0,0 +1,492 @@
from src.kite_exclusive.commit_splitter.services.voyage_service import embed_code
from src.core.LLM.cerebras_inference import complete
from typing import Any, Dict, List, Tuple
import subprocess
import json
import os
import asyncio
from dotenv import load_dotenv
import helix
from fastmcp import FastMCP
load_dotenv()

mcp = FastMCP[Any]("glide")

HELIX_API_ENDPOINT = os.getenv("HELIX_API_ENDPOINT", "")


async def find_git_root(start_path: str = None) -> str:
    """
    Find the git repository root directory.

    Args:
        start_path: Directory to start searching from (defaults to current working directory)

    Returns:
        Path to the git repository root, or None if not in a git repository
    """
    # First, try to get workspace root from common environment variables
    # MCP clients like Cursor might set these
    env_vars = [
        "MCP_WORKSPACE_ROOT",
        "CURSOR_WORKSPACE_ROOT",
        "WORKSPACE_ROOT",
        "WORKSPACE_FOLDER",
        "PROJECT_ROOT"
    ]

    for env_var in env_vars:
        workspace_from_env = os.getenv(env_var)
        if workspace_from_env and os.path.isdir(workspace_from_env):
            # Try to find git root starting from this directory
            start_path = workspace_from_env
            break

    if start_path is None:
        start_path = os.getcwd()

    # Use git rev-parse --show-toplevel to find the git root
    # Use asyncio.create_subprocess_exec directly to avoid circular dependency
    try:
        process = await asyncio.create_subprocess_exec(
            "git",
            "rev-parse",
            "--show-toplevel",
            cwd=start_path,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            stdin=asyncio.subprocess.DEVNULL,
        )
        stdout_data, stderr_data = await process.communicate()

        if process.returncode == 0:
            git_root = stdout_data.decode('utf-8').strip()
            if git_root:
                return git_root
    except (FileNotFoundError, OSError):
        # Git not found or other OS error
        pass

    return None


# Helper function to run subprocess calls asynchronously to avoid blocking stdio
async def run_subprocess(args: List[str], **kwargs) -> subprocess.CompletedProcess:
    """Run subprocess calls asynchronously to avoid blocking stdio transport."""
    # Use asyncio.create_subprocess_exec instead of subprocess.run to avoid blocking
    capture_output = kwargs.pop('capture_output', False)
    text = kwargs.pop('text', False)
    check = kwargs.pop('check', False)  # Handle check parameter separately

    # CRITICAL: Set stdin to DEVNULL to prevent subprocess from inheriting
    # the MCP stdio stdin, which causes deadlocks
    stdin = kwargs.pop('stdin', asyncio.subprocess.DEVNULL)

    # CRITICAL: Always capture stdout/stderr to PIPE to prevent subprocess output
    # from leaking into the MCP stdio communication channel (which breaks JSON parsing)
    # In stdio mode, parent's stdout/stderr IS the MCP communication channel, so we must
    # always capture subprocess output to prevent git messages from breaking JSON protocol
    stdout = asyncio.subprocess.PIPE
    stderr = asyncio.subprocess.PIPE
    # Remove any stdout/stderr from kwargs since we're overriding them
    kwargs.pop('stdout', None)
    kwargs.pop('stderr', None)

    # Only pass valid parameters to asyncio.create_subprocess_exec
    # Filter out any subprocess.run() specific parameters that aren't valid
    # Explicitly remove check and other invalid params to prevent errors
    kwargs.pop('check', None)  # Extra safety: ensure check is removed
    kwargs.pop('timeout', None)  # timeout handled by asyncio.wait_for elsewhere
    kwargs.pop('input', None)  # input not supported in async subprocess

    valid_exec_kwargs = {}
    allowed_params = {'cwd', 'env', 'start_new_session', 'shell', 'preexec_fn',
                      'executable', 'bufsize', 'close_fds', 'pass_fds',
                      'restore_signals', 'umask', 'limit', 'creationflags'}
    for key, value in kwargs.items():
        if key in allowed_params:
            valid_exec_kwargs[key] = value
        # Silently ignore other parameters

    # Final safety check: ensure check is not in valid_exec_kwargs
    assert 'check' not in valid_exec_kwargs, "check parameter should not be passed to subprocess"

    process = await asyncio.create_subprocess_exec(
        *args,
        stdin=stdin,
        stdout=stdout,
        stderr=stderr,
        **valid_exec_kwargs
    )

    stdout_data, stderr_data = await process.communicate()

    # Create a CompletedProcess-like object
    result = subprocess.CompletedProcess(
        args=args,
        returncode=process.returncode,
        stdout=stdout_data.decode('utf-8') if text and stdout_data else stdout_data,
        stderr=stderr_data.decode('utf-8') if text and stderr_data else stderr_data,
    )

    # If check=True, raise CalledProcessError on non-zero return code
    if check and result.returncode != 0:
        raise subprocess.CalledProcessError(
            result.returncode, args, result.stdout, result.stderr
        )

    return result

@mcp.tool
async def draft_pr():
    instructions = [
        "step 1: grep for CONTRIBUTING.md or similar documentation in the repository. If unable to find it, look for any contributing guidelines in the repository.",
        "step 2: if not found, follow best practices for writing a pull request.",
        "step 3: use the edit file tool to write a new PR_DRAFT.md file for the project.",
    ]
    result = "draft pr instructions: \n\n"
    for i, instruction in enumerate(instructions, 1):
        result += f"{i}. {instruction}\n\n"
    return result


@mcp.tool(
    name="split_commit",
    description="Splits a large unified diff / commit into smaller semantically-grouped commits.",
)
async def split_commit(workspace_root: str = None):
    """
    Split a large commit into smaller semantic commits.

    Args:
        workspace_root: Optional path to the workspace root directory.
            If not provided, will attempt to detect from environment variables or current directory.
    """
    try:
        # Detect the git repository root
        if workspace_root:
            # If provided, use it directly
            detected_root = await find_git_root(workspace_root)
            if detected_root:
                workspace_root = detected_root
            elif not os.path.isdir(workspace_root):
                return f"error: provided workspace_root '{workspace_root}' does not exist or is not a directory."
            # If workspace_root is provided but not a git repo, we'll still try to use it
            # (git commands will fail with a clear error if it's not a git repo)
        else:
            # Try to auto-detect
            workspace_root = await find_git_root()
            if not workspace_root:
                cwd = os.getcwd()
                return (
                    f"error: could not detect git repository root.\n"
                    f"Current working directory: {cwd}\n"
                    f"Please either:\n"
                    f"  1. Run this tool from within a git repository, or\n"
                    f"  2. Provide the workspace_root parameter with the path to your git repository root."
                )

        # 1) Collect changed files and per-file unified diffs
        # Check staged, unstaged, and untracked files
        staged_proc = await run_subprocess(
            ["git", "diff", "--cached", "--name-only"],
            capture_output=True,
            text=True,
            cwd=workspace_root
        )
        unstaged_proc = await run_subprocess(
            ["git", "diff", "--name-only"],
            capture_output=True,
            text=True,
            cwd=workspace_root
        )
        untracked_proc = await run_subprocess(
            ["git", "ls-files", "--others", "--exclude-standard"],
            capture_output=True,
            text=True,
            cwd=workspace_root
        )

        # Check if git commands failed (might indicate not a git repo)
        # Note: git commands can return non-zero even in valid repos (e.g., no changes)
        # Only error if we get explicit "not a git repository" messages
        error_messages = []
        if staged_proc.returncode != 0 and staged_proc.stderr:
            error_messages.append(staged_proc.stderr)
        if "not a git repository" in " ".join(error_messages).lower():
            error_msg = f"error: '{workspace_root}' is not a git repository.\n"
            error_msg += f"Git error: {error_messages[0] if error_messages else 'Unknown error'}\n"
            error_msg += "Please provide the correct path to your git repository root."
            return error_msg

        changed_files = set()
        if staged_proc.returncode == 0:
            changed_files.update(
                f.strip() for f in staged_proc.stdout.splitlines() if f.strip()
            )
        if unstaged_proc.returncode == 0:
            changed_files.update(
                f.strip() for f in unstaged_proc.stdout.splitlines() if f.strip()
            )
        if untracked_proc.returncode == 0:
            changed_files.update(
                f.strip() for f in untracked_proc.stdout.splitlines() if f.strip()
            )

        if not changed_files:
            return "no changes detected (working tree clean)"

        file_to_diff: Dict[str, str] = {}
        for path in changed_files:
            # Try staged diff first, then unstaged
            p = await run_subprocess(
                ["git", "diff", "--cached", "--", path],
                capture_output=True,
                text=True,
                cwd=workspace_root
            )
            if p.returncode == 0 and p.stdout.strip():
                file_to_diff[path] = p.stdout
            else:
                p = await run_subprocess(
                    ["git", "diff", "--", path],
                    capture_output=True,
                    text=True,
                    cwd=workspace_root
                )
                if p.returncode == 0 and p.stdout.strip():
                    file_to_diff[path] = p.stdout
                else:
                    # For untracked/new files, read the entire file content as the "diff"
                    # Paths from git are relative to workspace root, so join them
                    file_path = os.path.join(workspace_root, path) if not os.path.isabs(path) else path
                    try:
                        with open(file_path, "r", encoding="utf-8") as f:
                            content = f.read()
                        # Format as a new file addition diff
                        file_to_diff[path] = (
                            f"diff --git a/{path} b/{path}\nnew file mode 100644\n--- /dev/null\n+++ b/{path}\n@@ -0,0 +1,{len(content.splitlines())} @@\n+{chr(10).join('+'+line for line in content.splitlines())}"
                        )
                    except (FileNotFoundError, UnicodeDecodeError):
                        # File might not exist or not be text
                        continue

        if not file_to_diff:
            return "no per-file diffs produced"

        # 2) Embed each file's diff with Voyage (preconfigured in voyage_service)
        suggestions: List[Tuple[str, str]] = []  # (file_path, suggested_message)

        # Connect Helix client - supports both local and cloud via environment variables
        use_local = os.getenv("HELIX_LOCAL", "false").lower() == "true"

        if use_local:
            db = helix.Client(local=True)
        else:
            # Use cloud deployment from helix.toml (production.fly)
            # Helix SDK automatically reads helix.toml and uses the configured deployment
            api_endpoint = os.getenv("HELIX_API_ENDPOINT", "")
            if not HELIX_API_ENDPOINT:
                return "error: HELIX API ENDPOINT is not set"
            db = helix.Client(local=False, api_endpoint=api_endpoint)

        for file_path, diff_text in file_to_diff.items():
            # 2a) Embed with timeout (5 seconds)
            try:
                vec_batch = await asyncio.wait_for(
                    asyncio.to_thread(embed_code, diff_text, file_path=file_path),
                    timeout=5
                )
            except asyncio.TimeoutError:
                return f"error: embedding timed out for {file_path} (expected to always work)"
            except Exception as embed_exc:
                return f"error: embedding failed for {file_path}: {str(embed_exc)} (expected to always work)"

            if not vec_batch:
                return f"error: embedding returned empty result for {file_path}"
            vec = vec_batch[0]

            try:
                # 3) ANN search for similar diffs; k kept small to keep it snappy
                # Add timeout to database query (5 seconds)
                res = await asyncio.wait_for(
                    asyncio.to_thread(db.query, "getSimilarDiffsByVector", {"vec": vec, "k": 8}),
                    timeout=5
                )
            except asyncio.TimeoutError:
                # If database query times out, continue without examples
                res = []
            except Exception as db_exc:
                # If database query fails, continue without examples
                res = []
            # Result rows include commit_message, summary, file_path
            examples = []
            if isinstance(res, list):
                for row in res[:5]:
                    if isinstance(row, dict):
                        ex_msg = row.get("commit_message") or ""
                        ex_sum = row.get("summary") or ""
                        ex_path = row.get("file_path") or ""
                        if ex_msg or ex_sum:
                            examples.append(
                                f"file:{ex_path}\nmessage:{ex_msg}\nsummary:{ex_sum}"
                            )

            example_block = "\n\n".join(examples) if examples else ""

            # Helper function to detect and reject generic messages
            def is_generic_message(msg: str) -> bool:
                """Check if a commit message is too generic."""
                if not msg:
                    return True
                msg_lower = msg.lower().strip()
                generic_patterns = [
                    "update ",
                    "fix bug",
                    "fix issue",
                    "refactor code",
                    "changes",
                    "wip",
                    "misc",
                    "cleanup",
                    "minor",
                    "temporary",
                ]
                # Check if message starts with generic patterns
                for pattern in generic_patterns:
                    if msg_lower.startswith(pattern):
                        return True
                # Check if message is just a filename (e.g., "Update app.py")
                if msg_lower.startswith("update ") and len(msg_lower.split()) <= 3:
                    return True
                return False

            system_prompt = (
                """You are a senior engineer writing conventional commit messages. Analyze the diff carefully to understand what actually changed.

CRITICAL REQUIREMENTS:
- Write ONLY a single, concise commit title (under 50 characters preferred)
- Use conventional commit format: type(scope): description
- Common types: feat, fix, refactor, docs, style, test, chore, perf, build, ci
- No issue references, no trailing period
- Be SPECIFIC about what changed - analyze the actual code changes in the diff
- Output ONLY the commit message title, nothing else (no explanations, no prefixes, no quotes)

STRICT PROHIBITIONS - NEVER USE THESE PATTERNS:
- "Update [filename]" (e.g., "Update app.py") - ABSOLUTELY FORBIDDEN
- "Fix bug" - TOO GENERIC
- "Refactor code" - TOO GENERIC
- "Changes" - TOO GENERIC
- "WIP" - TOO GENERIC
- Any message that doesn't describe what actually changed

GUIDELINES:
- Analyze the actual code changes in the diff to determine the type and description
- For new features: use "feat:" - describe what capability was added (e.g., "feat(auth): add JWT token validation")
- For bug fixes: use "fix:" - describe what was broken and fixed (e.g., "fix(api): handle null response in user endpoint")
- For refactoring: use "refactor:" - describe what was improved without changing behavior (e.g., "refactor(utils): extract common validation logic")
- For configuration/build: use "chore:" or "build:" - describe what was configured (e.g., "chore(deps): update dependencies")
- For documentation: use "docs:" - describe what documentation was added/changed (e.g., "docs(api): add endpoint documentation")
- Include the affected component/file in scope if it adds clarity

EXAMPLES OF GOOD MESSAGES:
- "feat(auth): add JWT token validation"
- "fix(api): handle null response in user endpoint"
- "refactor(utils): extract common validation logic"
- "chore(deps): update numpy to 2.0.0"
- "docs(readme): add installation instructions"

EXAMPLES OF BAD MESSAGES (DO NOT USE):
- "Update app.py" ❌
- "Fix bug" ❌
- "Refactor code" ❌
- "Changes" ❌

Remember: Your output must be SPECIFIC and describe WHAT changed, not generic file operations."""
            )
            user_prompt = (
                "Generate a commit message for this diff. Consider similar past changes if given.\n\n"
                f"DIFF (truncated if long):\n{diff_text}\n\n"
                f"SIMILAR EXAMPLES:\n{example_block}\n\n"
                "Output ONLY the commit message title, nothing else."
            )

            # Call Cerebras inference - should always work
            try:
                raw_response = await asyncio.wait_for(
                    complete(user_prompt, system=system_prompt, max_tokens=100),
                    timeout=30.0
                )
            except asyncio.TimeoutError:
                return f"error: Cerebras inference timed out for {file_path} (expected to always work)"
            except Exception as llm_exc:
                return f"error: Cerebras inference failed for {file_path}: {str(llm_exc)} (expected to always work)"

            if not raw_response:
                return f"error: Cerebras inference returned empty response for {file_path}"

            commit_message = raw_response.strip().splitlines()[0].strip()

            # Remove quotes if present
            if commit_message.startswith('"') and commit_message.endswith('"'):
                commit_message = commit_message[1:-1]
            if commit_message.startswith("'") and commit_message.endswith("'"):
                commit_message = commit_message[1:-1]

            # Validate the message is not generic - fail if it is
            if not commit_message or is_generic_message(commit_message):
                return (
                    f"error: Cerebras inference generated generic message '{commit_message}' for {file_path}. "
                    f"Please improve the system prompt or check the inference output."
                )

            suggestions.append((file_path, commit_message))

        if not suggestions:
            return "no commit suggestions could be generated"

        # 4) Commit each file separately with its suggested message
        for file_path, message in suggestions:
            try:
                await run_subprocess(
                    ["git", "add", "--", file_path],
                    check=True,
                    cwd=workspace_root
                )
                await run_subprocess(
                    ["git", "commit", "-m", message],
                    check=True,
                    cwd=workspace_root
                )
            except subprocess.CalledProcessError as e:
                return (
                    f"Failed to add or commit '{file_path}' with message '{message}'.\n"
                    f"Git error: {e}\n"
                    "Ensure the file exists, is not conflicted, and git is functioning properly."
                )

        # 5) Return a compact report of what was committed
        report = {"commits": [{"file": f, "message": m} for f, m in suggestions]}
        return json.dumps(report, indent=2)

    except Exception as e:
        return (
            f"failed to split commit: {str(e)}\n"
            f"Exception type: {type(e).__name__}\n"
            "Ensure git is available and HelixDB is reachable on localhost:6969."
        )


@mcp.tool
async def resolve_conflict():
    return "resolve conflict ran successfully"


def main():
    """Entry point for the glide-mcp package."""
    mcp.run(transport="stdio")


if __name__ == "__main__":
    # mcp.run(transport="streamable-http", host="127.0.0.1", port=8000)
    main()
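A client-side sketch for exercising these tools, assuming the server is started with the streamable-http transport shown in the README (the commented-out `mcp.run(...)` line above) rather than the default stdio entry point, and assuming fastmcp's `Client` with `list_tools`/`call_tool` is available; the repository path is a placeholder:

```python
import asyncio

from fastmcp import Client


async def main() -> None:
    # Connect to the locally running glide MCP server over streamable HTTP.
    async with Client("http://127.0.0.1:8000/mcp") as client:
        tools = await client.list_tools()
        print("available tools:", [tool.name for tool in tools])

        # Ask glide to split the pending changes in a repository into per-file commits.
        result = await client.call_tool("split_commit", {"workspace_root": "/path/to/your/repo"})
        print(result)


asyncio.run(main())
```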