gemini-cli-proxy 1.0.3 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gemini_cli_proxy/__init__.py +9 -0
- gemini_cli_proxy/cli.py +84 -0
- gemini_cli_proxy/config.py +34 -0
- gemini_cli_proxy/gemini_client.py +175 -0
- gemini_cli_proxy/models.py +97 -0
- gemini_cli_proxy/openai_adapter.py +157 -0
- gemini_cli_proxy/server.py +187 -0
- gemini_cli_proxy-1.0.3.dist-info/METADATA +153 -0
- gemini_cli_proxy-1.0.3.dist-info/RECORD +12 -0
- gemini_cli_proxy-1.0.3.dist-info/WHEEL +4 -0
- gemini_cli_proxy-1.0.3.dist-info/entry_points.txt +2 -0
- gemini_cli_proxy-1.0.3.dist-info/licenses/LICENSE +21 -0
gemini_cli_proxy/__init__.py
ADDED
@@ -0,0 +1,9 @@
"""
Gemini CLI Proxy - OpenAI-compatible API wrapper for Gemini CLI

An OpenAI-compatible HTTP API wrapper for Google Gemini command line tool
"""

__version__ = "1.0.0"
__author__ = "nettee"
__description__ = "OpenAI-compatible API wrapper for Gemini CLI"
gemini_cli_proxy/cli.py
ADDED
@@ -0,0 +1,84 @@
"""
CLI entry module

Uses Click to handle command line arguments and application startup
"""

import click
import uvicorn


@click.command()
@click.option(
    "--host",
    default="127.0.0.1",
    help="Server host address"
)
@click.option(
    "--port",
    default=8765,
    type=int,
    help="Server port"
)
@click.option(
    "--log-level",
    default="info",
    type=click.Choice(["debug", "info", "warning", "error", "critical"]),
    help="Log level"
)
@click.option(
    "--rate-limit",
    default=60,
    type=int,
    help="Maximum requests per minute"
)
@click.option(
    "--max-concurrency",
    default=4,
    type=int,
    help="Maximum concurrent subprocesses"
)
@click.option(
    "--timeout",
    default=30.0,
    type=float,
    help="Gemini CLI command timeout in seconds"
)
@click.option(
    "--debug",
    is_flag=True,
    help="Enable debug mode"
)
def main(
    host: str,
    port: int,
    log_level: str,
    rate_limit: int,
    max_concurrency: int,
    timeout: float,
    debug: bool
):
    """Start Gemini CLI Proxy server"""

    # Set configuration
    from .config import config
    config.host = host
    config.port = port
    config.log_level = log_level
    config.rate_limit = rate_limit
    config.max_concurrency = max_concurrency
    config.timeout = timeout
    config.debug = debug

    # Start server
    uvicorn.run(
        "gemini_cli_proxy.server:app",
        host=host,
        port=port,
        log_level=log_level,
        reload=debug
    )


if __name__ == "__main__":
    main()
gemini_cli_proxy/config.py
ADDED
@@ -0,0 +1,34 @@
"""
Configuration management module

Manages application configuration
"""


class Config:
    """Application configuration class"""

    def __init__(self):
        # Server configuration
        self.host: str = "127.0.0.1"
        self.port: int = 8765
        self.log_level: str = "info"
        self.debug: bool = False

        # Gemini CLI configuration
        self.gemini_command: str = "gemini"  # Gemini CLI command path
        self.timeout: float = 30.0

        # Limit configuration
        self.rate_limit: int = 60  # Requests per minute
        self.max_concurrency: int = 4  # Maximum concurrent subprocesses

        # Supported models list
        self.supported_models: list = [
            "gemini-2.5-pro",
            "gemini-2.5-flash",
        ]


# Global configuration instance
config = Config()
gemini_cli_proxy/gemini_client.py
ADDED
@@ -0,0 +1,175 @@
"""
Gemini client module

Handles interaction with Gemini CLI tool
"""

import asyncio
import logging
from typing import List, Optional, AsyncGenerator
from .models import ChatMessage
from .config import config

logger = logging.getLogger(__name__)


class GeminiClient:
    """Gemini CLI client"""

    def __init__(self):
        self.semaphore = asyncio.Semaphore(config.max_concurrency)

    async def chat_completion(
        self,
        messages: List[ChatMessage],
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        **kwargs
    ) -> str:
        """
        Execute chat completion request

        Args:
            messages: List of chat messages
            temperature: Temperature parameter
            max_tokens: Maximum number of tokens
            **kwargs: Other parameters

        Returns:
            Response text from Gemini CLI

        Raises:
            asyncio.TimeoutError: Timeout error
            subprocess.CalledProcessError: Command execution error
        """
        async with self.semaphore:
            return await self._execute_gemini_command(
                messages, temperature, max_tokens, **kwargs
            )

    async def chat_completion_stream(
        self,
        messages: List[ChatMessage],
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        **kwargs
    ) -> AsyncGenerator[str, None]:
        """
        Execute streaming chat completion request (fake streaming implementation)

        Args:
            messages: List of chat messages
            temperature: Temperature parameter
            max_tokens: Maximum number of tokens
            **kwargs: Other parameters

        Yields:
            Response text chunks split by lines
        """
        # First get complete response
        full_response = await self.chat_completion(
            messages, temperature, max_tokens, **kwargs
        )

        # Split by lines and yield one by one
        lines = full_response.split('\n')
        for line in lines:
            if line.strip():  # Skip empty lines
                yield line.strip()
                # Add small delay to simulate streaming effect
                await asyncio.sleep(0.05)

    async def _execute_gemini_command(
        self,
        messages: List[ChatMessage],
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        **kwargs
    ) -> str:
        """
        Execute Gemini CLI command

        Args:
            messages: List of chat messages
            temperature: Temperature parameter
            max_tokens: Maximum number of tokens
            **kwargs: Other parameters

        Returns:
            Command output result
        """
        # Build command arguments
        cmd_args = [config.gemini_command]

        # Build prompt text (simplified implementation: combine all messages)
        prompt = self._build_prompt(messages)

        # Use --prompt parameter to pass prompt text
        cmd_args.extend(["--prompt", prompt])

        # Note: Real gemini CLI doesn't support temperature and max_tokens parameters
        # We ignore these parameters here but log them
        if temperature is not None:
            logger.debug(f"Ignoring temperature parameter: {temperature} (gemini CLI doesn't support)")
        if max_tokens is not None:
            logger.debug(f"Ignoring max_tokens parameter: {max_tokens} (gemini CLI doesn't support)")

        logger.debug(f"Executing command: {' '.join(cmd_args)}")

        try:
            # Use asyncio to execute subprocess
            process = await asyncio.create_subprocess_exec(
                *cmd_args,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )

            # Wait for command execution to complete with timeout
            stdout, stderr = await asyncio.wait_for(
                process.communicate(),
                timeout=config.timeout
            )

            # Check return code
            if process.returncode != 0:
                error_msg = stderr.decode('utf-8').strip()
                raise RuntimeError(f"Gemini CLI execution failed (exit code: {process.returncode}): {error_msg}")

            # Return standard output
            result = stdout.decode('utf-8').strip()
            logger.debug(f"Command executed successfully, output length: {len(result)}")
            return result

        except asyncio.TimeoutError:
            logger.error(f"Gemini CLI command timeout ({config.timeout}s)")
            raise
        except Exception as e:
            logger.error(f"Error executing Gemini CLI command: {e}")
            raise

    def _build_prompt(self, messages: List[ChatMessage]) -> str:
        """
        Build prompt text

        Args:
            messages: List of chat messages

        Returns:
            Formatted prompt text
        """
        # Simplified implementation: format all messages by role
        prompt_parts = []

        for message in messages:
            if message.role == "system":
                prompt_parts.append(f"System: {message.content}")
            elif message.role == "user":
                prompt_parts.append(f"User: {message.content}")
            elif message.role == "assistant":
                prompt_parts.append(f"Assistant: {message.content}")

        return "\n".join(prompt_parts)


# Global client instance
gemini_client = GeminiClient()
gemini_cli_proxy/models.py
ADDED
@@ -0,0 +1,97 @@
"""
Data model definition module

Uses Pydantic to define OpenAI format request and response models
"""

from typing import List, Optional, Union, Dict, Any, Literal
from pydantic import BaseModel, Field
import time
import uuid


class ChatMessage(BaseModel):
    """Chat message model"""
    role: Literal["system", "user", "assistant"]
    content: str


class ChatCompletionRequest(BaseModel):
    """Chat completion request model"""
    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0)
    max_tokens: Optional[int] = Field(default=None, gt=0)
    stream: Optional[bool] = False
    top_p: Optional[float] = Field(default=1.0, ge=0.0, le=1.0)
    frequency_penalty: Optional[float] = Field(default=0.0, ge=-2.0, le=2.0)
    presence_penalty: Optional[float] = Field(default=0.0, ge=-2.0, le=2.0)
    stop: Optional[Union[str, List[str]]] = None
    user: Optional[str] = None


class ChatCompletionChoice(BaseModel):
    """Chat completion choice model"""
    index: int
    message: ChatMessage
    finish_reason: Literal["stop", "length", "content_filter"] = "stop"


class ChatCompletionResponse(BaseModel):
    """Chat completion response model"""
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex[:12]}")
    object: str = "chat.completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionChoice]
    # Note: According to design document, usage field is omitted since gemini CLI doesn't provide token usage info


class ChatCompletionStreamChoice(BaseModel):
    """Streaming chat completion choice model"""
    index: int
    delta: Dict[str, Any]
    finish_reason: Optional[Literal["stop", "length", "content_filter"]] = None


class ChatCompletionStreamResponse(BaseModel):
    """Streaming chat completion response model"""
    id: str
    object: str = "chat.completion.chunk"
    created: int
    model: str
    choices: List[ChatCompletionStreamChoice]


class ErrorDetail(BaseModel):
    """Error detail model"""
    message: str
    type: str
    param: Optional[str] = None
    code: Optional[str] = None


class ErrorResponse(BaseModel):
    """Error response model"""
    error: ErrorDetail


class ModelInfo(BaseModel):
    """Model information model"""
    id: str
    object: str = "model"
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = "gemini-cli-proxy"


class ModelsResponse(BaseModel):
    """Models list response model"""
    object: str = "list"
    data: List[ModelInfo]


class HealthResponse(BaseModel):
    """Health check response model"""
    status: str = "ok"
    version: str
    timestamp: int = Field(default_factory=lambda: int(time.time()))
gemini_cli_proxy/openai_adapter.py
ADDED
@@ -0,0 +1,157 @@
"""
OpenAI adapter module

Handles format conversion and compatibility
"""

import time
import uuid
import logging
from typing import AsyncGenerator
from fastapi.responses import StreamingResponse

from .models import (
    ChatCompletionRequest,
    ChatCompletionResponse,
    ChatCompletionChoice,
    ChatCompletionStreamResponse,
    ChatCompletionStreamChoice,
    ChatMessage
)
from .gemini_client import gemini_client

logger = logging.getLogger(__name__)


class OpenAIAdapter:
    """OpenAI format adapter"""

    async def chat_completion(self, request: ChatCompletionRequest) -> ChatCompletionResponse:
        """
        Handle chat completion request (non-streaming)

        Args:
            request: OpenAI format chat completion request

        Returns:
            OpenAI format chat completion response
        """
        logger.info(f"Processing chat completion request, model: {request.model}, messages: {len(request.messages)}")

        try:
            # Call Gemini CLI
            response_text = await gemini_client.chat_completion(
                messages=request.messages,
                temperature=request.temperature,
                max_tokens=request.max_tokens
            )

            # Build OpenAI format response
            response = ChatCompletionResponse(
                model=request.model,
                choices=[
                    ChatCompletionChoice(
                        index=0,
                        message=ChatMessage(
                            role="assistant",
                            content=response_text
                        ),
                        finish_reason="stop"
                    )
                ]
            )

            logger.info(f"Chat completion request processed successfully, response length: {len(response_text)}")
            return response

        except Exception as e:
            logger.error(f"Error processing chat completion request: {e}")
            raise

    async def chat_completion_stream(self, request: ChatCompletionRequest) -> StreamingResponse:
        """
        Handle streaming chat completion request

        Args:
            request: OpenAI format chat completion request

        Returns:
            Streaming response
        """
        logger.info(f"Processing streaming chat completion request, model: {request.model}, messages: {len(request.messages)}")

        async def generate_stream():
            """Generate streaming response data"""
            completion_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
            created_time = int(time.time())

            try:
                # Get streaming data generator
                stream_generator = gemini_client.chat_completion_stream(
                    messages=request.messages,
                    temperature=request.temperature,
                    max_tokens=request.max_tokens
                )

                # Send data chunks one by one
                async for chunk in stream_generator:
                    stream_response = ChatCompletionStreamResponse(
                        id=completion_id,
                        created=created_time,
                        model=request.model,
                        choices=[
                            ChatCompletionStreamChoice(
                                index=0,
                                delta={"content": chunk},
                                finish_reason=None
                            )
                        ]
                    )

                    # Send data chunk
                    yield f"data: {stream_response.model_dump_json()}\n\n"

                # Send end marker
                final_response = ChatCompletionStreamResponse(
                    id=completion_id,
                    created=created_time,
                    model=request.model,
                    choices=[
                        ChatCompletionStreamChoice(
                            index=0,
                            delta={},
                            finish_reason="stop"
                        )
                    ]
                )
                yield f"data: {final_response.model_dump_json()}\n\n"
                yield "data: [DONE]\n\n"

                logger.info("Streaming chat completion request processed successfully")

            except Exception as e:
                logger.error(f"Error processing streaming chat completion request: {e}")
                # Send error information
                error_response = {
                    "error": {
                        "message": str(e),
                        "type": "internal_error"
                    }
                }
                yield f"data: {error_response}\n\n"
                yield "data: [DONE]\n\n"

        return StreamingResponse(
            generate_stream(),
            media_type="text/plain",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "Access-Control-Allow-Origin": "*",
                "Access-Control-Allow-Headers": "*",
            }
        )


# Global adapter instance
openai_adapter = OpenAIAdapter()
gemini_cli_proxy/server.py
ADDED
@@ -0,0 +1,187 @@
"""
FastAPI server module

Implements HTTP service and API endpoints
"""

import asyncio
import logging
import traceback
from contextlib import asynccontextmanager

from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.util import get_remote_address
from slowapi.errors import RateLimitExceeded

from . import __version__
from .config import config
from .models import (
    ChatCompletionRequest,
    ChatCompletionResponse,
    ErrorResponse,
    ErrorDetail,
    HealthResponse,
    ModelsResponse,
    ModelInfo
)
from .openai_adapter import openai_adapter

# Configure logging
logging.basicConfig(
    level=getattr(logging, config.log_level.upper()),
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Create rate limiter
limiter = Limiter(key_func=get_remote_address)


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifecycle management"""
    logger.info(f"Starting Gemini CLI Proxy v{__version__}")
    logger.info(f"Configuration: port={config.port}, rate_limit={config.rate_limit}/min, concurrency={config.max_concurrency}")
    yield
    logger.info("Shutting down Gemini CLI Proxy")


# Create FastAPI application
app = FastAPI(
    title="Gemini CLI Proxy",
    description="OpenAI-compatible API wrapper for Gemini CLI",
    version=__version__,
    lifespan=lifespan
)

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Add rate limiting
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)


@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Global exception handler"""
    logger.error(f"Unhandled exception occurred while processing request: {exc}")
    logger.error(f"Exception details: {traceback.format_exc()}")

    error_response = ErrorResponse(
        error=ErrorDetail(
            message="Internal server error",
            type="internal_error",
            code="500"
        )
    )

    return JSONResponse(
        status_code=500,
        content=error_response.model_dump()
    )


@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Health check endpoint"""
    return HealthResponse(version=__version__)


@app.get("/v1/models", response_model=ModelsResponse)
async def list_models():
    """List available models"""
    models = [
        ModelInfo(id=model_id) for model_id in config.supported_models
    ]

    return ModelsResponse(data=models)


@app.post("/v1/chat/completions")
@limiter.limit(f"{config.rate_limit}/minute")
async def chat_completions(
    chat_request: ChatCompletionRequest,
    request: Request
):
    """
    Chat completion endpoint

    Implements OpenAI-compatible chat completion API
    """
    logger.info(f"Received chat completion request: model={chat_request.model}, stream={chat_request.stream}")

    try:
        # Validate model
        if chat_request.model not in config.supported_models:
            raise HTTPException(
                status_code=400,
                detail=ErrorResponse(
                    error=ErrorDetail(
                        message=f"Unsupported model: {chat_request.model}. Supported models: {', '.join(config.supported_models)}",
                        type="invalid_request_error",
                        param="model"
                    )
                ).model_dump()
            )

        # Handle streaming request
        if chat_request.stream:
            return await openai_adapter.chat_completion_stream(chat_request)

        # Handle non-streaming request
        response = await openai_adapter.chat_completion(chat_request)
        return response

    except HTTPException:
        raise
    except asyncio.TimeoutError:
        logger.error("Gemini CLI command execution timeout")
        raise HTTPException(
            status_code=502,
            detail=ErrorResponse(
                error=ErrorDetail(
                    message="Gemini CLI command execution timeout",
                    type="bad_gateway",
                    code="502"
                )
            ).model_dump()
        )
    except RuntimeError as e:
        logger.error(f"Gemini CLI execution error: {e}")
        raise HTTPException(
            status_code=502,
            detail=ErrorResponse(
                error=ErrorDetail(
                    message=str(e),
                    type="bad_gateway",
                    code="502"
                )
            ).model_dump()
        )
    except Exception as e:
        logger.error(f"Error processing chat completion request: {e}")
        raise HTTPException(
            status_code=500,
            detail=ErrorResponse(
                error=ErrorDetail(
                    message="Internal server error",
                    type="internal_error",
                    code="500"
                )
            ).model_dump()
        )


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host=config.host, port=config.port)
gemini_cli_proxy-1.0.3.dist-info/METADATA
ADDED
@@ -0,0 +1,153 @@
Metadata-Version: 2.4
Name: gemini-cli-proxy
Version: 1.0.3
Summary: OpenAI-compatible API wrapper for Gemini CLI
Author: nettee
License: MIT
License-File: LICENSE
Keywords: api,cli,gemini,openai,proxy
Requires-Python: >=3.8
Requires-Dist: click<9.0,>=8.0.0
Requires-Dist: fastapi<1.0,>=0.104.0
Requires-Dist: pydantic<3.0,>=2.0.0
Requires-Dist: slowapi<1.0,>=0.1.9
Requires-Dist: uvicorn[standard]<1.0,>=0.24.0
Description-Content-Type: text/markdown

# Gemini CLI Proxy

[Python 3.8+](https://www.python.org/downloads/) | [MIT License](https://opensource.org/licenses/MIT)

Wrap Gemini CLI as an OpenAI-compatible API service, so you can enjoy the free Gemini 2.5 Pro model through a standard API!

[English](./README.md) | [简体中文](./README_zh.md)

## ✨ Features

- 🔌 **OpenAI API Compatible**: Implements the `/v1/chat/completions` endpoint
- 🚀 **Quick Setup**: Zero-config run with `uvx`
- ⚡ **High Performance**: Built on FastAPI + asyncio with concurrent request support

## 🚀 Quick Start

### Network Configuration

Since Gemini needs to access Google services, you may need to configure a terminal proxy in certain network environments:

```bash
# Configure proxy (adjust according to your proxy server)
export https_proxy=http://127.0.0.1:7890
export http_proxy=http://127.0.0.1:7890
export all_proxy=socks5://127.0.0.1:7890
```

### Install Gemini CLI

```bash
npm install -g @google/gemini-cli
```

After installation, use the `gemini` command to run Gemini CLI. Start it once first to log in and complete the initial configuration.

Once configuration is complete, confirm that you can successfully run the following command:

```bash
gemini -p "Hello, Gemini"
```

### Start Gemini CLI Proxy

```bash
uv run gemini-cli-proxy
```

Gemini CLI Proxy listens on port `8765` by default. You can customize the port with the `--port` parameter.

After startup, test the service with curl:

```bash
curl http://localhost:8765/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer dummy-key" \
  -d '{
    "model": "gemini-2.5-pro",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'
```
79
|
+
|
80
|
+
### Usage Examples
|
81
|
+
|
82
|
+
#### OpenAI Client
|
83
|
+
|
84
|
+
```python
|
85
|
+
from openai import OpenAI
|
86
|
+
|
87
|
+
client = OpenAI(
|
88
|
+
base_url='http://localhost:8765/v1',
|
89
|
+
api_key='dummy-key' # Any string works
|
90
|
+
)
|
91
|
+
|
92
|
+
response = client.chat.completions.create(
|
93
|
+
model='gemini-2.5-pro',
|
94
|
+
messages=[
|
95
|
+
{'role': 'user', 'content': 'Hello!'}
|
96
|
+
],
|
97
|
+
)
|
98
|
+
|
99
|
+
print(response.choices[0].message.content)
|
100
|
+
```
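
Streaming is supported as well: set `"stream": true` and the proxy emits OpenAI-style `chat.completion.chunk` events as `data:` lines, terminated by `data: [DONE]` (the chunks come from splitting the full Gemini CLI reply line by line, as in `chat_completion_stream` above). A minimal curl check, with `-N` to disable output buffering; the prompt here is just an illustration:

```bash
curl -N http://localhost:8765/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer dummy-key" \
  -d '{
    "model": "gemini-2.5-flash",
    "messages": [{"role": "user", "content": "List three uses of the Gemini CLI"}],
    "stream": true
  }'
# Expected output shape:
#   data: {"id":"chatcmpl-...","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"..."}}], ...}
#   ...
#   data: [DONE]
```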

#### Cherry Studio

Add a Model Provider in the Cherry Studio settings:

- Provider Type: OpenAI
- API Host: `http://localhost:8765`
- API Key: Any string works
- Model Name: `gemini-2.5-pro` or `gemini-2.5-flash`

## ⚙️ Configuration Options

View the command line parameters:

```bash
gemini-cli-proxy --help
```

Available options (an example invocation follows the list):

- `--host`: Server host address (default: 127.0.0.1)
- `--port`: Server port (default: 8765)
- `--log-level`: Log level (debug/info/warning/error/critical)
- `--rate-limit`: Max requests per minute (default: 60)
- `--max-concurrency`: Max concurrent subprocesses (default: 4)
- `--timeout`: Gemini CLI command timeout in seconds (default: 30.0)
- `--debug`: Enable debug mode
|
132
|
+
|
133
|
+
### Q: Why do requests keep timing out?
|
134
|
+
|
135
|
+
A: This is usually a network connectivity issue. Gemini needs to access Google services, which may require proxy configuration in certain regions:
|
136
|
+
|
137
|
+
```bash
|
138
|
+
# Configure proxy (adjust according to your proxy server)
|
139
|
+
export https_proxy=http://127.0.0.1:7890
|
140
|
+
export http_proxy=http://127.0.0.1:7890
|
141
|
+
export all_proxy=socks5://127.0.0.1:7890
|
142
|
+
|
143
|
+
# Then start the service
|
144
|
+
uvx gemini-cli-proxy
|
145
|
+
```
|
146
|
+
|
147
|
+
## 📄 License
|
148
|
+
|
149
|
+
MIT License
|
150
|
+
|
151
|
+
## 🤝 Contributing
|
152
|
+
|
153
|
+
Issues and Pull Requests are welcome!
|
gemini_cli_proxy-1.0.3.dist-info/RECORD
ADDED
@@ -0,0 +1,12 @@
gemini_cli_proxy/__init__.py,sha256=fOI3EtGmmggiMc3uGV8lGLsbzXmM3ADfnFbaHnwrKtg,257
gemini_cli_proxy/cli.py,sha256=hc83w1AUobLN_-ONITforwdyQBx_0jwGyn-M_zbh4LQ,1555
gemini_cli_proxy/config.py,sha256=5erCL5v5sb2Kz-Kje-luCw7EJUB8oq5wU3fdBnMS3H0,854
gemini_cli_proxy/gemini_client.py,sha256=nfRbgzTHvh7w4QsLzG-s5B964JIBzwt--PE3jvQT0R4,5735
gemini_cli_proxy/models.py,sha256=3FNvKk4CuLUU7MrFM0X12HeEN5paRPrRoJO0083KLfQ,2779
gemini_cli_proxy/openai_adapter.py,sha256=x_8dUcob1DOLnKbTLQBsmN_e1dO6mX0DFaXqUmzcBzY,5394
gemini_cli_proxy/server.py,sha256=6u6vEc4nqrh6eocflmx8JuHTZuWoH4uKJdfdblMnJfo,5383
gemini_cli_proxy-1.0.3.dist-info/METADATA,sha256=vi5AG8BOBCWU25lJyIUXmKybadk4GzwOcZ8mJzhSzrg,4006
gemini_cli_proxy-1.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
gemini_cli_proxy-1.0.3.dist-info/entry_points.txt,sha256=wDLl4ePzvEWNQMSxoE7rKV5k8_MpK6yQwpYdiaXjcWI,63
gemini_cli_proxy-1.0.3.dist-info/licenses/LICENSE,sha256=-LKYkZXXzjCmYRVwR74fDmMHP3gNlKIW_UUuEbY9hq8,1068
gemini_cli_proxy-1.0.3.dist-info/RECORD,,
gemini_cli_proxy-1.0.3.dist-info/licenses/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 William Liu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.