gemini-cli-proxy 1.0.3 (gemini_cli_proxy-1.0.3-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
+++ gemini_cli_proxy/__init__.py
@@ -0,0 +1,9 @@
+ """
+ Gemini CLI Proxy - OpenAI-compatible API wrapper for Gemini CLI
+
+ An OpenAI-compatible HTTP API wrapper for the Google Gemini command-line tool
+ """
+
+ __version__ = "1.0.0"
+ __author__ = "nettee"
+ __description__ = "OpenAI-compatible API wrapper for Gemini CLI"
+++ gemini_cli_proxy/cli.py
@@ -0,0 +1,84 @@
+ """
+ CLI entry module
+
+ Uses Click to handle command line arguments and application startup
+ """
+
+ import click
+ import uvicorn
+
+
+ @click.command()
+ @click.option(
+     "--host",
+     default="127.0.0.1",
+     help="Server host address"
+ )
+ @click.option(
+     "--port",
+     default=8765,
+     type=int,
+     help="Server port"
+ )
+ @click.option(
+     "--log-level",
+     default="info",
+     type=click.Choice(["debug", "info", "warning", "error", "critical"]),
+     help="Log level"
+ )
+ @click.option(
+     "--rate-limit",
+     default=60,
+     type=int,
+     help="Maximum requests per minute"
+ )
+ @click.option(
+     "--max-concurrency",
+     default=4,
+     type=int,
+     help="Maximum concurrent subprocesses"
+ )
+ @click.option(
+     "--timeout",
+     default=30.0,
+     type=float,
+     help="Gemini CLI command timeout in seconds"
+ )
+ @click.option(
+     "--debug",
+     is_flag=True,
+     help="Enable debug mode"
+ )
+ def main(
+     host: str,
+     port: int,
+     log_level: str,
+     rate_limit: int,
+     max_concurrency: int,
+     timeout: float,
+     debug: bool
+ ):
+     """Start Gemini CLI Proxy server"""
+
+     # Set configuration
+     from .config import config
+     config.host = host
+     config.port = port
+     config.log_level = log_level
+     config.rate_limit = rate_limit
+     config.max_concurrency = max_concurrency
+     config.timeout = timeout
+     config.debug = debug
+
+     # Start server
+     uvicorn.run(
+         "gemini_cli_proxy.server:app",
+         host=host,
+         port=port,
+         log_level=log_level,
+         reload=debug
+     )
+
+
+ if __name__ == "__main__":
+     main()
+++ gemini_cli_proxy/config.py
@@ -0,0 +1,34 @@
+ """
+ Configuration management module
+
+ Manages application configuration
+ """
+
+
+ class Config:
+     """Application configuration class"""
+
+     def __init__(self):
+         # Server configuration
+         self.host: str = "127.0.0.1"
+         self.port: int = 8765
+         self.log_level: str = "info"
+         self.debug: bool = False
+
+         # Gemini CLI configuration
+         self.gemini_command: str = "gemini"  # Gemini CLI command path
+         self.timeout: float = 30.0
+
+         # Limit configuration
+         self.rate_limit: int = 60  # Requests per minute
+         self.max_concurrency: int = 4  # Maximum concurrent subprocesses
+
+         # Supported models list
+         self.supported_models: list = [
+             "gemini-2.5-pro",
+             "gemini-2.5-flash",
+         ]
+
+
+ # Global configuration instance
+ config = Config()
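`config.py` exposes a module-level singleton that `cli.py` mutates before the server starts, so every other module that imports `config` sees the CLI overrides. A minimal sketch of that pattern, assuming the package above is installed (this snippet is illustrative, not part of the wheel):

```python
# Sketch: mutate the shared config singleton before anything else imports it.
from gemini_cli_proxy.config import config

config.timeout = 60.0          # raise the Gemini CLI timeout
config.max_concurrency = 8     # allow more parallel subprocesses

# Later imports receive the same object, because Python caches the module
# in sys.modules, so the overrides are visible everywhere.
from gemini_cli_proxy.config import config as same_config
assert same_config.timeout == 60.0
```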
+++ gemini_cli_proxy/gemini_client.py
@@ -0,0 +1,175 @@
+ """
+ Gemini client module
+
+ Handles interaction with the Gemini CLI tool
+ """
+
+ import asyncio
+ import logging
+ from typing import List, Optional, AsyncGenerator
+ from .models import ChatMessage
+ from .config import config
+
+ logger = logging.getLogger(__name__)
+
+
+ class GeminiClient:
+     """Gemini CLI client"""
+
+     def __init__(self):
+         self.semaphore = asyncio.Semaphore(config.max_concurrency)
+
+     async def chat_completion(
+         self,
+         messages: List[ChatMessage],
+         temperature: Optional[float] = None,
+         max_tokens: Optional[int] = None,
+         **kwargs
+     ) -> str:
+         """
+         Execute chat completion request
+
+         Args:
+             messages: List of chat messages
+             temperature: Temperature parameter
+             max_tokens: Maximum number of tokens
+             **kwargs: Other parameters
+
+         Returns:
+             Response text from Gemini CLI
+
+         Raises:
+             asyncio.TimeoutError: Timeout error
+             subprocess.CalledProcessError: Command execution error
+         """
+         async with self.semaphore:
+             return await self._execute_gemini_command(
+                 messages, temperature, max_tokens, **kwargs
+             )
+
+     async def chat_completion_stream(
+         self,
+         messages: List[ChatMessage],
+         temperature: Optional[float] = None,
+         max_tokens: Optional[int] = None,
+         **kwargs
+     ) -> AsyncGenerator[str, None]:
+         """
+         Execute streaming chat completion request (fake streaming implementation)
+
+         Args:
+             messages: List of chat messages
+             temperature: Temperature parameter
+             max_tokens: Maximum number of tokens
+             **kwargs: Other parameters
+
+         Yields:
+             Response text chunks split by lines
+         """
+         # First get the complete response
+         full_response = await self.chat_completion(
+             messages, temperature, max_tokens, **kwargs
+         )
+
+         # Split by lines and yield one by one
+         lines = full_response.split('\n')
+         for line in lines:
+             if line.strip():  # Skip empty lines
+                 yield line.strip()
+                 # Add a small delay to simulate streaming
+                 await asyncio.sleep(0.05)
+
+     async def _execute_gemini_command(
+         self,
+         messages: List[ChatMessage],
+         temperature: Optional[float] = None,
+         max_tokens: Optional[int] = None,
+         **kwargs
+     ) -> str:
+         """
+         Execute Gemini CLI command
+
+         Args:
+             messages: List of chat messages
+             temperature: Temperature parameter
+             max_tokens: Maximum number of tokens
+             **kwargs: Other parameters
+
+         Returns:
+             Command output result
+         """
+         # Build command arguments
+         cmd_args = [config.gemini_command]
+
+         # Build prompt text (simplified implementation: combine all messages)
+         prompt = self._build_prompt(messages)
+
+         # Use the --prompt parameter to pass the prompt text
+         cmd_args.extend(["--prompt", prompt])
+
+         # Note: the real gemini CLI doesn't support temperature and max_tokens parameters.
+         # We ignore these parameters here but log them.
+         if temperature is not None:
+             logger.debug(f"Ignoring temperature parameter: {temperature} (not supported by gemini CLI)")
+         if max_tokens is not None:
+             logger.debug(f"Ignoring max_tokens parameter: {max_tokens} (not supported by gemini CLI)")
+
+         logger.debug(f"Executing command: {' '.join(cmd_args)}")
+
+         try:
+             # Use asyncio to execute the subprocess
+             process = await asyncio.create_subprocess_exec(
+                 *cmd_args,
+                 stdout=asyncio.subprocess.PIPE,
+                 stderr=asyncio.subprocess.PIPE
+             )
+
+             # Wait for the command to complete, with timeout
+             stdout, stderr = await asyncio.wait_for(
+                 process.communicate(),
+                 timeout=config.timeout
+             )
+
+             # Check return code
+             if process.returncode != 0:
+                 error_msg = stderr.decode('utf-8').strip()
+                 raise RuntimeError(f"Gemini CLI execution failed (exit code: {process.returncode}): {error_msg}")
+
+             # Return standard output
+             result = stdout.decode('utf-8').strip()
+             logger.debug(f"Command executed successfully, output length: {len(result)}")
+             return result
+
+         except asyncio.TimeoutError:
+             logger.error(f"Gemini CLI command timed out ({config.timeout}s)")
+             raise
+         except Exception as e:
+             logger.error(f"Error executing Gemini CLI command: {e}")
+             raise
+
+     def _build_prompt(self, messages: List[ChatMessage]) -> str:
+         """
+         Build prompt text
+
+         Args:
+             messages: List of chat messages
+
+         Returns:
+             Formatted prompt text
+         """
+         # Simplified implementation: format all messages by role
+         prompt_parts = []
+
+         for message in messages:
+             if message.role == "system":
+                 prompt_parts.append(f"System: {message.content}")
+             elif message.role == "user":
+                 prompt_parts.append(f"User: {message.content}")
+             elif message.role == "assistant":
+                 prompt_parts.append(f"Assistant: {message.content}")
+
+         return "\n".join(prompt_parts)
+
+
+ # Global client instance
+ gemini_client = GeminiClient()
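Note that `chat_completion_stream` does not stream from the CLI itself: it waits for the full response, then yields it line by line with a short delay. A minimal sketch of consuming that generator directly, assuming the package is installed and the `gemini` command is configured and on PATH (not part of the wheel):

```python
import asyncio

from gemini_cli_proxy.gemini_client import gemini_client
from gemini_cli_proxy.models import ChatMessage


async def main() -> None:
    # Each yielded chunk is one non-empty line of the full CLI response.
    async for chunk in gemini_client.chat_completion_stream(
        messages=[ChatMessage(role="user", content="List three prime numbers")]
    ):
        print("chunk:", chunk)


if __name__ == "__main__":
    asyncio.run(main())
```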
+++ gemini_cli_proxy/models.py
@@ -0,0 +1,97 @@
+ """
+ Data model definition module
+
+ Uses Pydantic to define OpenAI format request and response models
+ """
+
+ from typing import List, Optional, Union, Dict, Any, Literal
+ from pydantic import BaseModel, Field
+ import time
+ import uuid
+
+
+ class ChatMessage(BaseModel):
+     """Chat message model"""
+     role: Literal["system", "user", "assistant"]
+     content: str
+
+
+ class ChatCompletionRequest(BaseModel):
+     """Chat completion request model"""
+     model: str
+     messages: List[ChatMessage]
+     temperature: Optional[float] = Field(default=0.7, ge=0.0, le=2.0)
+     max_tokens: Optional[int] = Field(default=None, gt=0)
+     stream: Optional[bool] = False
+     top_p: Optional[float] = Field(default=1.0, ge=0.0, le=1.0)
+     frequency_penalty: Optional[float] = Field(default=0.0, ge=-2.0, le=2.0)
+     presence_penalty: Optional[float] = Field(default=0.0, ge=-2.0, le=2.0)
+     stop: Optional[Union[str, List[str]]] = None
+     user: Optional[str] = None
+
+
+ class ChatCompletionChoice(BaseModel):
+     """Chat completion choice model"""
+     index: int
+     message: ChatMessage
+     finish_reason: Literal["stop", "length", "content_filter"] = "stop"
+
+
+ class ChatCompletionResponse(BaseModel):
+     """Chat completion response model"""
+     id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex[:12]}")
+     object: str = "chat.completion"
+     created: int = Field(default_factory=lambda: int(time.time()))
+     model: str
+     choices: List[ChatCompletionChoice]
+     # Note: According to design document, usage field is omitted since gemini CLI doesn't provide token usage info
+
+
+ class ChatCompletionStreamChoice(BaseModel):
+     """Streaming chat completion choice model"""
+     index: int
+     delta: Dict[str, Any]
+     finish_reason: Optional[Literal["stop", "length", "content_filter"]] = None
+
+
+ class ChatCompletionStreamResponse(BaseModel):
+     """Streaming chat completion response model"""
+     id: str
+     object: str = "chat.completion.chunk"
+     created: int
+     model: str
+     choices: List[ChatCompletionStreamChoice]
+
+
+ class ErrorDetail(BaseModel):
+     """Error detail model"""
+     message: str
+     type: str
+     param: Optional[str] = None
+     code: Optional[str] = None
+
+
+ class ErrorResponse(BaseModel):
+     """Error response model"""
+     error: ErrorDetail
+
+
+ class ModelInfo(BaseModel):
+     """Model information model"""
+     id: str
+     object: str = "model"
+     created: int = Field(default_factory=lambda: int(time.time()))
+     owned_by: str = "gemini-cli-proxy"
+
+
+ class ModelsResponse(BaseModel):
+     """Models list response model"""
+     object: str = "list"
+     data: List[ModelInfo]
+
+
+ class HealthResponse(BaseModel):
+     """Health check response model"""
+     status: str = "ok"
+     version: str
+     timestamp: int = Field(default_factory=lambda: int(time.time()))
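These Pydantic models mirror the OpenAI wire format, so an incoming payload can be validated and an outgoing response serialized without extra glue. A small sketch of round-tripping them, using only the definitions above (illustrative, not part of the wheel):

```python
from gemini_cli_proxy.models import (
    ChatCompletionRequest,
    ChatCompletionResponse,
    ChatCompletionChoice,
    ChatMessage,
)

# Validate an incoming OpenAI-style request body.
payload = {
    "model": "gemini-2.5-pro",
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": False,
}
request = ChatCompletionRequest.model_validate(payload)

# Build the corresponding response; id, object, and created get default values.
response = ChatCompletionResponse(
    model=request.model,
    choices=[
        ChatCompletionChoice(
            index=0,
            message=ChatMessage(role="assistant", content="Hi there!"),
        )
    ],
)
print(response.model_dump_json(indent=2))
```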
+++ gemini_cli_proxy/openai_adapter.py
@@ -0,0 +1,157 @@
+ """
+ OpenAI adapter module
+
+ Handles format conversion and compatibility
+ """
+
+ import time
+ import uuid
+ import logging
+ from typing import AsyncGenerator
+ from fastapi.responses import StreamingResponse
+
+ from .models import (
+     ChatCompletionRequest,
+     ChatCompletionResponse,
+     ChatCompletionChoice,
+     ChatCompletionStreamResponse,
+     ChatCompletionStreamChoice,
+     ChatMessage
+ )
+ from .gemini_client import gemini_client
+
+ logger = logging.getLogger(__name__)
+
+
+ class OpenAIAdapter:
+     """OpenAI format adapter"""
+
+     async def chat_completion(self, request: ChatCompletionRequest) -> ChatCompletionResponse:
+         """
+         Handle chat completion request (non-streaming)
+
+         Args:
+             request: OpenAI format chat completion request
+
+         Returns:
+             OpenAI format chat completion response
+         """
+         logger.info(f"Processing chat completion request, model: {request.model}, messages: {len(request.messages)}")
+
+         try:
+             # Call Gemini CLI
+             response_text = await gemini_client.chat_completion(
+                 messages=request.messages,
+                 temperature=request.temperature,
+                 max_tokens=request.max_tokens
+             )
+
+             # Build OpenAI format response
+             response = ChatCompletionResponse(
+                 model=request.model,
+                 choices=[
+                     ChatCompletionChoice(
+                         index=0,
+                         message=ChatMessage(
+                             role="assistant",
+                             content=response_text
+                         ),
+                         finish_reason="stop"
+                     )
+                 ]
+             )
+
+             logger.info(f"Chat completion request processed successfully, response length: {len(response_text)}")
+             return response
+
+         except Exception as e:
+             logger.error(f"Error processing chat completion request: {e}")
+             raise
+
+     async def chat_completion_stream(self, request: ChatCompletionRequest) -> StreamingResponse:
+         """
+         Handle streaming chat completion request
+
+         Args:
+             request: OpenAI format chat completion request
+
+         Returns:
+             Streaming response
+         """
+         logger.info(f"Processing streaming chat completion request, model: {request.model}, messages: {len(request.messages)}")
+
+         async def generate_stream():
+             """Generate streaming response data"""
+             completion_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
+             created_time = int(time.time())
+
+             try:
+                 # Get streaming data generator
+                 stream_generator = gemini_client.chat_completion_stream(
+                     messages=request.messages,
+                     temperature=request.temperature,
+                     max_tokens=request.max_tokens
+                 )
+
+                 # Send data chunks one by one
+                 async for chunk in stream_generator:
+                     stream_response = ChatCompletionStreamResponse(
+                         id=completion_id,
+                         created=created_time,
+                         model=request.model,
+                         choices=[
+                             ChatCompletionStreamChoice(
+                                 index=0,
+                                 delta={"content": chunk},
+                                 finish_reason=None
+                             )
+                         ]
+                     )
+
+                     # Send data chunk
+                     yield f"data: {stream_response.model_dump_json()}\n\n"
+
+                 # Send end marker
+                 final_response = ChatCompletionStreamResponse(
+                     id=completion_id,
+                     created=created_time,
+                     model=request.model,
+                     choices=[
+                         ChatCompletionStreamChoice(
+                             index=0,
+                             delta={},
+                             finish_reason="stop"
+                         )
+                     ]
+                 )
+                 yield f"data: {final_response.model_dump_json()}\n\n"
+                 yield "data: [DONE]\n\n"
+
+                 logger.info("Streaming chat completion request processed successfully")
+
+             except Exception as e:
+                 logger.error(f"Error processing streaming chat completion request: {e}")
+                 # Send error information
+                 error_response = {
+                     "error": {
+                         "message": str(e),
+                         "type": "internal_error"
+                     }
+                 }
+                 yield f"data: {error_response}\n\n"
+                 yield "data: [DONE]\n\n"
+
+         return StreamingResponse(
+             generate_stream(),
+             media_type="text/plain",
+             headers={
+                 "Cache-Control": "no-cache",
+                 "Connection": "keep-alive",
+                 "Access-Control-Allow-Origin": "*",
+                 "Access-Control-Allow-Headers": "*",
+             }
+         )
+
+
+ # Global adapter instance
+ openai_adapter = OpenAIAdapter()
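The streaming adapter emits Server-Sent-Events-style lines: one `data: {json chunk}` per delta, an empty-delta chunk with `finish_reason` set to `"stop"`, then `data: [DONE]`. A rough sketch of reading that stream over raw HTTP; `httpx` is used here only for illustration and is not a dependency of the package:

```python
import json

import httpx  # assumed helper library, not required by gemini-cli-proxy

payload = {
    "model": "gemini-2.5-pro",
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": True,
}

with httpx.stream(
    "POST",
    "http://localhost:8765/v1/chat/completions",
    json=payload,
    timeout=60.0,
) as response:
    for line in response.iter_lines():
        if not line.startswith("data: "):
            continue  # skip blank keep-alive lines between events
        data = line[len("data: "):]
        if data == "[DONE]":
            break
        chunk = json.loads(data)
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            print(delta["content"])
```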
+++ gemini_cli_proxy/server.py
@@ -0,0 +1,187 @@
+ """
+ FastAPI server module
+
+ Implements HTTP service and API endpoints
+ """
+
+ import asyncio
+ import logging
+ import traceback
+ from contextlib import asynccontextmanager
+
+ from fastapi import FastAPI, HTTPException, Request
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.responses import JSONResponse
+ from slowapi import Limiter, _rate_limit_exceeded_handler
+ from slowapi.util import get_remote_address
+ from slowapi.errors import RateLimitExceeded
+
+ from . import __version__
+ from .config import config
+ from .models import (
+     ChatCompletionRequest,
+     ChatCompletionResponse,
+     ErrorResponse,
+     ErrorDetail,
+     HealthResponse,
+     ModelsResponse,
+     ModelInfo
+ )
+ from .openai_adapter import openai_adapter
+
+ # Configure logging
+ logging.basicConfig(
+     level=getattr(logging, config.log_level.upper()),
+     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+ )
+ logger = logging.getLogger(__name__)
+
+ # Create rate limiter
+ limiter = Limiter(key_func=get_remote_address)
+
+
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     """Application lifecycle management"""
+     logger.info(f"Starting Gemini CLI Proxy v{__version__}")
+     logger.info(f"Configuration: port={config.port}, rate_limit={config.rate_limit}/min, concurrency={config.max_concurrency}")
+     yield
+     logger.info("Shutting down Gemini CLI Proxy")
+
+
+ # Create FastAPI application
+ app = FastAPI(
+     title="Gemini CLI Proxy",
+     description="OpenAI-compatible API wrapper for Gemini CLI",
+     version=__version__,
+     lifespan=lifespan
+ )
+
+ # Add CORS middleware
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Add rate limiting
+ app.state.limiter = limiter
+ app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
+
+
+ @app.exception_handler(Exception)
+ async def global_exception_handler(request: Request, exc: Exception):
+     """Global exception handler"""
+     logger.error(f"Unhandled exception occurred while processing request: {exc}")
+     logger.error(f"Exception details: {traceback.format_exc()}")
+
+     error_response = ErrorResponse(
+         error=ErrorDetail(
+             message="Internal server error",
+             type="internal_error",
+             code="500"
+         )
+     )
+
+     return JSONResponse(
+         status_code=500,
+         content=error_response.model_dump()
+     )
+
+
+ @app.get("/health", response_model=HealthResponse)
+ async def health_check():
+     """Health check endpoint"""
+     return HealthResponse(version=__version__)
+
+
+ @app.get("/v1/models", response_model=ModelsResponse)
+ async def list_models():
+     """List available models"""
+     models = [
+         ModelInfo(id=model_id) for model_id in config.supported_models
+     ]
+
+     return ModelsResponse(data=models)
+
+
+ @app.post("/v1/chat/completions")
+ @limiter.limit(f"{config.rate_limit}/minute")
+ async def chat_completions(
+     chat_request: ChatCompletionRequest,
+     request: Request
+ ):
+     """
+     Chat completion endpoint
+
+     Implements OpenAI-compatible chat completion API
+     """
+     logger.info(f"Received chat completion request: model={chat_request.model}, stream={chat_request.stream}")
+
+     try:
+         # Validate model
+         if chat_request.model not in config.supported_models:
+             raise HTTPException(
+                 status_code=400,
+                 detail=ErrorResponse(
+                     error=ErrorDetail(
+                         message=f"Unsupported model: {chat_request.model}. Supported models: {', '.join(config.supported_models)}",
+                         type="invalid_request_error",
+                         param="model"
+                     )
+                 ).model_dump()
+             )
+
+         # Handle streaming request
+         if chat_request.stream:
+             return await openai_adapter.chat_completion_stream(chat_request)
+
+         # Handle non-streaming request
+         response = await openai_adapter.chat_completion(chat_request)
+         return response
+
+     except HTTPException:
+         raise
+     except asyncio.TimeoutError:
+         logger.error("Gemini CLI command execution timeout")
+         raise HTTPException(
+             status_code=502,
+             detail=ErrorResponse(
+                 error=ErrorDetail(
+                     message="Gemini CLI command execution timeout",
+                     type="bad_gateway",
+                     code="502"
+                 )
+             ).model_dump()
+         )
+     except RuntimeError as e:
+         logger.error(f"Gemini CLI execution error: {e}")
+         raise HTTPException(
+             status_code=502,
+             detail=ErrorResponse(
+                 error=ErrorDetail(
+                     message=str(e),
+                     type="bad_gateway",
+                     code="502"
+                 )
+             ).model_dump()
+         )
+     except Exception as e:
+         logger.error(f"Error processing chat completion request: {e}")
+         raise HTTPException(
+             status_code=500,
+             detail=ErrorResponse(
+                 error=ErrorDetail(
+                     message="Internal server error",
+                     type="internal_error",
+                     code="500"
+                 )
+             ).model_dump()
+         )
+
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host=config.host, port=config.port)
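Besides `/v1/chat/completions`, the server exposes `/health` and `/v1/models`. A quick sketch of probing both with only the standard library, assuming the proxy is running on the default host and port (illustrative, not part of the wheel):

```python
import json
import urllib.request

BASE_URL = "http://localhost:8765"  # default host/port from config.py

# Health check: returns status, version, and a timestamp.
with urllib.request.urlopen(f"{BASE_URL}/health") as resp:
    print(json.load(resp))

# Model listing: returns the ids from config.supported_models.
with urllib.request.urlopen(f"{BASE_URL}/v1/models") as resp:
    models = json.load(resp)
    print([m["id"] for m in models["data"]])
```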
+++ gemini_cli_proxy-1.0.3.dist-info/METADATA
@@ -0,0 +1,153 @@
+ Metadata-Version: 2.4
+ Name: gemini-cli-proxy
+ Version: 1.0.3
+ Summary: OpenAI-compatible API wrapper for Gemini CLI
+ Author: nettee
+ License: MIT
+ License-File: LICENSE
+ Keywords: api,cli,gemini,openai,proxy
+ Requires-Python: >=3.8
+ Requires-Dist: click<9.0,>=8.0.0
+ Requires-Dist: fastapi<1.0,>=0.104.0
+ Requires-Dist: pydantic<3.0,>=2.0.0
+ Requires-Dist: slowapi<1.0,>=0.1.9
+ Requires-Dist: uvicorn[standard]<1.0,>=0.24.0
+ Description-Content-Type: text/markdown
+
+ # Gemini CLI Proxy
+
+ [![Python 3.12+](https://img.shields.io/badge/python-3.12+-blue.svg)](https://www.python.org/downloads/)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+
+ Wrap Gemini CLI as an OpenAI-compatible API service, so you can use the free Gemini 2.5 Pro model through an API!
+
+ [English](./README.md) | [简体中文](./README_zh.md)
+
+ ## ✨ Features
+
+ - 🔌 **OpenAI API Compatible**: Implements the `/v1/chat/completions` endpoint
+ - 🚀 **Quick Setup**: Zero-config run with `uvx`
+ - ⚡ **High Performance**: Built on FastAPI + asyncio with concurrent request support
+
+ ## 🚀 Quick Start
+
+ ### Network Configuration
+
+ Since Gemini needs to access Google services, you may need to configure a terminal proxy in certain network environments:
+
+ ```bash
+ # Configure proxy (adjust according to your proxy server)
+ export https_proxy=http://127.0.0.1:7890
+ export http_proxy=http://127.0.0.1:7890
+ export all_proxy=socks5://127.0.0.1:7890
+ ```
+
+ ### Install Gemini CLI
+
+ Install Gemini CLI:
+ ```bash
+ npm install -g @google/gemini-cli
+ ```
+
+ After installation, use the `gemini` command to run Gemini CLI. Run it once first to log in and complete the initial configuration.
+
+ Once configured, confirm that you can successfully run the following command:
+
+ ```bash
+ gemini -p "Hello, Gemini"
+ ```
+
+ ### Start Gemini CLI Proxy
+
+ ```bash
+ uv run gemini-cli-proxy
+ ```
+
+ Gemini CLI Proxy listens on port `8765` by default. You can customize the port with the `--port` parameter.
+
+ After startup, test the service with curl:
+
+ ```bash
+ curl http://localhost:8765/v1/chat/completions \
+   -H "Content-Type: application/json" \
+   -H "Authorization: Bearer dummy-key" \
+   -d '{
+     "model": "gemini-2.5-pro",
+     "messages": [{"role": "user", "content": "Hello!"}]
+   }'
+ ```
+
+ ### Usage Examples
+
+ #### OpenAI Client
+
+ ```python
+ from openai import OpenAI
+
+ client = OpenAI(
+     base_url='http://localhost:8765/v1',
+     api_key='dummy-key'  # Any string works
+ )
+
+ response = client.chat.completions.create(
+     model='gemini-2.5-pro',
+     messages=[
+         {'role': 'user', 'content': 'Hello!'}
+     ],
+ )
+
+ print(response.choices[0].message.content)
+ ```
+
+ #### Cherry Studio
+
+ Add a Model Provider in Cherry Studio settings:
+ - Provider Type: OpenAI
+ - API Host: `http://localhost:8765`
+ - API Key: any string works
+ - Model Name: `gemini-2.5-pro` or `gemini-2.5-flash`
+
+ ![Cherry Studio Config 1](./img/cherry-studio-1.jpg)
+
+ ![Cherry Studio Config 2](./img/cherry-studio-2.jpg)
+
+ ## ⚙️ Configuration Options
+
+ View command line parameters:
+
+ ```bash
+ gemini-cli-proxy --help
+ ```
+
+ Available options:
+ - `--host`: Server host address (default: 127.0.0.1)
+ - `--port`: Server port (default: 8765)
+ - `--log-level`: Log level (debug/info/warning/error/critical)
+ - `--rate-limit`: Max requests per minute (default: 60)
+ - `--max-concurrency`: Max concurrent subprocesses (default: 4)
+ - `--timeout`: Gemini CLI command timeout in seconds (default: 30.0)
+ - `--debug`: Enable debug mode
+
+ ## ❓ FAQ
+
+ ### Q: Why do requests keep timing out?
+
+ A: This is usually a network connectivity issue. Gemini needs to access Google services, which may require proxy configuration in certain regions:
+
+ ```bash
+ # Configure proxy (adjust according to your proxy server)
+ export https_proxy=http://127.0.0.1:7890
+ export http_proxy=http://127.0.0.1:7890
+ export all_proxy=socks5://127.0.0.1:7890
+
+ # Then start the service
+ uvx gemini-cli-proxy
+ ```
+
+ ## 📄 License
+
+ MIT License
+
+ ## 🤝 Contributing
+
+ Issues and Pull Requests are welcome!
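The README's OpenAI client example covers only non-streaming calls, although the server also accepts `stream=True`. A possible streaming variant using the same `openai` client, shown here as an illustrative addition rather than part of the published README:

```python
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8765/v1",
    api_key="dummy-key",  # any string works; the proxy does not validate it
)

# The proxy fakes streaming by splitting the full CLI response into lines,
# so chunks arrive in quick succession rather than token by token.
stream = client.chat.completions.create(
    model="gemini-2.5-pro",
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
)

for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.content:
        print(delta.content, end="", flush=True)
print()
```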
+++ gemini_cli_proxy-1.0.3.dist-info/RECORD
@@ -0,0 +1,12 @@
+ gemini_cli_proxy/__init__.py,sha256=fOI3EtGmmggiMc3uGV8lGLsbzXmM3ADfnFbaHnwrKtg,257
+ gemini_cli_proxy/cli.py,sha256=hc83w1AUobLN_-ONITforwdyQBx_0jwGyn-M_zbh4LQ,1555
+ gemini_cli_proxy/config.py,sha256=5erCL5v5sb2Kz-Kje-luCw7EJUB8oq5wU3fdBnMS3H0,854
+ gemini_cli_proxy/gemini_client.py,sha256=nfRbgzTHvh7w4QsLzG-s5B964JIBzwt--PE3jvQT0R4,5735
+ gemini_cli_proxy/models.py,sha256=3FNvKk4CuLUU7MrFM0X12HeEN5paRPrRoJO0083KLfQ,2779
+ gemini_cli_proxy/openai_adapter.py,sha256=x_8dUcob1DOLnKbTLQBsmN_e1dO6mX0DFaXqUmzcBzY,5394
+ gemini_cli_proxy/server.py,sha256=6u6vEc4nqrh6eocflmx8JuHTZuWoH4uKJdfdblMnJfo,5383
+ gemini_cli_proxy-1.0.3.dist-info/METADATA,sha256=vi5AG8BOBCWU25lJyIUXmKybadk4GzwOcZ8mJzhSzrg,4006
+ gemini_cli_proxy-1.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ gemini_cli_proxy-1.0.3.dist-info/entry_points.txt,sha256=wDLl4ePzvEWNQMSxoE7rKV5k8_MpK6yQwpYdiaXjcWI,63
+ gemini_cli_proxy-1.0.3.dist-info/licenses/LICENSE,sha256=-LKYkZXXzjCmYRVwR74fDmMHP3gNlKIW_UUuEbY9hq8,1068
+ gemini_cli_proxy-1.0.3.dist-info/RECORD,,
+++ gemini_cli_proxy-1.0.3.dist-info/WHEEL
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.27.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+++ gemini_cli_proxy-1.0.3.dist-info/entry_points.txt
@@ -0,0 +1,2 @@
+ [console_scripts]
+ gemini-cli-proxy = gemini_cli_proxy.cli:main
+++ gemini_cli_proxy-1.0.3.dist-info/licenses/LICENSE
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 William Liu
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.