local-openai2anthropic 0.1.1__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_openai2anthropic-0.2.2/PKG-INFO +351 -0
- local_openai2anthropic-0.2.2/README.md +316 -0
- local_openai2anthropic-0.2.2/README_zh.md +316 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/pyproject.toml +1 -1
- local_openai2anthropic-0.2.2/src/local_openai2anthropic/__main__.py +7 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/src/local_openai2anthropic/converter.py +29 -2
- local_openai2anthropic-0.2.2/src/local_openai2anthropic/daemon.py +382 -0
- local_openai2anthropic-0.2.2/src/local_openai2anthropic/daemon_runner.py +116 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/src/local_openai2anthropic/main.py +173 -34
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/src/local_openai2anthropic/router.py +18 -0
- local_openai2anthropic-0.1.1/PKG-INFO +0 -689
- local_openai2anthropic-0.1.1/README.md +0 -654
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/.env.example +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/.github/workflows/publish.yml +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/.gitignore +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/LICENSE +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/debug_request.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/examples/basic_chat.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/examples/streaming.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/examples/thinking_mode.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/examples/tool_calling.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/examples/vision.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/examples/web_search.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/src/local_openai2anthropic/__init__.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/src/local_openai2anthropic/config.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/src/local_openai2anthropic/protocol.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/src/local_openai2anthropic/server_tools/__init__.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/src/local_openai2anthropic/server_tools/base.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/src/local_openai2anthropic/server_tools/web_search.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/src/local_openai2anthropic/tavily_client.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/tests/__init__.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/tests/test_converter.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/tests/test_integration.py +0 -0
- {local_openai2anthropic-0.1.1 → local_openai2anthropic-0.2.2}/tests/test_router.py +0 -0

@@ -0,0 +1,351 @@
Metadata-Version: 2.4
Name: local-openai2anthropic
Version: 0.2.2
Summary: A lightweight proxy server that converts Anthropic Messages API to OpenAI API
Project-URL: Homepage, https://github.com/dongfangzan/local-openai2anthropic
Project-URL: Repository, https://github.com/dongfangzan/local-openai2anthropic
Project-URL: Issues, https://github.com/dongfangzan/local-openai2anthropic/issues
Author-email: dongfangzan <zsybook0124@163.com>
Maintainer-email: dongfangzan <zsybook0124@163.com>
License: Apache-2.0
License-File: LICENSE
Keywords: anthropic,api,claude,messages,openai,proxy
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Python: >=3.12
Requires-Dist: anthropic>=0.30.0
Requires-Dist: fastapi>=0.100.0
Requires-Dist: httpx>=0.25.0
Requires-Dist: openai>=1.30.0
Requires-Dist: pydantic-settings>=2.0.0
Requires-Dist: pydantic>=2.0.0
Requires-Dist: uvicorn[standard]>=0.23.0
Provides-Extra: dev
Requires-Dist: black>=23.0.0; extra == 'dev'
Requires-Dist: mypy>=1.0.0; extra == 'dev'
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
Requires-Dist: pytest>=7.0.0; extra == 'dev'
Requires-Dist: ruff>=0.1.0; extra == 'dev'
Description-Content-Type: text/markdown

# local-openai2anthropic

[](https://www.python.org/downloads/)
[](https://opensource.org/licenses/Apache-2.0)
[](https://pypi.org/project/local-openai2anthropic/)

**English | [中文](README_zh.md)**

A lightweight proxy that lets applications built with [Claude SDK](https://github.com/anthropics/anthropic-sdk-python) talk to locally-hosted OpenAI-compatible LLMs.

---

## What Problem This Solves

Many local LLM tools (vLLM, SGLang, etc.) provide an OpenAI-compatible API. But if you've built your app using Anthropic's Claude SDK, you can't use them directly.

This proxy translates Claude SDK calls to OpenAI API format in real time (see the sketch below), enabling:

- **Local LLM inference** with Claude-based apps
- **Offline development** without cloud API costs
- **Privacy-first AI** - data never leaves your machine
- **Seamless model switching** between cloud and local

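The translation itself lives in `converter.py`; the sketch below is not that implementation, only an illustration of the general shape of the mapping between the two request formats (tools, images, thinking, and streaming all need more handling).

```python
# Illustrative sketch only - not the package's converter.py.
# It maps a minimal Anthropic Messages request onto an OpenAI Chat Completions
# request, which is the direction this proxy translates.

def messages_to_chat_completions(req: dict) -> dict:
    """Rough mapping for plain-text chats."""
    chat_messages = []
    if req.get("system"):
        # Anthropic carries the system prompt as a top-level field;
        # OpenAI expects it as the first message.
        chat_messages.append({"role": "system", "content": req["system"]})
    chat_messages.extend(req.get("messages", []))
    return {
        "model": req["model"],
        "messages": chat_messages,
        "max_tokens": req.get("max_tokens", 1024),
        "temperature": req.get("temperature", 1.0),
        "stream": req.get("stream", False),
    }


print(messages_to_chat_completions({
    "model": "meta-llama/Llama-2-7b-chat-hf",
    "max_tokens": 256,
    "system": "You are concise.",
    "messages": [{"role": "user", "content": "Hello!"}],
}))
```
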
---

## Supported Local Backends

Currently tested and supported:

| Backend | Description | Status |
|---------|-------------|--------|
| [vLLM](https://github.com/vllm-project/vllm) | High-throughput LLM inference | ✅ Fully supported |
| [SGLang](https://github.com/sgl-project/sglang) | Fast structured language model serving | ✅ Fully supported |

Other OpenAI-compatible backends may work but are not fully tested.

---

## Quick Start

### 1. Install

```bash
pip install local-openai2anthropic
```

### 2. Start Your Local LLM Server

Example with vLLM:
```bash
vllm serve meta-llama/Llama-2-7b-chat-hf
# vLLM starts an OpenAI-compatible API at http://localhost:8000/v1
```

Or with SGLang:
```bash
sglang launch --model-path meta-llama/Llama-2-7b-chat-hf --port 8000
# SGLang starts at http://localhost:8000/v1
```

### 3. Start the Proxy

**Option A: Run in background (recommended)**

```bash
export OA2A_OPENAI_BASE_URL=http://localhost:8000/v1   # Your local LLM endpoint
export OA2A_OPENAI_API_KEY=dummy                       # Any value, not used by local backends

oa2a start        # Start server in background
# Server starts at http://localhost:8080

# View logs
oa2a logs         # Show last 50 lines of logs
oa2a logs -f      # Follow logs in real time (Ctrl+C to exit)

# Check status
oa2a status       # Check if server is running

# Stop server
oa2a stop         # Stop background server

# Restart server
oa2a restart      # Restart with same settings
```

**Option B: Run in foreground**

```bash
export OA2A_OPENAI_BASE_URL=http://localhost:8000/v1
export OA2A_OPENAI_API_KEY=dummy

oa2a   # Run server in foreground (blocking)
# Press Ctrl+C to stop
```

### 4. Use in Your App

```python
import anthropic

client = anthropic.Anthropic(
    base_url="http://localhost:8080",  # Point to proxy
    api_key="dummy-key",               # Not used
)

message = client.messages.create(
    model="meta-llama/Llama-2-7b-chat-hf",  # Your local model name
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello!"}],
)

print(message.content[0].text)
```

---

## Using with Claude Code

You can configure [Claude Code](https://github.com/anthropics/claude-code) to use your local LLM through this proxy.

### Configuration Steps

1. **Create or edit the Claude Code config file** at `~/.claude/CLAUDE.md`:

```markdown
# Claude Code Configuration

## API Settings

- Claude API Base URL: http://localhost:8080
- Claude API Key: dummy-key

## Model Settings

Use model: meta-llama/Llama-2-7b-chat-hf  # Your local model name
```

2. **Alternatively, set environment variables** before running Claude Code:

```bash
export ANTHROPIC_BASE_URL=http://localhost:8080
export ANTHROPIC_API_KEY=dummy-key

claude
```

3. **Or use the `--api-key` and `--base-url` flags**:

```bash
claude --api-key dummy-key --base-url http://localhost:8080
```

### Complete Workflow Example

Terminal 1 - Start your local LLM:
```bash
vllm serve meta-llama/Llama-2-7b-chat-hf
```

Terminal 2 - Start the proxy:
```bash
export OA2A_OPENAI_BASE_URL=http://localhost:8000/v1
export OA2A_OPENAI_API_KEY=dummy
export OA2A_TAVILY_API_KEY="tvly-your-tavily-api-key"  # Optional: enable web search

oa2a
```

Terminal 3 - Launch Claude Code with the local LLM:
```bash
export ANTHROPIC_BASE_URL=http://localhost:8080
export ANTHROPIC_API_KEY=dummy-key

claude
```

Now Claude Code will use your local LLM instead of the cloud API.

---

## Features

- ✅ **Streaming responses** - Real-time token streaming via SSE (see the example below)
- ✅ **Tool calling** - Local LLM function calling support
- ✅ **Vision models** - Multi-modal input for vision-capable models
- ✅ **Web Search** - Give your local LLM internet access (see below)
- ✅ **Thinking mode** - Supports reasoning/thinking model outputs

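Streaming goes through the regular Anthropic SDK interface, so the SDK's `messages.stream()` helper works against the proxy unchanged. A minimal example (the model name is a placeholder for whatever your backend serves):

```python
import anthropic

client = anthropic.Anthropic(
    base_url="http://localhost:8080",  # the proxy
    api_key="dummy-key",
)

# The SDK consumes the proxy's SSE stream and yields text deltas as they arrive.
with client.messages.stream(
    model="meta-llama/Llama-2-7b-chat-hf",  # your local model name
    max_tokens=512,
    messages=[{"role": "user", "content": "Write a haiku about proxies."}],
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)
    print()
```
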
---

## Web Search Capability 🔍

**Bridge the gap: Give your local LLM the web search power that Claude Code users enjoy!**

When using locally-hosted models with Claude Code, you lose access to the built-in web search tool. This proxy fills that gap by providing a server-side web search implementation powered by [Tavily](https://tavily.com).

### The Problem

| Scenario | Web Search Available? |
|----------|----------------------|
| Using Claude (cloud) in Claude Code | ✅ Built-in |
| Using local vLLM/SGLang in Claude Code | ❌ Not available |
| **Using this proxy + local LLM** | ✅ **Enabled via Tavily** |

### How It Works

```
Claude Code → Anthropic SDK → This Proxy → Local LLM
                                   ↓
                          Tavily API (Web Search)
```

The proxy intercepts `web_search_20250305` tool calls and handles them directly, regardless of whether your local model supports web search natively.

### Set Up Tavily Search

1. **Get a free API key** at [tavily.com](https://tavily.com) - a generous free tier is available

2. **Configure the proxy:**
```bash
export OA2A_OPENAI_BASE_URL=http://localhost:8000/v1
export OA2A_OPENAI_API_KEY=dummy
export OA2A_TAVILY_API_KEY="tvly-your-tavily-api-key"  # Enable web search

oa2a
```

3. **Use in your app:**
```python
import anthropic

client = anthropic.Anthropic(
    base_url="http://localhost:8080",
    api_key="dummy-key",
)

message = client.messages.create(
    model="meta-llama/Llama-2-7b-chat-hf",
    max_tokens=1024,
    tools=[
        {
            "name": "web_search_20250305",
            "description": "Search the web for current information",
            "input_schema": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search query"},
                },
                "required": ["query"],
            },
        }
    ],
    messages=[{"role": "user", "content": "What happened in AI today?"}],
)

if message.stop_reason == "tool_use":
    tool_use = message.content[-1]
    print(f"Searching: {tool_use.input}")
    # The proxy automatically calls Tavily and returns results
```

### Tavily Configuration Options

| Variable | Default | Description |
|----------|---------|-------------|
| `OA2A_TAVILY_API_KEY` | - | Your Tavily API key ([get one free at tavily.com](https://tavily.com)) |
| `OA2A_TAVILY_MAX_RESULTS` | 5 | Number of search results to return |
| `OA2A_TAVILY_TIMEOUT` | 30 | Search timeout in seconds |
| `OA2A_WEBSEARCH_MAX_USES` | 5 | Max search calls per request |

---

## Configuration

| Variable | Required | Default | Description |
|----------|----------|---------|-------------|
| `OA2A_OPENAI_BASE_URL` | ✅ | - | Your local LLM's OpenAI-compatible endpoint |
| `OA2A_OPENAI_API_KEY` | ✅ | - | Any value (local backends usually ignore this) |
| `OA2A_PORT` | ❌ | 8080 | Proxy server port |
| `OA2A_HOST` | ❌ | 0.0.0.0 | Proxy server host |
| `OA2A_TAVILY_API_KEY` | ❌ | - | Enable web search ([tavily.com](https://tavily.com)) |

---

## Architecture

```
Your App (Claude SDK)
          │
          ▼
┌─────────────────────────┐
│ local-openai2anthropic  │  ← This proxy
│       (Port 8080)       │
└─────────────────────────┘
          │
          ▼
 Your Local LLM Server
    (vLLM / SGLang)
 (OpenAI-compatible API)
```

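On the wire, the top arrow in the diagram is an ordinary HTTP request. A raw sketch with httpx (already a dependency of this package), assuming the proxy serves the standard Anthropic Messages route `/v1/messages`, which is the path the Anthropic SDK calls against the configured `base_url`:

```python
# Raw-HTTP sketch of a request to the proxy; the SDK does this for you.
import httpx

resp = httpx.post(
    "http://localhost:8080/v1/messages",
    headers={
        "x-api-key": "dummy-key",           # forwarded but not checked by local backends
        "anthropic-version": "2023-06-01",  # header the Anthropic SDK normally sends
        "content-type": "application/json",
    },
    json={
        "model": "meta-llama/Llama-2-7b-chat-hf",
        "max_tokens": 128,
        "messages": [{"role": "user", "content": "ping"}],
    },
    timeout=60.0,
)
print(resp.status_code)
print(resp.json()["content"][0]["text"])
```
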
---

## Development

```bash
git clone https://github.com/dongfangzan/local-openai2anthropic.git
cd local-openai2anthropic
pip install -e ".[dev]"

pytest
```

## License

Apache License 2.0

@@ -0,0 +1,316 @@
# local-openai2anthropic

[](https://www.python.org/downloads/)
[](https://opensource.org/licenses/Apache-2.0)
[](https://pypi.org/project/local-openai2anthropic/)

**English | [中文](README_zh.md)**

A lightweight proxy that lets applications built with [Claude SDK](https://github.com/anthropics/anthropic-sdk-python) talk to locally-hosted OpenAI-compatible LLMs.

---

## What Problem This Solves

Many local LLM tools (vLLM, SGLang, etc.) provide an OpenAI-compatible API. But if you've built your app using Anthropic's Claude SDK, you can't use them directly.

This proxy translates Claude SDK calls to OpenAI API format in real time, enabling:

- **Local LLM inference** with Claude-based apps
- **Offline development** without cloud API costs
- **Privacy-first AI** - data never leaves your machine
- **Seamless model switching** between cloud and local

---

## Supported Local Backends

Currently tested and supported:

| Backend | Description | Status |
|---------|-------------|--------|
| [vLLM](https://github.com/vllm-project/vllm) | High-throughput LLM inference | ✅ Fully supported |
| [SGLang](https://github.com/sgl-project/sglang) | Fast structured language model serving | ✅ Fully supported |

Other OpenAI-compatible backends may work but are not fully tested.

---

## Quick Start

### 1. Install

```bash
pip install local-openai2anthropic
```

### 2. Start Your Local LLM Server

Example with vLLM:
```bash
vllm serve meta-llama/Llama-2-7b-chat-hf
# vLLM starts an OpenAI-compatible API at http://localhost:8000/v1
```

Or with SGLang:
```bash
sglang launch --model-path meta-llama/Llama-2-7b-chat-hf --port 8000
# SGLang starts at http://localhost:8000/v1
```

### 3. Start the Proxy

**Option A: Run in background (recommended)**

```bash
export OA2A_OPENAI_BASE_URL=http://localhost:8000/v1   # Your local LLM endpoint
export OA2A_OPENAI_API_KEY=dummy                       # Any value, not used by local backends

oa2a start        # Start server in background
# Server starts at http://localhost:8080

# View logs
oa2a logs         # Show last 50 lines of logs
oa2a logs -f      # Follow logs in real time (Ctrl+C to exit)

# Check status
oa2a status       # Check if server is running

# Stop server
oa2a stop         # Stop background server

# Restart server
oa2a restart      # Restart with same settings
```

**Option B: Run in foreground**

```bash
export OA2A_OPENAI_BASE_URL=http://localhost:8000/v1
export OA2A_OPENAI_API_KEY=dummy

oa2a   # Run server in foreground (blocking)
# Press Ctrl+C to stop
```

### 4. Use in Your App

```python
import anthropic

client = anthropic.Anthropic(
    base_url="http://localhost:8080",  # Point to proxy
    api_key="dummy-key",               # Not used
)

message = client.messages.create(
    model="meta-llama/Llama-2-7b-chat-hf",  # Your local model name
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello!"}],
)

print(message.content[0].text)
```

---

## Using with Claude Code

You can configure [Claude Code](https://github.com/anthropics/claude-code) to use your local LLM through this proxy.

### Configuration Steps

1. **Create or edit the Claude Code config file** at `~/.claude/CLAUDE.md`:

```markdown
# Claude Code Configuration

## API Settings

- Claude API Base URL: http://localhost:8080
- Claude API Key: dummy-key

## Model Settings

Use model: meta-llama/Llama-2-7b-chat-hf  # Your local model name
```

2. **Alternatively, set environment variables** before running Claude Code:

```bash
export ANTHROPIC_BASE_URL=http://localhost:8080
export ANTHROPIC_API_KEY=dummy-key

claude
```

3. **Or use the `--api-key` and `--base-url` flags**:

```bash
claude --api-key dummy-key --base-url http://localhost:8080
```

### Complete Workflow Example

Terminal 1 - Start your local LLM:
```bash
vllm serve meta-llama/Llama-2-7b-chat-hf
```

Terminal 2 - Start the proxy:
```bash
export OA2A_OPENAI_BASE_URL=http://localhost:8000/v1
export OA2A_OPENAI_API_KEY=dummy
export OA2A_TAVILY_API_KEY="tvly-your-tavily-api-key"  # Optional: enable web search

oa2a
```

Terminal 3 - Launch Claude Code with the local LLM:
```bash
export ANTHROPIC_BASE_URL=http://localhost:8080
export ANTHROPIC_API_KEY=dummy-key

claude
```

Now Claude Code will use your local LLM instead of the cloud API.

---

## Features

- ✅ **Streaming responses** - Real-time token streaming via SSE
- ✅ **Tool calling** - Local LLM function calling support
- ✅ **Vision models** - Multi-modal input for vision-capable models
- ✅ **Web Search** - Give your local LLM internet access (see below)
- ✅ **Thinking mode** - Supports reasoning/thinking model outputs

---

## Web Search Capability 🔍

**Bridge the gap: Give your local LLM the web search power that Claude Code users enjoy!**

When using locally-hosted models with Claude Code, you lose access to the built-in web search tool. This proxy fills that gap by providing a server-side web search implementation powered by [Tavily](https://tavily.com).

### The Problem

| Scenario | Web Search Available? |
|----------|----------------------|
| Using Claude (cloud) in Claude Code | ✅ Built-in |
| Using local vLLM/SGLang in Claude Code | ❌ Not available |
| **Using this proxy + local LLM** | ✅ **Enabled via Tavily** |

### How It Works

```
Claude Code → Anthropic SDK → This Proxy → Local LLM
                                   ↓
                          Tavily API (Web Search)
```

The proxy intercepts `web_search_20250305` tool calls and handles them directly, regardless of whether your local model supports web search natively.

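The actual interception lives in `server_tools/web_search.py` and `tavily_client.py`; the sketch below is not that code, only an illustration of the idea: when the model emits a `web_search_20250305` tool call, the proxy answers it itself by querying Tavily instead of bouncing the call back to the client. The Tavily request/response fields shown follow its public REST search endpoint and are simplified.

```python
# Conceptual sketch (not the package's implementation) of server-side handling
# for a web_search_20250305 tool call, using Tavily's public search endpoint.
import os

import httpx


def run_web_search(query: str, max_results: int = 5) -> str:
    """Query Tavily and flatten the hits into a text block the model can read."""
    resp = httpx.post(
        "https://api.tavily.com/search",
        json={
            "api_key": os.environ["OA2A_TAVILY_API_KEY"],
            "query": query,
            "max_results": max_results,
        },
        timeout=30.0,
    )
    resp.raise_for_status()
    hits = resp.json().get("results", [])
    return "\n\n".join(f"{h['title']}\n{h['url']}\n{h['content']}" for h in hits)


# When the upstream model returns a tool call named "web_search_20250305",
# the proxy would call run_web_search(arguments["query"]) and feed the result
# back to the model as a tool result before producing the final answer.
```
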
### Set Up Tavily Search

1. **Get a free API key** at [tavily.com](https://tavily.com) - a generous free tier is available

2. **Configure the proxy:**
```bash
export OA2A_OPENAI_BASE_URL=http://localhost:8000/v1
export OA2A_OPENAI_API_KEY=dummy
export OA2A_TAVILY_API_KEY="tvly-your-tavily-api-key"  # Enable web search

oa2a
```

3. **Use in your app:**
```python
import anthropic

client = anthropic.Anthropic(
    base_url="http://localhost:8080",
    api_key="dummy-key",
)

message = client.messages.create(
    model="meta-llama/Llama-2-7b-chat-hf",
    max_tokens=1024,
    tools=[
        {
            "name": "web_search_20250305",
            "description": "Search the web for current information",
            "input_schema": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search query"},
                },
                "required": ["query"],
            },
        }
    ],
    messages=[{"role": "user", "content": "What happened in AI today?"}],
)

if message.stop_reason == "tool_use":
    tool_use = message.content[-1]
    print(f"Searching: {tool_use.input}")
    # The proxy automatically calls Tavily and returns results
```

### Tavily Configuration Options

| Variable | Default | Description |
|----------|---------|-------------|
| `OA2A_TAVILY_API_KEY` | - | Your Tavily API key ([get one free at tavily.com](https://tavily.com)) |
| `OA2A_TAVILY_MAX_RESULTS` | 5 | Number of search results to return |
| `OA2A_TAVILY_TIMEOUT` | 30 | Search timeout in seconds |
| `OA2A_WEBSEARCH_MAX_USES` | 5 | Max search calls per request |

---

## Configuration

| Variable | Required | Default | Description |
|----------|----------|---------|-------------|
| `OA2A_OPENAI_BASE_URL` | ✅ | - | Your local LLM's OpenAI-compatible endpoint |
| `OA2A_OPENAI_API_KEY` | ✅ | - | Any value (local backends usually ignore this) |
| `OA2A_PORT` | ❌ | 8080 | Proxy server port |
| `OA2A_HOST` | ❌ | 0.0.0.0 | Proxy server host |
| `OA2A_TAVILY_API_KEY` | ❌ | - | Enable web search ([tavily.com](https://tavily.com)) |

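These variables are read at startup through pydantic-settings (a declared dependency). The package's own `config.py` may differ in detail, but a settings model along these lines shows how the `OA2A_` prefix in the table above maps environment variables onto typed fields:

```python
# Hypothetical sketch of an OA2A_-prefixed settings model using pydantic-settings.
# Field names and defaults mirror the table above; the real config.py may differ.
from pydantic_settings import BaseSettings, SettingsConfigDict


class ProxySettings(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="OA2A_")

    openai_base_url: str                # required: OA2A_OPENAI_BASE_URL
    openai_api_key: str                 # required: OA2A_OPENAI_API_KEY
    host: str = "0.0.0.0"               # OA2A_HOST
    port: int = 8080                    # OA2A_PORT
    tavily_api_key: str | None = None   # OA2A_TAVILY_API_KEY (enables web search)


settings = ProxySettings()  # raises a validation error if required variables are missing
print(settings.port)
```
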
---

## Architecture

```
Your App (Claude SDK)
          │
          ▼
┌─────────────────────────┐
│ local-openai2anthropic  │  ← This proxy
│       (Port 8080)       │
└─────────────────────────┘
          │
          ▼
 Your Local LLM Server
    (vLLM / SGLang)
 (OpenAI-compatible API)
```

---

## Development

```bash
git clone https://github.com/dongfangzan/local-openai2anthropic.git
cd local-openai2anthropic
pip install -e ".[dev]"

pytest
```

## License

Apache License 2.0