local-openai2anthropic 0.1.0__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_openai2anthropic/__init__.py +1 -1
- local_openai2anthropic/__main__.py +7 -0
- local_openai2anthropic/config.py +2 -2
- local_openai2anthropic/converter.py +28 -193
- local_openai2anthropic/daemon.py +382 -0
- local_openai2anthropic/daemon_runner.py +116 -0
- local_openai2anthropic/main.py +177 -25
- local_openai2anthropic/openai_types.py +149 -0
- local_openai2anthropic/router.py +75 -16
- local_openai2anthropic-0.2.3.dist-info/METADATA +351 -0
- local_openai2anthropic-0.2.3.dist-info/RECORD +19 -0
- local_openai2anthropic-0.1.0.dist-info/METADATA +0 -689
- local_openai2anthropic-0.1.0.dist-info/RECORD +0 -15
- {local_openai2anthropic-0.1.0.dist-info → local_openai2anthropic-0.2.3.dist-info}/WHEEL +0 -0
- {local_openai2anthropic-0.1.0.dist-info → local_openai2anthropic-0.2.3.dist-info}/entry_points.txt +0 -0
- {local_openai2anthropic-0.1.0.dist-info → local_openai2anthropic-0.2.3.dist-info}/licenses/LICENSE +0 -0
local_openai2anthropic/router.py
CHANGED

@@ -82,6 +82,7 @@ async def _stream_response(
     first_chunk = True
     content_block_started = False
     content_block_index = 0
+    current_block_type = None  # 'thinking', 'text', or 'tool_use'
     finish_reason = None
     input_tokens = 0
     output_tokens = 0
@@ -97,13 +98,14 @@ async def _stream_response(
 
             try:
                 chunk = json.loads(data)
+                logger.debug(f"[OpenAI Stream Chunk] {json.dumps(chunk, ensure_ascii=False)}")
             except json.JSONDecodeError:
                 continue
 
             # First chunk: message_start
             if first_chunk:
                 message_id = chunk.get("id", "")
-                usage = chunk.get("usage"
+                usage = chunk.get("usage") or {}
                 input_tokens = usage.get("prompt_tokens", 0)
 
                 start_event = {
@@ -124,37 +126,70 @@ async def _stream_response(
                         },
                     },
                 }
+                logger.debug(f"[Anthropic Stream Event] message_start: {json.dumps(start_event, ensure_ascii=False)}")
                 yield f"event: message_start\ndata: {json.dumps(start_event)}\n\n"
                 first_chunk = False
                 continue
 
             # Handle usage-only chunks
             if not chunk.get("choices"):
-                usage = chunk.get("usage"
+                usage = chunk.get("usage") or {}
                 if usage:
                     if content_block_started:
                         yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': content_block_index})}\n\n"
                         content_block_started = False
 
                     stop_reason_map = {"stop": "end_turn", "length": "max_tokens", "tool_calls": "tool_use"}
-
+                    delta_event = {'type': 'message_delta', 'delta': {'stop_reason': stop_reason_map.get(finish_reason or 'stop', 'end_turn')}, 'usage': {'input_tokens': usage.get('prompt_tokens', 0), 'output_tokens': usage.get('completion_tokens', 0), 'cache_creation_input_tokens': None, 'cache_read_input_tokens': None}}
+                    logger.debug(f"[Anthropic Stream Event] message_delta: {json.dumps(delta_event, ensure_ascii=False)}")
+                    yield f"event: message_delta\ndata: {json.dumps(delta_event)}\n\n"
                 continue
 
             choice = chunk["choices"][0]
             delta = choice.get("delta", {})
 
-            # Track finish reason
+            # Track finish reason (but don't skip - content may also be present)
             if choice.get("finish_reason"):
                 finish_reason = choice["finish_reason"]
+
+            # Handle reasoning content (thinking)
+            if delta.get("reasoning_content"):
+                reasoning = delta["reasoning_content"]
+                # Start thinking content block if not already started
+                if not content_block_started or current_block_type != 'thinking':
+                    # Close previous block if exists
+                    if content_block_started:
+                        stop_block = {'type': 'content_block_stop', 'index': content_block_index}
+                        logger.debug(f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}")
+                        yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
+                        content_block_index += 1
+                    start_block = {'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'thinking', 'thinking': ''}}
+                    logger.debug(f"[Anthropic Stream Event] content_block_start (thinking): {json.dumps(start_block, ensure_ascii=False)}")
+                    yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
+                    content_block_started = True
+                    current_block_type = 'thinking'
+
+                delta_block = {'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'thinking_delta', 'thinking': reasoning}}
+                yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
                 continue
 
             # Handle content
             if delta.get("content"):
-                if not content_block_started:
-
+                if not content_block_started or current_block_type != 'text':
+                    # Close previous block if exists
+                    if content_block_started:
+                        stop_block = {'type': 'content_block_stop', 'index': content_block_index}
+                        logger.debug(f"[Anthropic Stream Event] content_block_stop ({current_block_type}): {json.dumps(stop_block, ensure_ascii=False)}")
+                        yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
+                        content_block_index += 1
+                    start_block = {'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'text', 'text': ''}}
+                    logger.debug(f"[Anthropic Stream Event] content_block_start (text): {json.dumps(start_block, ensure_ascii=False)}")
+                    yield f"event: content_block_start\ndata: {json.dumps(start_block)}\n\n"
                     content_block_started = True
+                    current_block_type = 'text'
 
-
+                delta_block = {'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'text_delta', 'text': delta['content']}}
+                yield f"event: content_block_delta\ndata: {json.dumps(delta_block)}\n\n"
 
             # Handle tool calls
             if delta.get("tool_calls"):
@@ -166,27 +201,34 @@ async def _stream_response(
                         content_block_started = False
                         content_block_index += 1
 
-
+                        func = tool_call.get('function') or {}
+                        yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': content_block_index, 'content_block': {'type': 'tool_use', 'id': tool_call['id'], 'name': func.get('name', ''), 'input': {}}})}\n\n"
                         content_block_started = True
+                        current_block_type = 'tool_use'
 
-                    elif tool_call.get(
-                        args = tool_call
+                    elif (tool_call.get('function') or {}).get("arguments"):
+                        args = (tool_call.get('function') or {}).get("arguments", "")
                         yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': content_block_index, 'delta': {'type': 'input_json_delta', 'partial_json': args}})}\n\n"
 
         # Close final content block
         if content_block_started:
-
+            stop_block = {'type': 'content_block_stop', 'index': content_block_index}
+            logger.debug(f"[Anthropic Stream Event] content_block_stop (final): {json.dumps(stop_block, ensure_ascii=False)}")
+            yield f"event: content_block_stop\ndata: {json.dumps(stop_block)}\n\n"
 
         # Message stop
-
-
+        stop_event = {'type': 'message_stop'}
+        logger.debug(f"[Anthropic Stream Event] message_stop: {json.dumps(stop_event, ensure_ascii=False)}")
+        yield f"event: message_stop\ndata: {json.dumps(stop_event)}\n\n"
 
     except Exception as e:
+        import traceback
+        error_msg = f"{str(e)}\n{traceback.format_exc()}"
+        logger.error(f"Stream error: {error_msg}")
        error_event = AnthropicErrorResponse(
            error=AnthropicError(type="internal_error", message=str(e))
        )
        yield f"event: error\ndata: {error_event.model_dump_json()}\n\n"
-       yield "data: [DONE]\n\n"
 
 
 async def _convert_result_to_stream(
@@ -255,6 +297,14 @@ async def _convert_result_to_stream(
 
             yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': tool_result_block})}\n\n"
             yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
+
+        elif block_type == "thinking":
+            # Handle thinking blocks (BetaThinkingBlock)
+            yield f"event: content_block_start\ndata: {json.dumps({'type': 'content_block_start', 'index': i, 'content_block': {'type': 'thinking', 'thinking': ''}})}\n\n"
+            thinking_text = block.get("thinking", "")
+            if thinking_text:
+                yield f"event: content_block_delta\ndata: {json.dumps({'type': 'content_block_delta', 'index': i, 'delta': {'type': 'thinking_delta', 'thinking': thinking_text}})}\n\n"
+            yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': i})}\n\n"
 
     # 3. message_delta with final usage
     delta_event = {
@@ -272,7 +322,6 @@ async def _convert_result_to_stream(
 
     # 4. message_stop
     yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n"
-    yield "data: [DONE]\n\n"
 
 
 class ServerToolHandler:
@@ -562,6 +611,7 @@ async def create_message(
     try:
         body_bytes = await request.body()
         body_json = json.loads(body_bytes.decode("utf-8"))
+        logger.debug(f"[Anthropic Request] {json.dumps(body_json, ensure_ascii=False, indent=2)}")
         anthropic_params = body_json
     except json.JSONDecodeError as e:
         logger.error(f"Invalid JSON in request body: {e}")
@@ -618,6 +668,10 @@ async def create_message(
         enabled_server_tools=enabled_server_tools if has_server_tools else None,
     )
     openai_params: dict[str, Any] = dict(openai_params_obj)  # type: ignore
+
+    # Log converted OpenAI request (remove internal fields)
+    log_params = {k: v for k, v in openai_params.items() if not k.startswith('_')}
+    logger.debug(f"[OpenAI Request] {json.dumps(log_params, ensure_ascii=False, indent=2)}")
 
     stream = openai_params.get("stream", False)
     model = openai_params.get("model", "")
@@ -674,11 +728,16 @@ async def create_message(
             )
 
             openai_completion = response.json()
+            logger.debug(f"[OpenAI Response] {json.dumps(openai_completion, ensure_ascii=False, indent=2)}")
+
             from openai.types.chat import ChatCompletion
             completion = ChatCompletion.model_validate(openai_completion)
             anthropic_message = convert_openai_to_anthropic(completion, model)
+
+            anthropic_response = anthropic_message.model_dump()
+            logger.debug(f"[Anthropic Response] {json.dumps(anthropic_response, ensure_ascii=False, indent=2)}")
 
-            return JSONResponse(content=
+            return JSONResponse(content=anthropic_response)
 
         except httpx.TimeoutException:
             error_response = AnthropicErrorResponse(
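With these changes the stream alternates between `thinking`, `text`, and `tool_use` content blocks, each with its own index. Below is a minimal sketch of how a client could watch the raw SSE events coming out of the proxy, assuming the proxy is listening on http://localhost:8080, the standard Anthropic `/v1/messages` endpoint, and a backend model that emits `reasoning_content`; the model name is a placeholder.

```python
# Minimal sketch: print the raw SSE events emitted by the proxy's streaming path.
# Assumptions: proxy on localhost:8080, local backend model that streams reasoning_content.
import httpx

payload = {
    "model": "meta-llama/Llama-2-7b-chat-hf",  # placeholder: your local model name
    "max_tokens": 256,
    "stream": True,
    "messages": [{"role": "user", "content": "Briefly, why is the sky blue?"}],
}

with httpx.stream(
    "POST",
    "http://localhost:8080/v1/messages",
    json=payload,
    headers={"x-api-key": "dummy-key", "anthropic-version": "2023-06-01"},
    timeout=120,
) as response:
    for line in response.iter_lines():
        # Expected order: message_start, then content_block_start/delta/stop per block
        # (thinking_delta for reasoning, text_delta for the answer, input_json_delta for
        # tool calls), then message_delta and message_stop.
        if line:
            print(line)
```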
local_openai2anthropic-0.2.3.dist-info/METADATA
ADDED
@@ -0,0 +1,351 @@

Metadata-Version: 2.4
Name: local-openai2anthropic
Version: 0.2.3
Summary: A lightweight proxy server that converts Anthropic Messages API to OpenAI API
Project-URL: Homepage, https://github.com/dongfangzan/local-openai2anthropic
Project-URL: Repository, https://github.com/dongfangzan/local-openai2anthropic
Project-URL: Issues, https://github.com/dongfangzan/local-openai2anthropic/issues
Author-email: dongfangzan <zsybook0124@163.com>
Maintainer-email: dongfangzan <zsybook0124@163.com>
License: Apache-2.0
License-File: LICENSE
Keywords: anthropic,api,claude,messages,openai,proxy
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Topic :: Software Development :: Libraries :: Python Modules
Requires-Python: >=3.12
Requires-Dist: anthropic>=0.30.0
Requires-Dist: fastapi>=0.100.0
Requires-Dist: httpx>=0.25.0
Requires-Dist: openai>=1.30.0
Requires-Dist: pydantic-settings>=2.0.0
Requires-Dist: pydantic>=2.0.0
Requires-Dist: uvicorn[standard]>=0.23.0
Provides-Extra: dev
Requires-Dist: black>=23.0.0; extra == 'dev'
Requires-Dist: mypy>=1.0.0; extra == 'dev'
Requires-Dist: pytest-asyncio>=0.21.0; extra == 'dev'
Requires-Dist: pytest>=7.0.0; extra == 'dev'
Requires-Dist: ruff>=0.1.0; extra == 'dev'
Description-Content-Type: text/markdown

# local-openai2anthropic

[](https://www.python.org/downloads/)
[](https://opensource.org/licenses/Apache-2.0)
[](https://pypi.org/project/local-openai2anthropic/)

**English | [中文](README_zh.md)**

A lightweight proxy that lets applications built with the [Claude SDK](https://github.com/anthropics/anthropic-sdk-python) talk to locally hosted OpenAI-compatible LLMs.

---

## What Problem This Solves

Many local LLM tools (vLLM, SGLang, etc.) provide an OpenAI-compatible API. But if you've built your app using Anthropic's Claude SDK, you can't use them directly.

This proxy translates Claude SDK calls to OpenAI API format in real time, enabling:

- **Local LLM inference** with Claude-based apps
- **Offline development** without cloud API costs
- **Privacy-first AI** - data never leaves your machine
- **Seamless model switching** between cloud and local

---

## Supported Local Backends

Currently tested and supported:

| Backend | Description | Status |
|---------|-------------|--------|
| [vLLM](https://github.com/vllm-project/vllm) | High-throughput LLM inference | ✅ Fully supported |
| [SGLang](https://github.com/sgl-project/sglang) | Fast structured language model serving | ✅ Fully supported |

Other OpenAI-compatible backends may work but are not fully tested.

---

## Quick Start

### 1. Install

```bash
pip install local-openai2anthropic
```

### 2. Start Your Local LLM Server

Example with vLLM:
```bash
vllm serve meta-llama/Llama-2-7b-chat-hf
# vLLM starts an OpenAI-compatible API at http://localhost:8000/v1
```

Or with SGLang:
```bash
sglang launch --model-path meta-llama/Llama-2-7b-chat-hf --port 8000
# SGLang starts at http://localhost:8000/v1
```

### 3. Start the Proxy

**Option A: Run in background (recommended)**

```bash
export OA2A_OPENAI_BASE_URL=http://localhost:8000/v1  # Your local LLM endpoint
export OA2A_OPENAI_API_KEY=dummy                      # Any value, not used by local backends

oa2a start    # Start server in background
# Server starts at http://localhost:8080

# View logs
oa2a logs     # Show last 50 lines of logs
oa2a logs -f  # Follow logs in real-time (Ctrl+C to exit)

# Check status
oa2a status   # Check if server is running

# Stop server
oa2a stop     # Stop background server

# Restart server
oa2a restart  # Restart with same settings
```

**Option B: Run in foreground**

```bash
export OA2A_OPENAI_BASE_URL=http://localhost:8000/v1
export OA2A_OPENAI_API_KEY=dummy

oa2a  # Run server in foreground (blocking)
# Press Ctrl+C to stop
```

### 4. Use in Your App

```python
import anthropic

client = anthropic.Anthropic(
    base_url="http://localhost:8080",  # Point to proxy
    api_key="dummy-key",               # Not used
)

message = client.messages.create(
    model="meta-llama/Llama-2-7b-chat-hf",  # Your local model name
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello!"}],
)

print(message.content[0].text)
```

---

## Using with Claude Code

You can configure [Claude Code](https://github.com/anthropics/claude-code) to use your local LLM through this proxy.

### Configuration Steps

1. **Create or edit the Claude Code config file** at `~/.claude/CLAUDE.md`:

```markdown
# Claude Code Configuration

## API Settings

- Claude API Base URL: http://localhost:8080
- Claude API Key: dummy-key

## Model Settings

Use model: meta-llama/Llama-2-7b-chat-hf  # Your local model name
```

2. **Alternatively, set environment variables** before running Claude Code:

```bash
export ANTHROPIC_BASE_URL=http://localhost:8080
export ANTHROPIC_API_KEY=dummy-key

claude
```

3. **Or use the `--api-key` and `--base-url` flags**:

```bash
claude --api-key dummy-key --base-url http://localhost:8080
```

### Complete Workflow Example

Terminal 1 - Start your local LLM:
```bash
vllm serve meta-llama/Llama-2-7b-chat-hf
```

Terminal 2 - Start the proxy:
```bash
export OA2A_OPENAI_BASE_URL=http://localhost:8000/v1
export OA2A_OPENAI_API_KEY=dummy
export OA2A_TAVILY_API_KEY="tvly-your-tavily-api-key"  # Optional: enable web search

oa2a
```

Terminal 3 - Launch Claude Code with the local LLM:
```bash
export ANTHROPIC_BASE_URL=http://localhost:8080
export ANTHROPIC_API_KEY=dummy-key

claude
```

Now Claude Code will use your local LLM instead of the cloud API.

---

## Features

- ✅ **Streaming responses** - Real-time token streaming via SSE
- ✅ **Tool calling** - Local LLM function calling support
- ✅ **Vision models** - Multi-modal input for vision-capable models
- ✅ **Web Search** - Give your local LLM internet access (see below)
- ✅ **Thinking mode** - Supports reasoning/thinking model outputs (see the sketch below)
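As a quick illustration of the thinking-mode item above, here is a minimal sketch of reading `thinking` blocks through the proxy. It assumes a backend model that actually returns reasoning output and an anthropic SDK release recent enough to model thinking content blocks; the model name is a placeholder.

```python
import anthropic

client = anthropic.Anthropic(
    base_url="http://localhost:8080",  # the proxy
    api_key="dummy-key",
)

message = client.messages.create(
    model="meta-llama/Llama-2-7b-chat-hf",  # placeholder: your local model name
    max_tokens=1024,
    messages=[{"role": "user", "content": "Think step by step: what is 17 * 23?"}],
)

for block in message.content:
    if block.type == "thinking":
        # Reasoning text the proxy surfaced from the backend's reasoning output
        print("[thinking]", block.thinking)
    elif block.type == "text":
        print("[answer]", block.text)
```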
---

## Web Search Capability 🔍

**Bridge the gap: Give your local LLM the web search power that Claude Code users enjoy!**

When using locally-hosted models with Claude Code, you lose access to the built-in web search tool. This proxy fills that gap by providing a server-side web search implementation powered by [Tavily](https://tavily.com).

### The Problem

| Scenario | Web Search Available? |
|----------|----------------------|
| Using Claude (cloud) in Claude Code | ✅ Built-in |
| Using local vLLM/SGLang in Claude Code | ❌ Not available |
| **Using this proxy + local LLM** | ✅ **Enabled via Tavily** |

### How It Works

```
Claude Code → Anthropic SDK → This Proxy → Local LLM
                                  ↓
                        Tavily API (Web Search)
```

The proxy intercepts `web_search_20250305` tool calls and handles them directly, regardless of whether your local model supports web search natively.

### Setup Tavily Search

1. **Get a free API key** at [tavily.com](https://tavily.com) - a generous free tier is available

2. **Configure the proxy:**
```bash
export OA2A_OPENAI_BASE_URL=http://localhost:8000/v1
export OA2A_OPENAI_API_KEY=dummy
export OA2A_TAVILY_API_KEY="tvly-your-tavily-api-key"  # Enable web search

oa2a
```

3. **Use in your app:**
```python
import anthropic

client = anthropic.Anthropic(
    base_url="http://localhost:8080",
    api_key="dummy-key",
)

message = client.messages.create(
    model="meta-llama/Llama-2-7b-chat-hf",
    max_tokens=1024,
    tools=[
        {
            "name": "web_search_20250305",
            "description": "Search the web for current information",
            "input_schema": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search query"},
                },
                "required": ["query"],
            },
        }
    ],
    messages=[{"role": "user", "content": "What happened in AI today?"}],
)

if message.stop_reason == "tool_use":
    tool_use = message.content[-1]
    print(f"Searching: {tool_use.input}")
    # The proxy automatically calls Tavily and returns results
```

### Tavily Configuration Options

| Variable | Default | Description |
|----------|---------|-------------|
| `OA2A_TAVILY_API_KEY` | - | Your Tavily API key ([get one free at tavily.com](https://tavily.com)) |
| `OA2A_TAVILY_MAX_RESULTS` | 5 | Number of search results to return |
| `OA2A_TAVILY_TIMEOUT` | 30 | Search timeout in seconds |
| `OA2A_WEBSEARCH_MAX_USES` | 5 | Max search calls per request |

---

## Configuration

| Variable | Required | Default | Description |
|----------|----------|---------|-------------|
| `OA2A_OPENAI_BASE_URL` | ✅ | - | Your local LLM's OpenAI-compatible endpoint |
| `OA2A_OPENAI_API_KEY` | ✅ | - | Any value (local backends usually ignore this) |
| `OA2A_PORT` | ❌ | 8080 | Proxy server port |
| `OA2A_HOST` | ❌ | 0.0.0.0 | Proxy server host |
| `OA2A_TAVILY_API_KEY` | ❌ | - | Enable web search ([tavily.com](https://tavily.com)) |

---

## Architecture

```
Your App (Claude SDK)
          │
          ▼
┌─────────────────────────┐
│ local-openai2anthropic  │  ← This proxy
│       (Port 8080)       │
└─────────────────────────┘
          │
          ▼
Your Local LLM Server
(vLLM / SGLang)
(OpenAI-compatible API)
```

---

## Development

```bash
git clone https://github.com/dongfangzan/local-openai2anthropic.git
cd local-openai2anthropic
pip install -e ".[dev]"

pytest
```

## License

Apache License 2.0
local_openai2anthropic-0.2.3.dist-info/RECORD
ADDED
@@ -0,0 +1,19 @@

local_openai2anthropic/__init__.py,sha256=bj8tRC4_GyO5x4A5NqRdpxWWrdhAi7pC8xN9-ui0bQo,1059
local_openai2anthropic/__main__.py,sha256=K21u5u7FN8-DbO67TT_XDF0neGqJeFrVNkteRauCRQk,179
local_openai2anthropic/config.py,sha256=bnM7p5htd6rHgLn7Z0Ukmm2jVImLuVjIB5Cnfpf2ClY,1918
local_openai2anthropic/converter.py,sha256=qp0LPJBTP0uAb_5l9VINZ03RAjmumxdquP6JqWXiZkQ,15779
local_openai2anthropic/daemon.py,sha256=pZnRojGFcuIpR8yLDNjV-b0LJRBVhgRAa-dKeRRse44,10017
local_openai2anthropic/daemon_runner.py,sha256=rguOH0PgpbjqNsKYei0uCQX8JQOQ1wmtQH1CtW95Dbw,3274
local_openai2anthropic/main.py,sha256=5tdgPel8RSCn1iK0d7hYAmcTM9vYHlepgQujaEXA2ic,9866
local_openai2anthropic/openai_types.py,sha256=jFdCvLwtXYoo5gGRqOhbHQcVaxcsxNnCP_yFPIv7rG4,3823
local_openai2anthropic/protocol.py,sha256=vUEgxtRPFll6jEtLc4DyxTLCBjrWIEScZXhEqe4uibk,5185
local_openai2anthropic/router.py,sha256=5c9APWIIkM2pi4C6AZ0OWP_yrE6wn5YQmJo1OOHcuVo,36101
local_openai2anthropic/tavily_client.py,sha256=QsBhnyF8BFWPAxB4XtWCCpHCquNL5SW93-zjTTi4Meg,3774
local_openai2anthropic/server_tools/__init__.py,sha256=QlJfjEta-HOCtLe7NaY_fpbEKv-ZpInjAnfmSqE9tbk,615
local_openai2anthropic/server_tools/base.py,sha256=pNFsv-jSgxVrkY004AHAcYMNZgVSO8ZOeCzQBUtQ3vU,5633
local_openai2anthropic/server_tools/web_search.py,sha256=1C7lX_cm-tMaN3MsCjinEZYPJc_Hj4yAxYay9h8Zbvs,6543
local_openai2anthropic-0.2.3.dist-info/METADATA,sha256=auO3568iC566_VVykvf8x7oZylGVBhu0qW_zuAgp5WQ,10040
local_openai2anthropic-0.2.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
local_openai2anthropic-0.2.3.dist-info/entry_points.txt,sha256=hdc9tSJUNxyNLXcTYye5SuD2K0bEQhxBhGnWTFup6ZM,116
local_openai2anthropic-0.2.3.dist-info/licenses/LICENSE,sha256=X3_kZy3lJvd_xp8IeyUcIAO2Y367MXZc6aaRx8BYR_s,11369
local_openai2anthropic-0.2.3.dist-info/RECORD,,