@mmmbuto/anthmorph 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/Cargo.lock +1 -1
- package/Cargo.toml +1 -1
- package/README.md +48 -123
- package/bin/anthmorph +0 -0
- package/docs/CLAUDE_CODE_SETUP.md +78 -0
- package/docs/PACKAGING.md +59 -0
- package/docs/RELEASE.md +82 -0
- package/package.json +16 -4
- package/scripts/anthmorphctl +150 -8
- package/scripts/docker_build_linux.sh +11 -0
- package/scripts/docker_npm_dry_run.sh +25 -0
- package/scripts/docker_release_checks.sh +18 -0
- package/scripts/docker_rust_test.sh +35 -0
- package/scripts/docker_secret_scan.sh +11 -0
- package/scripts/postinstall.js +10 -1
- package/scripts/test_claude_code_patterns_real.sh +150 -0
- package/src/config.rs +33 -0
- package/src/main.rs +24 -5
- package/src/models/anthropic.rs +40 -0
- package/src/proxy.rs +374 -49
- package/src/transform.rs +312 -21
- package/scripts/smoke_test.sh +0 -72
- package/tests/real_backends.rs +0 -213
package/src/models/anthropic.rs
CHANGED
|
@@ -23,11 +23,29 @@ pub struct AnthropicRequest {
|
|
|
23
23
|
#[serde(default)]
|
|
24
24
|
pub tools: Option<Vec<Tool>>,
|
|
25
25
|
#[serde(default)]
|
|
26
|
+
pub thinking: Option<ThinkingConfig>,
|
|
27
|
+
#[serde(default)]
|
|
28
|
+
pub output_config: Option<OutputConfig>,
|
|
29
|
+
#[serde(default)]
|
|
26
30
|
pub stop_sequences: Option<Vec<String>>,
|
|
27
31
|
#[serde(flatten)]
|
|
28
32
|
pub extra: serde_json::Map<String, serde_json::Value>,
|
|
29
33
|
}
|
|
30
34
|
|
|
35
|
+
/// Typed form of the request's optional `thinking` object.
///
/// Only deserialization is derived, so this is an inbound-only type.
#[derive(Debug, Clone, Deserialize)]
|
|
36
|
+
pub struct ThinkingConfig {
|
|
37
|
+
// Read from the JSON key `type` (`type` is a Rust keyword, hence the rename).
#[serde(rename = "type")]
|
|
38
|
+
pub thinking_type: String,
|
|
39
|
+
// Optional; accepted under either `budget_tokens` or `budgetTokens`, and
// `None` when the key is absent.
#[serde(default, alias = "budgetTokens")]
|
|
40
|
+
pub budget_tokens: Option<usize>,
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/// Typed form of the request's optional `output_config` object.
///
/// Only deserialization is derived, so this is an inbound-only type.
#[derive(Debug, Clone, Deserialize)]
|
|
44
|
+
pub struct OutputConfig {
|
|
45
|
+
// Optional free-form string; `None` when the key is absent.
// NOTE(review): the set of accepted effort values is not visible here — confirm
// against the code that consumes this field.
#[serde(default)]
|
|
46
|
+
pub effort: Option<String>,
|
|
47
|
+
}
|
|
48
|
+
|
|
31
49
|
#[derive(Debug, Clone)]
|
|
32
50
|
pub enum SystemPrompt {
|
|
33
51
|
Single(String),
|
|
@@ -115,6 +133,8 @@ pub enum ContentBlock {
|
|
|
115
133
|
Text { text: String },
|
|
116
134
|
#[serde(rename = "image")]
|
|
117
135
|
Image { source: ImageSource },
|
|
136
|
+
#[serde(rename = "document")]
|
|
137
|
+
Document { source: serde_json::Value },
|
|
118
138
|
#[serde(rename = "tool_use")]
|
|
119
139
|
ToolUse {
|
|
120
140
|
id: String,
|
|
@@ -130,6 +150,20 @@ pub enum ContentBlock {
|
|
|
130
150
|
},
|
|
131
151
|
#[serde(rename = "thinking")]
|
|
132
152
|
Thinking { thinking: String },
|
|
153
|
+
#[serde(rename = "server_tool_use")]
|
|
154
|
+
ServerToolUse {
|
|
155
|
+
#[serde(default)]
|
|
156
|
+
name: Option<String>,
|
|
157
|
+
#[serde(default)]
|
|
158
|
+
input: Option<serde_json::Value>,
|
|
159
|
+
},
|
|
160
|
+
#[serde(rename = "search_result")]
|
|
161
|
+
SearchResult {
|
|
162
|
+
#[serde(default)]
|
|
163
|
+
query: Option<String>,
|
|
164
|
+
#[serde(default)]
|
|
165
|
+
content: Vec<serde_json::Value>,
|
|
166
|
+
},
|
|
133
167
|
#[serde(other)]
|
|
134
168
|
Other,
|
|
135
169
|
}
|
|
@@ -281,3 +315,9 @@ pub struct MessageDeltaUsage {
|
|
|
281
315
|
#[serde(rename = "output_tokens")]
|
|
282
316
|
pub output_tokens: usize,
|
|
283
317
|
}
|
|
318
|
+
|
|
319
|
+
/// Response body carrying a single input-token count.
///
/// Only serialization is derived, so this is an outbound-only type.
#[derive(Debug, Clone, Serialize)]
|
|
320
|
+
pub struct CountTokensResponse {
|
|
321
|
+
// Serialized under the key `input_tokens` (the rename matches the field name).
#[serde(rename = "input_tokens")]
|
|
322
|
+
pub input_tokens: usize,
|
|
323
|
+
}
|
package/src/proxy.rs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use crate::config::BackendProfile;
|
|
1
|
+
use crate::config::{BackendProfile, CompatMode};
|
|
2
2
|
use crate::error::{ProxyError, ProxyResult};
|
|
3
3
|
use crate::models::{anthropic, openai};
|
|
4
4
|
use crate::transform::{self, generate_message_id};
|
|
@@ -21,12 +21,24 @@ use tower_http::cors::{AllowOrigin, CorsLayer};
|
|
|
21
21
|
|
|
22
22
|
fn map_model(client_model: &str, config: &Config) -> String {
|
|
23
23
|
match client_model {
|
|
24
|
-
m if m.is_empty() || m == "default" => config.
|
|
25
|
-
m if m.starts_with("claude-") => config.
|
|
24
|
+
m if m.is_empty() || m == "default" => config.primary_model.clone(),
|
|
25
|
+
m if m.starts_with("claude-") => config.primary_model.clone(),
|
|
26
26
|
other => other.to_string(),
|
|
27
27
|
}
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
+
fn request_has_thinking(req: &anthropic::AnthropicRequest) -> bool {
|
|
31
|
+
if let Some(thinking) = &req.thinking {
|
|
32
|
+
return !thinking.thinking_type.eq_ignore_ascii_case("disabled");
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
req.extra
|
|
36
|
+
.get("thinking")
|
|
37
|
+
.and_then(|value| value.get("type").and_then(|type_value| type_value.as_str()))
|
|
38
|
+
.map(|value| !value.eq_ignore_ascii_case("disabled"))
|
|
39
|
+
.is_some()
|
|
40
|
+
}
|
|
41
|
+
|
|
30
42
|
pub async fn proxy_handler(
|
|
31
43
|
headers: HeaderMap,
|
|
32
44
|
Extension(config): Extension<Arc<Config>>,
|
|
@@ -49,9 +61,12 @@ pub async fn proxy_handler(
|
|
|
49
61
|
match &blocks[0] {
|
|
50
62
|
anthropic::ContentBlock::Text { .. } => "text_block",
|
|
51
63
|
anthropic::ContentBlock::Image { .. } => "image_block",
|
|
64
|
+
anthropic::ContentBlock::Document { .. } => "document_block",
|
|
52
65
|
anthropic::ContentBlock::ToolUse { .. } => "tool_use_block",
|
|
53
66
|
anthropic::ContentBlock::ToolResult { .. } => "tool_result_block",
|
|
54
67
|
anthropic::ContentBlock::Thinking { .. } => "thinking_block",
|
|
68
|
+
anthropic::ContentBlock::ServerToolUse { .. } => "server_tool_use_block",
|
|
69
|
+
anthropic::ContentBlock::SearchResult { .. } => "search_result_block",
|
|
55
70
|
anthropic::ContentBlock::Other => "unknown_block",
|
|
56
71
|
}
|
|
57
72
|
}
|
|
@@ -61,21 +76,17 @@ pub async fn proxy_handler(
|
|
|
61
76
|
}
|
|
62
77
|
tracing::debug!("Streaming: {}", is_streaming);
|
|
63
78
|
|
|
64
|
-
let model = if req
|
|
65
|
-
.extra
|
|
66
|
-
.get("thinking")
|
|
67
|
-
.and_then(|v| v.get("type"))
|
|
68
|
-
.is_some()
|
|
69
|
-
{
|
|
79
|
+
let model = if request_has_thinking(&req) {
|
|
70
80
|
config
|
|
71
81
|
.reasoning_model
|
|
72
82
|
.clone()
|
|
73
|
-
.unwrap_or_else(|| config.
|
|
83
|
+
.unwrap_or_else(|| config.primary_model.clone())
|
|
74
84
|
} else {
|
|
75
85
|
map_model(&req.model, &config)
|
|
76
86
|
};
|
|
77
87
|
|
|
78
|
-
let openai_req =
|
|
88
|
+
let openai_req =
|
|
89
|
+
transform::anthropic_to_openai(req, &model, config.backend_profile, config.compat_mode)?;
|
|
79
90
|
|
|
80
91
|
if is_streaming {
|
|
81
92
|
handle_streaming(config, client, openai_req).await
|
|
@@ -84,6 +95,58 @@ pub async fn proxy_handler(
|
|
|
84
95
|
}
|
|
85
96
|
}
|
|
86
97
|
|
|
98
|
+
pub async fn count_tokens_handler(
|
|
99
|
+
Extension(config): Extension<Arc<Config>>,
|
|
100
|
+
Json(req): Json<anthropic::AnthropicRequest>,
|
|
101
|
+
) -> ProxyResult<Json<anthropic::CountTokensResponse>> {
|
|
102
|
+
let model = if request_has_thinking(&req) {
|
|
103
|
+
config
|
|
104
|
+
.reasoning_model
|
|
105
|
+
.clone()
|
|
106
|
+
.unwrap_or_else(|| config.primary_model.clone())
|
|
107
|
+
} else {
|
|
108
|
+
map_model(&req.model, &config)
|
|
109
|
+
};
|
|
110
|
+
let openai_req =
|
|
111
|
+
transform::anthropic_to_openai(req, &model, config.backend_profile, config.compat_mode)?;
|
|
112
|
+
let serialized = serde_json::to_string(&openai_req)?;
|
|
113
|
+
let estimated = std::cmp::max(1, serialized.chars().count() / 4);
|
|
114
|
+
Ok(Json(anthropic::CountTokensResponse {
|
|
115
|
+
input_tokens: estimated,
|
|
116
|
+
}))
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
pub async fn models_handler(
|
|
120
|
+
Extension(config): Extension<Arc<Config>>,
|
|
121
|
+
Extension(client): Extension<Client>,
|
|
122
|
+
) -> ProxyResult<Response> {
|
|
123
|
+
let url = config.models_url();
|
|
124
|
+
let mut req_builder = client.get(&url).timeout(Duration::from_secs(60));
|
|
125
|
+
|
|
126
|
+
if let Some(api_key) = &config.api_key {
|
|
127
|
+
req_builder = req_builder.header("Authorization", format!("Bearer {}", api_key));
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
let response = req_builder.send().await.map_err(ProxyError::Http)?;
|
|
131
|
+
let status = response.status();
|
|
132
|
+
let body = response.bytes().await.map_err(ProxyError::Http)?;
|
|
133
|
+
|
|
134
|
+
if !status.is_success() {
|
|
135
|
+
return Err(ProxyError::Upstream(format!(
|
|
136
|
+
"Upstream returned {}: {}",
|
|
137
|
+
status,
|
|
138
|
+
String::from_utf8_lossy(&body)
|
|
139
|
+
)));
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
let mut headers = HeaderMap::new();
|
|
143
|
+
headers.insert(
|
|
144
|
+
header::CONTENT_TYPE,
|
|
145
|
+
HeaderValue::from_static("application/json"),
|
|
146
|
+
);
|
|
147
|
+
Ok((headers, body).into_response())
|
|
148
|
+
}
|
|
149
|
+
|
|
87
150
|
async fn handle_non_streaming(
|
|
88
151
|
config: Arc<Config>,
|
|
89
152
|
client: Client,
|
|
@@ -120,8 +183,12 @@ async fn handle_non_streaming(
|
|
|
120
183
|
}
|
|
121
184
|
|
|
122
185
|
let openai_resp: openai::OpenAIResponse = response.json().await?;
|
|
123
|
-
let anthropic_resp =
|
|
124
|
-
|
|
186
|
+
let anthropic_resp = transform::openai_to_anthropic(
|
|
187
|
+
openai_resp,
|
|
188
|
+
&openai_req.model,
|
|
189
|
+
config.backend_profile,
|
|
190
|
+
config.compat_mode,
|
|
191
|
+
)?;
|
|
125
192
|
|
|
126
193
|
Ok(Json(anthropic_resp).into_response())
|
|
127
194
|
}
|
|
@@ -162,7 +229,12 @@ async fn handle_streaming(
|
|
|
162
229
|
}
|
|
163
230
|
|
|
164
231
|
let stream = response.bytes_stream();
|
|
165
|
-
let sse_stream = create_sse_stream(
|
|
232
|
+
let sse_stream = create_sse_stream(
|
|
233
|
+
stream,
|
|
234
|
+
openai_req.model.clone(),
|
|
235
|
+
config.backend_profile,
|
|
236
|
+
config.compat_mode,
|
|
237
|
+
);
|
|
166
238
|
|
|
167
239
|
let mut headers = HeaderMap::new();
|
|
168
240
|
headers.insert(
|
|
@@ -179,6 +251,7 @@ fn create_sse_stream(
|
|
|
179
251
|
stream: impl Stream<Item = Result<Bytes, reqwest::Error>> + Send + 'static,
|
|
180
252
|
fallback_model: String,
|
|
181
253
|
profile: BackendProfile,
|
|
254
|
+
compat_mode: CompatMode,
|
|
182
255
|
) -> impl Stream<Item = Result<Bytes, std::io::Error>> + Send {
|
|
183
256
|
async_stream::stream! {
|
|
184
257
|
let mut buffer = String::new();
|
|
@@ -186,8 +259,11 @@ fn create_sse_stream(
|
|
|
186
259
|
let mut current_model = None;
|
|
187
260
|
let mut next_content_index = 0usize;
|
|
188
261
|
let mut has_sent_message_start = false;
|
|
262
|
+
let mut has_sent_message_delta = false;
|
|
263
|
+
let mut has_sent_message_stop = false;
|
|
189
264
|
let mut active_block: Option<ActiveBlock> = None;
|
|
190
265
|
let mut tool_states: BTreeMap<usize, ToolCallState> = BTreeMap::new();
|
|
266
|
+
let mut think_filter = ThinkTagStreamFilter::default();
|
|
191
267
|
|
|
192
268
|
pin!(stream);
|
|
193
269
|
|
|
@@ -232,7 +308,24 @@ fn create_sse_stream(
|
|
|
232
308
|
};
|
|
233
309
|
|
|
234
310
|
if data.trim() == "[DONE]" {
|
|
235
|
-
|
|
311
|
+
if let Some(previous) = active_block.take() {
|
|
312
|
+
yield Ok(Bytes::from(stop_block_sse(previous.index())));
|
|
313
|
+
}
|
|
314
|
+
if has_sent_message_start && !has_sent_message_delta {
|
|
315
|
+
let event = anthropic::StreamEvent::MessageDelta {
|
|
316
|
+
delta: anthropic::MessageDeltaData {
|
|
317
|
+
stop_reason: Some("end_turn".to_string()),
|
|
318
|
+
stop_sequence: (),
|
|
319
|
+
},
|
|
320
|
+
usage: None,
|
|
321
|
+
};
|
|
322
|
+
yield Ok(Bytes::from(sse_event("message_delta", &event)));
|
|
323
|
+
has_sent_message_delta = true;
|
|
324
|
+
}
|
|
325
|
+
if has_sent_message_start && !has_sent_message_stop {
|
|
326
|
+
yield Ok(Bytes::from(message_stop_sse()));
|
|
327
|
+
has_sent_message_stop = true;
|
|
328
|
+
}
|
|
236
329
|
continue;
|
|
237
330
|
}
|
|
238
331
|
|
|
@@ -273,44 +366,68 @@ fn create_sse_stream(
|
|
|
273
366
|
|
|
274
367
|
if let Some(reasoning) = &choice.delta.reasoning {
|
|
275
368
|
if !reasoning.is_empty() {
|
|
276
|
-
if !profile.supports_reasoning() {
|
|
369
|
+
if !profile.supports_reasoning() && compat_mode.is_strict() {
|
|
277
370
|
yield Ok(Bytes::from(stream_error_sse(
|
|
278
371
|
"reasoning deltas are not supported by the active backend profile",
|
|
279
372
|
)));
|
|
280
373
|
break;
|
|
281
374
|
}
|
|
282
375
|
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
376
|
+
if profile.supports_reasoning() {
|
|
377
|
+
let (idx, transitions) = transition_to_thinking(
|
|
378
|
+
&mut active_block,
|
|
379
|
+
&mut next_content_index,
|
|
380
|
+
);
|
|
381
|
+
for event in transitions {
|
|
382
|
+
yield Ok(Bytes::from(event));
|
|
383
|
+
}
|
|
384
|
+
yield Ok(Bytes::from(delta_block_sse(
|
|
385
|
+
idx,
|
|
386
|
+
anthropic::ContentBlockDeltaData::ThinkingDelta {
|
|
387
|
+
thinking: reasoning.clone(),
|
|
388
|
+
},
|
|
389
|
+
)));
|
|
289
390
|
}
|
|
290
|
-
yield Ok(Bytes::from(delta_block_sse(
|
|
291
|
-
idx,
|
|
292
|
-
anthropic::ContentBlockDeltaData::ThinkingDelta {
|
|
293
|
-
thinking: reasoning.clone(),
|
|
294
|
-
},
|
|
295
|
-
)));
|
|
296
391
|
}
|
|
297
392
|
}
|
|
298
393
|
|
|
299
394
|
if let Some(content) = &choice.delta.content {
|
|
300
395
|
if !content.is_empty() {
|
|
301
|
-
let (
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
396
|
+
let (embedded_reasoning, visible_text) = think_filter.push(content);
|
|
397
|
+
|
|
398
|
+
if profile.supports_reasoning() {
|
|
399
|
+
for reasoning in embedded_reasoning {
|
|
400
|
+
let (idx, transitions) = transition_to_thinking(
|
|
401
|
+
&mut active_block,
|
|
402
|
+
&mut next_content_index,
|
|
403
|
+
);
|
|
404
|
+
for event in transitions {
|
|
405
|
+
yield Ok(Bytes::from(event));
|
|
406
|
+
}
|
|
407
|
+
yield Ok(Bytes::from(delta_block_sse(
|
|
408
|
+
idx,
|
|
409
|
+
anthropic::ContentBlockDeltaData::ThinkingDelta {
|
|
410
|
+
thinking: reasoning,
|
|
411
|
+
},
|
|
412
|
+
)));
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
if !visible_text.is_empty() {
|
|
417
|
+
let (idx, transitions) = transition_to_text(
|
|
418
|
+
&mut active_block,
|
|
419
|
+
&mut next_content_index,
|
|
420
|
+
);
|
|
421
|
+
for event in transitions {
|
|
422
|
+
yield Ok(Bytes::from(event));
|
|
423
|
+
}
|
|
424
|
+
yield Ok(Bytes::from(delta_block_sse(
|
|
425
|
+
idx,
|
|
426
|
+
anthropic::ContentBlockDeltaData::TextDelta {
|
|
427
|
+
text: visible_text,
|
|
428
|
+
},
|
|
429
|
+
)));
|
|
307
430
|
}
|
|
308
|
-
yield Ok(Bytes::from(delta_block_sse(
|
|
309
|
-
idx,
|
|
310
|
-
anthropic::ContentBlockDeltaData::TextDelta {
|
|
311
|
-
text: content.clone(),
|
|
312
|
-
},
|
|
313
|
-
)));
|
|
314
431
|
}
|
|
315
432
|
}
|
|
316
433
|
|
|
@@ -343,6 +460,9 @@ fn create_sse_stream(
|
|
|
343
460
|
}
|
|
344
461
|
}
|
|
345
462
|
} else if active_block != Some(ActiveBlock::ToolUse(tool_index, state.content_index.unwrap())) {
|
|
463
|
+
if !compat_mode.is_strict() {
|
|
464
|
+
continue;
|
|
465
|
+
}
|
|
346
466
|
yield Ok(Bytes::from(stream_error_sse(
|
|
347
467
|
"interleaved tool call deltas are not supported safely",
|
|
348
468
|
)));
|
|
@@ -381,6 +501,11 @@ fn create_sse_stream(
|
|
|
381
501
|
}),
|
|
382
502
|
};
|
|
383
503
|
yield Ok(Bytes::from(sse_event("message_delta", &event)));
|
|
504
|
+
has_sent_message_delta = true;
|
|
505
|
+
if !has_sent_message_stop {
|
|
506
|
+
yield Ok(Bytes::from(message_stop_sse()));
|
|
507
|
+
has_sent_message_stop = true;
|
|
508
|
+
}
|
|
384
509
|
}
|
|
385
510
|
}
|
|
386
511
|
}
|
|
@@ -393,14 +518,57 @@ fn create_sse_stream(
|
|
|
393
518
|
}
|
|
394
519
|
}
|
|
395
520
|
}
|
|
521
|
+
|
|
522
|
+
let (embedded_reasoning, visible_tail) = think_filter.finish();
|
|
523
|
+
if profile.supports_reasoning() {
|
|
524
|
+
for reasoning in embedded_reasoning {
|
|
525
|
+
let (idx, transitions) =
|
|
526
|
+
transition_to_thinking(&mut active_block, &mut next_content_index);
|
|
527
|
+
for event in transitions {
|
|
528
|
+
yield Ok(Bytes::from(event));
|
|
529
|
+
}
|
|
530
|
+
yield Ok(Bytes::from(delta_block_sse(
|
|
531
|
+
idx,
|
|
532
|
+
anthropic::ContentBlockDeltaData::ThinkingDelta { thinking: reasoning },
|
|
533
|
+
)));
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
if !visible_tail.is_empty() {
|
|
537
|
+
let (idx, transitions) = transition_to_text(&mut active_block, &mut next_content_index);
|
|
538
|
+
for event in transitions {
|
|
539
|
+
yield Ok(Bytes::from(event));
|
|
540
|
+
}
|
|
541
|
+
yield Ok(Bytes::from(delta_block_sse(
|
|
542
|
+
idx,
|
|
543
|
+
anthropic::ContentBlockDeltaData::TextDelta { text: visible_tail },
|
|
544
|
+
)));
|
|
545
|
+
}
|
|
546
|
+
if let Some(previous) = active_block.take() {
|
|
547
|
+
yield Ok(Bytes::from(stop_block_sse(previous.index())));
|
|
548
|
+
}
|
|
549
|
+
if has_sent_message_start && !has_sent_message_delta {
|
|
550
|
+
let event = anthropic::StreamEvent::MessageDelta {
|
|
551
|
+
delta: anthropic::MessageDeltaData {
|
|
552
|
+
stop_reason: Some("end_turn".to_string()),
|
|
553
|
+
stop_sequence: (),
|
|
554
|
+
},
|
|
555
|
+
usage: None,
|
|
556
|
+
};
|
|
557
|
+
yield Ok(Bytes::from(sse_event("message_delta", &event)));
|
|
558
|
+
}
|
|
559
|
+
if has_sent_message_start && !has_sent_message_stop {
|
|
560
|
+
yield Ok(Bytes::from(message_stop_sse()));
|
|
561
|
+
}
|
|
396
562
|
}
|
|
397
563
|
}
|
|
398
564
|
|
|
399
565
|
pub struct Config {
|
|
400
566
|
pub backend_url: String,
|
|
401
567
|
pub backend_profile: BackendProfile,
|
|
402
|
-
pub
|
|
568
|
+
pub compat_mode: CompatMode,
|
|
569
|
+
pub primary_model: String,
|
|
403
570
|
pub reasoning_model: Option<String>,
|
|
571
|
+
pub fallback_models: Vec<String>,
|
|
404
572
|
pub api_key: Option<String>,
|
|
405
573
|
pub ingress_api_key: Option<String>,
|
|
406
574
|
pub allow_origins: Vec<String>,
|
|
@@ -409,6 +577,33 @@ pub struct Config {
|
|
|
409
577
|
|
|
410
578
|
impl Config {
|
|
411
579
|
pub fn from_env() -> Self {
|
|
580
|
+
let legacy_model = std::env::var("ANTHMORPH_MODEL").ok();
|
|
581
|
+
let primary_model = std::env::var("ANTHMORPH_PRIMARY_MODEL")
|
|
582
|
+
.ok()
|
|
583
|
+
.or_else(|| {
|
|
584
|
+
legacy_model.as_ref().and_then(|value| {
|
|
585
|
+
value
|
|
586
|
+
.split(',')
|
|
587
|
+
.next()
|
|
588
|
+
.map(str::trim)
|
|
589
|
+
.map(ToOwned::to_owned)
|
|
590
|
+
})
|
|
591
|
+
})
|
|
592
|
+
.unwrap_or_else(|| "Qwen/Qwen3.5-397B-A17B-TEE".to_string());
|
|
593
|
+
let fallback_models = std::env::var("ANTHMORPH_FALLBACK_MODELS")
|
|
594
|
+
.ok()
|
|
595
|
+
.or_else(|| legacy_model.clone())
|
|
596
|
+
.map(|value| {
|
|
597
|
+
value
|
|
598
|
+
.split(',')
|
|
599
|
+
.map(str::trim)
|
|
600
|
+
.filter(|s| !s.is_empty())
|
|
601
|
+
.filter(|s| *s != primary_model)
|
|
602
|
+
.map(ToOwned::to_owned)
|
|
603
|
+
.collect()
|
|
604
|
+
})
|
|
605
|
+
.unwrap_or_default();
|
|
606
|
+
|
|
412
607
|
Self {
|
|
413
608
|
backend_url: std::env::var("ANTHMORPH_BACKEND_URL")
|
|
414
609
|
.unwrap_or_else(|_| "https://llm.chutes.ai/v1".to_string()),
|
|
@@ -416,11 +611,13 @@ impl Config {
|
|
|
416
611
|
.ok()
|
|
417
612
|
.and_then(|v| v.parse().ok())
|
|
418
613
|
.unwrap_or(BackendProfile::Chutes),
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
614
|
+
compat_mode: std::env::var("ANTHMORPH_COMPAT_MODE")
|
|
615
|
+
.ok()
|
|
616
|
+
.and_then(|v| v.parse().ok())
|
|
617
|
+
.unwrap_or(CompatMode::Compat),
|
|
618
|
+
primary_model,
|
|
423
619
|
reasoning_model: std::env::var("ANTHMORPH_REASONING_MODEL").ok(),
|
|
620
|
+
fallback_models,
|
|
424
621
|
api_key: std::env::var("ANTHMORPH_API_KEY").ok(),
|
|
425
622
|
ingress_api_key: std::env::var("ANTHMORPH_INGRESS_API_KEY").ok(),
|
|
426
623
|
allow_origins: std::env::var("ANTHMORPH_ALLOWED_ORIGINS")
|
|
@@ -446,6 +643,10 @@ impl Config {
|
|
|
446
643
|
self.backend_url.trim_end_matches('/')
|
|
447
644
|
)
|
|
448
645
|
}
|
|
646
|
+
|
|
647
|
+
pub fn models_url(&self) -> String {
|
|
648
|
+
format!("{}/models", self.backend_url.trim_end_matches('/'))
|
|
649
|
+
}
|
|
449
650
|
}
|
|
450
651
|
|
|
451
652
|
impl fmt::Debug for Config {
|
|
@@ -453,8 +654,10 @@ impl fmt::Debug for Config {
|
|
|
453
654
|
f.debug_struct("Config")
|
|
454
655
|
.field("backend_url", &self.backend_url)
|
|
455
656
|
.field("backend_profile", &self.backend_profile.as_str())
|
|
456
|
-
.field("
|
|
657
|
+
.field("compat_mode", &self.compat_mode.as_str())
|
|
658
|
+
.field("primary_model", &self.primary_model)
|
|
457
659
|
.field("reasoning_model", &self.reasoning_model)
|
|
660
|
+
.field("fallback_models", &self.fallback_models)
|
|
458
661
|
.field("api_key", &"<hidden>")
|
|
459
662
|
.field("ingress_api_key", &"<hidden>")
|
|
460
663
|
.field("allow_origins", &self.allow_origins)
|
|
@@ -486,6 +689,80 @@ struct ToolCallState {
|
|
|
486
689
|
content_index: Option<usize>,
|
|
487
690
|
}
|
|
488
691
|
|
|
692
|
+
#[derive(Debug, Default)]
|
|
693
|
+
struct ThinkTagStreamFilter {
|
|
694
|
+
carry: String,
|
|
695
|
+
in_think: bool,
|
|
696
|
+
}
|
|
697
|
+
|
|
698
|
+
impl ThinkTagStreamFilter {
|
|
699
|
+
fn push(&mut self, chunk: &str) -> (Vec<String>, String) {
|
|
700
|
+
let mut reasoning = Vec::new();
|
|
701
|
+
let mut visible = String::new();
|
|
702
|
+
let mut work = format!("{}{}", self.carry, chunk);
|
|
703
|
+
self.carry.clear();
|
|
704
|
+
|
|
705
|
+
loop {
|
|
706
|
+
if self.in_think {
|
|
707
|
+
if let Some(end) = work.find("</think>") {
|
|
708
|
+
let think_text = &work[..end];
|
|
709
|
+
if !think_text.is_empty() {
|
|
710
|
+
reasoning.push(think_text.to_string());
|
|
711
|
+
}
|
|
712
|
+
work = work[end + "</think>".len()..].to_string();
|
|
713
|
+
self.in_think = false;
|
|
714
|
+
continue;
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
let split_at = partial_tag_suffix_start(&work, &["</think>"]);
|
|
718
|
+
if split_at > 0 {
|
|
719
|
+
reasoning.push(work[..split_at].to_string());
|
|
720
|
+
}
|
|
721
|
+
self.carry = work[split_at..].to_string();
|
|
722
|
+
break;
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
if let Some(start) = work.find("<think>") {
|
|
726
|
+
visible.push_str(&work[..start]);
|
|
727
|
+
work = work[start + "<think>".len()..].to_string();
|
|
728
|
+
self.in_think = true;
|
|
729
|
+
continue;
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
let split_at = partial_tag_suffix_start(&work, &["<think>", "</think>"]);
|
|
733
|
+
visible.push_str(&work[..split_at]);
|
|
734
|
+
self.carry = work[split_at..].to_string();
|
|
735
|
+
break;
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
(reasoning, visible)
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
fn finish(&mut self) -> (Vec<String>, String) {
|
|
742
|
+
if self.carry.is_empty() {
|
|
743
|
+
return (Vec::new(), String::new());
|
|
744
|
+
}
|
|
745
|
+
|
|
746
|
+
let leftover = std::mem::take(&mut self.carry);
|
|
747
|
+
if self.in_think {
|
|
748
|
+
self.in_think = false;
|
|
749
|
+
(vec![leftover], String::new())
|
|
750
|
+
} else {
|
|
751
|
+
(Vec::new(), leftover)
|
|
752
|
+
}
|
|
753
|
+
}
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
/// Finds where a trailing, possibly incomplete tag begins in `value`.
///
/// Returns the byte offset of the longest suffix of `value` that is a prefix
/// of any entry in `tags` (a suffix equal to a whole tag counts). When no
/// suffix matches, or `tags` is empty, returns `value.len()`, i.e. nothing
/// needs to be held back. The offset always lies on a `char` boundary.
///
/// Only the last `max(tag.len())` bytes are inspected, since a longer suffix
/// cannot be a prefix of any tag. Choosing the longest match (rather than the
/// first one seen from the end) keeps the whole fragment together for tags
/// whose prefix repeats, e.g. `"xaa"` against `"aab"` holds back `"aa"`.
fn partial_tag_suffix_start(value: &str, tags: &[&str]) -> usize {
    let longest_tag = tags.iter().map(|tag| tag.len()).max().unwrap_or(0);
    let window_start = value.len().saturating_sub(longest_tag);

    value
        .char_indices()
        .rev()
        .map(|(start, _)| start)
        .take_while(|&start| start >= window_start)
        .filter(|&start| tags.iter().any(|tag| tag.starts_with(&value[start..])))
        // Offsets arrive in descending order, so the last hit is the earliest
        // start, which is the longest matching suffix.
        .last()
        .unwrap_or(value.len())
}
|
|
765
|
+
|
|
489
766
|
fn transition_to_thinking(
|
|
490
767
|
active_block: &mut Option<ActiveBlock>,
|
|
491
768
|
next_content_index: &mut usize,
|
|
@@ -734,6 +1011,7 @@ mod tests {
|
|
|
734
1011
|
stream::iter(chunks),
|
|
735
1012
|
"fallback".to_string(),
|
|
736
1013
|
BackendProfile::Chutes,
|
|
1014
|
+
CompatMode::Strict,
|
|
737
1015
|
);
|
|
738
1016
|
tokio::pin!(sse);
|
|
739
1017
|
|
|
@@ -749,6 +1027,47 @@ mod tests {
|
|
|
749
1027
|
assert_eq!(joined.matches("event: content_block_start").count(), 1);
|
|
750
1028
|
}
|
|
751
1029
|
|
|
1030
|
+
#[tokio::test]
|
|
1031
|
+
async fn create_sse_stream_strips_think_tags_for_generic_compat() {
|
|
1032
|
+
let first = serde_json::to_string(&json!({
|
|
1033
|
+
"id": "abc",
|
|
1034
|
+
"model": "minimax",
|
|
1035
|
+
"choices": [{
|
|
1036
|
+
"index": 0,
|
|
1037
|
+
"delta": {
|
|
1038
|
+
"content": "<think>secret</think>visible"
|
|
1039
|
+
},
|
|
1040
|
+
"finish_reason": "stop"
|
|
1041
|
+
}],
|
|
1042
|
+
"usage": {
|
|
1043
|
+
"completion_tokens": 4
|
|
1044
|
+
}
|
|
1045
|
+
}))
|
|
1046
|
+
.unwrap();
|
|
1047
|
+
|
|
1048
|
+
let chunks = vec![
|
|
1049
|
+
Ok(Bytes::from(format!("data: {first}\n\n"))),
|
|
1050
|
+
Ok(Bytes::from("data: [DONE]\n\n")),
|
|
1051
|
+
];
|
|
1052
|
+
|
|
1053
|
+
let mut output = Vec::new();
|
|
1054
|
+
let sse = create_sse_stream(
|
|
1055
|
+
stream::iter(chunks),
|
|
1056
|
+
"fallback".to_string(),
|
|
1057
|
+
BackendProfile::OpenaiGeneric,
|
|
1058
|
+
CompatMode::Compat,
|
|
1059
|
+
);
|
|
1060
|
+
tokio::pin!(sse);
|
|
1061
|
+
|
|
1062
|
+
while let Some(item) = sse.next().await {
|
|
1063
|
+
output.push(String::from_utf8(item.unwrap().to_vec()).unwrap());
|
|
1064
|
+
}
|
|
1065
|
+
|
|
1066
|
+
let joined = output.join("");
|
|
1067
|
+
assert!(joined.contains("visible"));
|
|
1068
|
+
assert!(!joined.contains("secret"));
|
|
1069
|
+
}
|
|
1070
|
+
|
|
752
1071
|
#[test]
|
|
753
1072
|
fn message_start_sse_includes_required_anthropic_fields() {
|
|
754
1073
|
let event = anthropic::StreamEvent::MessageStart {
|
|
@@ -815,8 +1134,10 @@ mod tests {
|
|
|
815
1134
|
let config = Config {
|
|
816
1135
|
backend_url: "https://example.com".to_string(),
|
|
817
1136
|
backend_profile: BackendProfile::OpenaiGeneric,
|
|
818
|
-
|
|
1137
|
+
compat_mode: CompatMode::Strict,
|
|
1138
|
+
primary_model: "model".to_string(),
|
|
819
1139
|
reasoning_model: None,
|
|
1140
|
+
fallback_models: Vec::new(),
|
|
820
1141
|
api_key: None,
|
|
821
1142
|
ingress_api_key: Some("secret".to_string()),
|
|
822
1143
|
allow_origins: Vec::new(),
|
|
@@ -843,8 +1164,10 @@ mod tests {
|
|
|
843
1164
|
let config = Config {
|
|
844
1165
|
backend_url: "https://example.com".to_string(),
|
|
845
1166
|
backend_profile: BackendProfile::OpenaiGeneric,
|
|
846
|
-
|
|
1167
|
+
compat_mode: CompatMode::Strict,
|
|
1168
|
+
primary_model: "model".to_string(),
|
|
847
1169
|
reasoning_model: None,
|
|
1170
|
+
fallback_models: Vec::new(),
|
|
848
1171
|
api_key: None,
|
|
849
1172
|
ingress_api_key: Some("secret".to_string()),
|
|
850
1173
|
allow_origins: Vec::new(),
|
|
@@ -861,8 +1184,10 @@ mod tests {
|
|
|
861
1184
|
let config = Config {
|
|
862
1185
|
backend_url: "https://example.com".to_string(),
|
|
863
1186
|
backend_profile: BackendProfile::OpenaiGeneric,
|
|
864
|
-
|
|
1187
|
+
compat_mode: CompatMode::Strict,
|
|
1188
|
+
primary_model: "model".to_string(),
|
|
865
1189
|
reasoning_model: None,
|
|
1190
|
+
fallback_models: Vec::new(),
|
|
866
1191
|
api_key: None,
|
|
867
1192
|
ingress_api_key: None,
|
|
868
1193
|
allow_origins: vec!["https://allowed.example".to_string()],
|