@mmmbuto/anthmorph 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -0
- package/Cargo.lock +1 -1
- package/Cargo.toml +1 -1
- package/README.md +48 -123
- package/bin/anthmorph +0 -0
- package/docs/CLAUDE_CODE_SETUP.md +78 -0
- package/docs/PACKAGING.md +59 -0
- package/docs/RELEASE.md +82 -0
- package/package.json +16 -4
- package/prebuilt/anthmorph +0 -0
- package/scripts/anthmorphctl +150 -8
- package/scripts/docker_build_linux.sh +11 -0
- package/scripts/docker_npm_dry_run.sh +25 -0
- package/scripts/docker_release_checks.sh +18 -0
- package/scripts/docker_rust_test.sh +35 -0
- package/scripts/docker_secret_scan.sh +11 -0
- package/scripts/postinstall.js +10 -1
- package/scripts/test_claude_code_patterns_real.sh +150 -0
- package/src/config.rs +33 -0
- package/src/main.rs +24 -5
- package/src/models/anthropic.rs +46 -1
- package/src/proxy.rs +432 -47
- package/src/transform.rs +364 -42
- package/scripts/smoke_test.sh +0 -72
- package/tests/real_backends.rs +0 -213
package/src/proxy.rs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use crate::config::BackendProfile;
|
|
1
|
+
use crate::config::{BackendProfile, CompatMode};
|
|
2
2
|
use crate::error::{ProxyError, ProxyResult};
|
|
3
3
|
use crate::models::{anthropic, openai};
|
|
4
4
|
use crate::transform::{self, generate_message_id};
|
|
@@ -21,12 +21,24 @@ use tower_http::cors::{AllowOrigin, CorsLayer};
|
|
|
21
21
|
|
|
22
22
|
fn map_model(client_model: &str, config: &Config) -> String {
|
|
23
23
|
match client_model {
|
|
24
|
-
m if m.is_empty() || m == "default" => config.
|
|
25
|
-
m if m.starts_with("claude-") => config.
|
|
24
|
+
m if m.is_empty() || m == "default" => config.primary_model.clone(),
|
|
25
|
+
m if m.starts_with("claude-") => config.primary_model.clone(),
|
|
26
26
|
other => other.to_string(),
|
|
27
27
|
}
|
|
28
28
|
}
|
|
29
29
|
|
|
30
|
+
fn request_has_thinking(req: &anthropic::AnthropicRequest) -> bool {
|
|
31
|
+
if let Some(thinking) = &req.thinking {
|
|
32
|
+
return !thinking.thinking_type.eq_ignore_ascii_case("disabled");
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
req.extra
|
|
36
|
+
.get("thinking")
|
|
37
|
+
.and_then(|value| value.get("type").and_then(|type_value| type_value.as_str()))
|
|
38
|
+
.map(|value| !value.eq_ignore_ascii_case("disabled"))
|
|
39
|
+
.is_some()
|
|
40
|
+
}
|
|
41
|
+
|
|
30
42
|
pub async fn proxy_handler(
|
|
31
43
|
headers: HeaderMap,
|
|
32
44
|
Extension(config): Extension<Arc<Config>>,
|
|
@@ -49,9 +61,12 @@ pub async fn proxy_handler(
|
|
|
49
61
|
match &blocks[0] {
|
|
50
62
|
anthropic::ContentBlock::Text { .. } => "text_block",
|
|
51
63
|
anthropic::ContentBlock::Image { .. } => "image_block",
|
|
64
|
+
anthropic::ContentBlock::Document { .. } => "document_block",
|
|
52
65
|
anthropic::ContentBlock::ToolUse { .. } => "tool_use_block",
|
|
53
66
|
anthropic::ContentBlock::ToolResult { .. } => "tool_result_block",
|
|
54
67
|
anthropic::ContentBlock::Thinking { .. } => "thinking_block",
|
|
68
|
+
anthropic::ContentBlock::ServerToolUse { .. } => "server_tool_use_block",
|
|
69
|
+
anthropic::ContentBlock::SearchResult { .. } => "search_result_block",
|
|
55
70
|
anthropic::ContentBlock::Other => "unknown_block",
|
|
56
71
|
}
|
|
57
72
|
}
|
|
@@ -61,21 +76,17 @@ pub async fn proxy_handler(
|
|
|
61
76
|
}
|
|
62
77
|
tracing::debug!("Streaming: {}", is_streaming);
|
|
63
78
|
|
|
64
|
-
let model = if req
|
|
65
|
-
.extra
|
|
66
|
-
.get("thinking")
|
|
67
|
-
.and_then(|v| v.get("type"))
|
|
68
|
-
.is_some()
|
|
69
|
-
{
|
|
79
|
+
let model = if request_has_thinking(&req) {
|
|
70
80
|
config
|
|
71
81
|
.reasoning_model
|
|
72
82
|
.clone()
|
|
73
|
-
.unwrap_or_else(|| config.
|
|
83
|
+
.unwrap_or_else(|| config.primary_model.clone())
|
|
74
84
|
} else {
|
|
75
85
|
map_model(&req.model, &config)
|
|
76
86
|
};
|
|
77
87
|
|
|
78
|
-
let openai_req =
|
|
88
|
+
let openai_req =
|
|
89
|
+
transform::anthropic_to_openai(req, &model, config.backend_profile, config.compat_mode)?;
|
|
79
90
|
|
|
80
91
|
if is_streaming {
|
|
81
92
|
handle_streaming(config, client, openai_req).await
|
|
@@ -84,6 +95,58 @@ pub async fn proxy_handler(
|
|
|
84
95
|
}
|
|
85
96
|
}
|
|
86
97
|
|
|
98
|
+
pub async fn count_tokens_handler(
|
|
99
|
+
Extension(config): Extension<Arc<Config>>,
|
|
100
|
+
Json(req): Json<anthropic::AnthropicRequest>,
|
|
101
|
+
) -> ProxyResult<Json<anthropic::CountTokensResponse>> {
|
|
102
|
+
let model = if request_has_thinking(&req) {
|
|
103
|
+
config
|
|
104
|
+
.reasoning_model
|
|
105
|
+
.clone()
|
|
106
|
+
.unwrap_or_else(|| config.primary_model.clone())
|
|
107
|
+
} else {
|
|
108
|
+
map_model(&req.model, &config)
|
|
109
|
+
};
|
|
110
|
+
let openai_req =
|
|
111
|
+
transform::anthropic_to_openai(req, &model, config.backend_profile, config.compat_mode)?;
|
|
112
|
+
let serialized = serde_json::to_string(&openai_req)?;
|
|
113
|
+
let estimated = std::cmp::max(1, serialized.chars().count() / 4);
|
|
114
|
+
Ok(Json(anthropic::CountTokensResponse {
|
|
115
|
+
input_tokens: estimated,
|
|
116
|
+
}))
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
pub async fn models_handler(
|
|
120
|
+
Extension(config): Extension<Arc<Config>>,
|
|
121
|
+
Extension(client): Extension<Client>,
|
|
122
|
+
) -> ProxyResult<Response> {
|
|
123
|
+
let url = config.models_url();
|
|
124
|
+
let mut req_builder = client.get(&url).timeout(Duration::from_secs(60));
|
|
125
|
+
|
|
126
|
+
if let Some(api_key) = &config.api_key {
|
|
127
|
+
req_builder = req_builder.header("Authorization", format!("Bearer {}", api_key));
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
let response = req_builder.send().await.map_err(ProxyError::Http)?;
|
|
131
|
+
let status = response.status();
|
|
132
|
+
let body = response.bytes().await.map_err(ProxyError::Http)?;
|
|
133
|
+
|
|
134
|
+
if !status.is_success() {
|
|
135
|
+
return Err(ProxyError::Upstream(format!(
|
|
136
|
+
"Upstream returned {}: {}",
|
|
137
|
+
status,
|
|
138
|
+
String::from_utf8_lossy(&body)
|
|
139
|
+
)));
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
let mut headers = HeaderMap::new();
|
|
143
|
+
headers.insert(
|
|
144
|
+
header::CONTENT_TYPE,
|
|
145
|
+
HeaderValue::from_static("application/json"),
|
|
146
|
+
);
|
|
147
|
+
Ok((headers, body).into_response())
|
|
148
|
+
}
|
|
149
|
+
|
|
87
150
|
async fn handle_non_streaming(
|
|
88
151
|
config: Arc<Config>,
|
|
89
152
|
client: Client,
|
|
@@ -120,8 +183,12 @@ async fn handle_non_streaming(
|
|
|
120
183
|
}
|
|
121
184
|
|
|
122
185
|
let openai_resp: openai::OpenAIResponse = response.json().await?;
|
|
123
|
-
let anthropic_resp =
|
|
124
|
-
|
|
186
|
+
let anthropic_resp = transform::openai_to_anthropic(
|
|
187
|
+
openai_resp,
|
|
188
|
+
&openai_req.model,
|
|
189
|
+
config.backend_profile,
|
|
190
|
+
config.compat_mode,
|
|
191
|
+
)?;
|
|
125
192
|
|
|
126
193
|
Ok(Json(anthropic_resp).into_response())
|
|
127
194
|
}
|
|
@@ -162,7 +229,12 @@ async fn handle_streaming(
|
|
|
162
229
|
}
|
|
163
230
|
|
|
164
231
|
let stream = response.bytes_stream();
|
|
165
|
-
let sse_stream = create_sse_stream(
|
|
232
|
+
let sse_stream = create_sse_stream(
|
|
233
|
+
stream,
|
|
234
|
+
openai_req.model.clone(),
|
|
235
|
+
config.backend_profile,
|
|
236
|
+
config.compat_mode,
|
|
237
|
+
);
|
|
166
238
|
|
|
167
239
|
let mut headers = HeaderMap::new();
|
|
168
240
|
headers.insert(
|
|
@@ -179,6 +251,7 @@ fn create_sse_stream(
|
|
|
179
251
|
stream: impl Stream<Item = Result<Bytes, reqwest::Error>> + Send + 'static,
|
|
180
252
|
fallback_model: String,
|
|
181
253
|
profile: BackendProfile,
|
|
254
|
+
compat_mode: CompatMode,
|
|
182
255
|
) -> impl Stream<Item = Result<Bytes, std::io::Error>> + Send {
|
|
183
256
|
async_stream::stream! {
|
|
184
257
|
let mut buffer = String::new();
|
|
@@ -186,8 +259,11 @@ fn create_sse_stream(
|
|
|
186
259
|
let mut current_model = None;
|
|
187
260
|
let mut next_content_index = 0usize;
|
|
188
261
|
let mut has_sent_message_start = false;
|
|
262
|
+
let mut has_sent_message_delta = false;
|
|
263
|
+
let mut has_sent_message_stop = false;
|
|
189
264
|
let mut active_block: Option<ActiveBlock> = None;
|
|
190
265
|
let mut tool_states: BTreeMap<usize, ToolCallState> = BTreeMap::new();
|
|
266
|
+
let mut think_filter = ThinkTagStreamFilter::default();
|
|
191
267
|
|
|
192
268
|
pin!(stream);
|
|
193
269
|
|
|
@@ -232,7 +308,24 @@ fn create_sse_stream(
|
|
|
232
308
|
};
|
|
233
309
|
|
|
234
310
|
if data.trim() == "[DONE]" {
|
|
235
|
-
|
|
311
|
+
if let Some(previous) = active_block.take() {
|
|
312
|
+
yield Ok(Bytes::from(stop_block_sse(previous.index())));
|
|
313
|
+
}
|
|
314
|
+
if has_sent_message_start && !has_sent_message_delta {
|
|
315
|
+
let event = anthropic::StreamEvent::MessageDelta {
|
|
316
|
+
delta: anthropic::MessageDeltaData {
|
|
317
|
+
stop_reason: Some("end_turn".to_string()),
|
|
318
|
+
stop_sequence: (),
|
|
319
|
+
},
|
|
320
|
+
usage: None,
|
|
321
|
+
};
|
|
322
|
+
yield Ok(Bytes::from(sse_event("message_delta", &event)));
|
|
323
|
+
has_sent_message_delta = true;
|
|
324
|
+
}
|
|
325
|
+
if has_sent_message_start && !has_sent_message_stop {
|
|
326
|
+
yield Ok(Bytes::from(message_stop_sse()));
|
|
327
|
+
has_sent_message_stop = true;
|
|
328
|
+
}
|
|
236
329
|
continue;
|
|
237
330
|
}
|
|
238
331
|
|
|
@@ -255,9 +348,12 @@ fn create_sse_stream(
|
|
|
255
348
|
id: message_id.clone().unwrap_or_else(generate_message_id),
|
|
256
349
|
message_type: "message".to_string(),
|
|
257
350
|
role: "assistant".to_string(),
|
|
351
|
+
content: vec![],
|
|
258
352
|
model: current_model
|
|
259
353
|
.clone()
|
|
260
354
|
.unwrap_or_else(|| fallback_model.clone()),
|
|
355
|
+
stop_reason: None,
|
|
356
|
+
stop_sequence: None,
|
|
261
357
|
usage: anthropic::Usage {
|
|
262
358
|
input_tokens: 0,
|
|
263
359
|
output_tokens: 0,
|
|
@@ -270,44 +366,68 @@ fn create_sse_stream(
|
|
|
270
366
|
|
|
271
367
|
if let Some(reasoning) = &choice.delta.reasoning {
|
|
272
368
|
if !reasoning.is_empty() {
|
|
273
|
-
if !profile.supports_reasoning() {
|
|
369
|
+
if !profile.supports_reasoning() && compat_mode.is_strict() {
|
|
274
370
|
yield Ok(Bytes::from(stream_error_sse(
|
|
275
371
|
"reasoning deltas are not supported by the active backend profile",
|
|
276
372
|
)));
|
|
277
373
|
break;
|
|
278
374
|
}
|
|
279
375
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
376
|
+
if profile.supports_reasoning() {
|
|
377
|
+
let (idx, transitions) = transition_to_thinking(
|
|
378
|
+
&mut active_block,
|
|
379
|
+
&mut next_content_index,
|
|
380
|
+
);
|
|
381
|
+
for event in transitions {
|
|
382
|
+
yield Ok(Bytes::from(event));
|
|
383
|
+
}
|
|
384
|
+
yield Ok(Bytes::from(delta_block_sse(
|
|
385
|
+
idx,
|
|
386
|
+
anthropic::ContentBlockDeltaData::ThinkingDelta {
|
|
387
|
+
thinking: reasoning.clone(),
|
|
388
|
+
},
|
|
389
|
+
)));
|
|
286
390
|
}
|
|
287
|
-
yield Ok(Bytes::from(delta_block_sse(
|
|
288
|
-
idx,
|
|
289
|
-
anthropic::ContentBlockDeltaData::ThinkingDelta {
|
|
290
|
-
thinking: reasoning.clone(),
|
|
291
|
-
},
|
|
292
|
-
)));
|
|
293
391
|
}
|
|
294
392
|
}
|
|
295
393
|
|
|
296
394
|
if let Some(content) = &choice.delta.content {
|
|
297
395
|
if !content.is_empty() {
|
|
298
|
-
let (
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
396
|
+
let (embedded_reasoning, visible_text) = think_filter.push(content);
|
|
397
|
+
|
|
398
|
+
if profile.supports_reasoning() {
|
|
399
|
+
for reasoning in embedded_reasoning {
|
|
400
|
+
let (idx, transitions) = transition_to_thinking(
|
|
401
|
+
&mut active_block,
|
|
402
|
+
&mut next_content_index,
|
|
403
|
+
);
|
|
404
|
+
for event in transitions {
|
|
405
|
+
yield Ok(Bytes::from(event));
|
|
406
|
+
}
|
|
407
|
+
yield Ok(Bytes::from(delta_block_sse(
|
|
408
|
+
idx,
|
|
409
|
+
anthropic::ContentBlockDeltaData::ThinkingDelta {
|
|
410
|
+
thinking: reasoning,
|
|
411
|
+
},
|
|
412
|
+
)));
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
if !visible_text.is_empty() {
|
|
417
|
+
let (idx, transitions) = transition_to_text(
|
|
418
|
+
&mut active_block,
|
|
419
|
+
&mut next_content_index,
|
|
420
|
+
);
|
|
421
|
+
for event in transitions {
|
|
422
|
+
yield Ok(Bytes::from(event));
|
|
423
|
+
}
|
|
424
|
+
yield Ok(Bytes::from(delta_block_sse(
|
|
425
|
+
idx,
|
|
426
|
+
anthropic::ContentBlockDeltaData::TextDelta {
|
|
427
|
+
text: visible_text,
|
|
428
|
+
},
|
|
429
|
+
)));
|
|
304
430
|
}
|
|
305
|
-
yield Ok(Bytes::from(delta_block_sse(
|
|
306
|
-
idx,
|
|
307
|
-
anthropic::ContentBlockDeltaData::TextDelta {
|
|
308
|
-
text: content.clone(),
|
|
309
|
-
},
|
|
310
|
-
)));
|
|
311
431
|
}
|
|
312
432
|
}
|
|
313
433
|
|
|
@@ -340,6 +460,9 @@ fn create_sse_stream(
|
|
|
340
460
|
}
|
|
341
461
|
}
|
|
342
462
|
} else if active_block != Some(ActiveBlock::ToolUse(tool_index, state.content_index.unwrap())) {
|
|
463
|
+
if !compat_mode.is_strict() {
|
|
464
|
+
continue;
|
|
465
|
+
}
|
|
343
466
|
yield Ok(Bytes::from(stream_error_sse(
|
|
344
467
|
"interleaved tool call deltas are not supported safely",
|
|
345
468
|
)));
|
|
@@ -378,6 +501,11 @@ fn create_sse_stream(
|
|
|
378
501
|
}),
|
|
379
502
|
};
|
|
380
503
|
yield Ok(Bytes::from(sse_event("message_delta", &event)));
|
|
504
|
+
has_sent_message_delta = true;
|
|
505
|
+
if !has_sent_message_stop {
|
|
506
|
+
yield Ok(Bytes::from(message_stop_sse()));
|
|
507
|
+
has_sent_message_stop = true;
|
|
508
|
+
}
|
|
381
509
|
}
|
|
382
510
|
}
|
|
383
511
|
}
|
|
@@ -390,14 +518,57 @@ fn create_sse_stream(
|
|
|
390
518
|
}
|
|
391
519
|
}
|
|
392
520
|
}
|
|
521
|
+
|
|
522
|
+
let (embedded_reasoning, visible_tail) = think_filter.finish();
|
|
523
|
+
if profile.supports_reasoning() {
|
|
524
|
+
for reasoning in embedded_reasoning {
|
|
525
|
+
let (idx, transitions) =
|
|
526
|
+
transition_to_thinking(&mut active_block, &mut next_content_index);
|
|
527
|
+
for event in transitions {
|
|
528
|
+
yield Ok(Bytes::from(event));
|
|
529
|
+
}
|
|
530
|
+
yield Ok(Bytes::from(delta_block_sse(
|
|
531
|
+
idx,
|
|
532
|
+
anthropic::ContentBlockDeltaData::ThinkingDelta { thinking: reasoning },
|
|
533
|
+
)));
|
|
534
|
+
}
|
|
535
|
+
}
|
|
536
|
+
if !visible_tail.is_empty() {
|
|
537
|
+
let (idx, transitions) = transition_to_text(&mut active_block, &mut next_content_index);
|
|
538
|
+
for event in transitions {
|
|
539
|
+
yield Ok(Bytes::from(event));
|
|
540
|
+
}
|
|
541
|
+
yield Ok(Bytes::from(delta_block_sse(
|
|
542
|
+
idx,
|
|
543
|
+
anthropic::ContentBlockDeltaData::TextDelta { text: visible_tail },
|
|
544
|
+
)));
|
|
545
|
+
}
|
|
546
|
+
if let Some(previous) = active_block.take() {
|
|
547
|
+
yield Ok(Bytes::from(stop_block_sse(previous.index())));
|
|
548
|
+
}
|
|
549
|
+
if has_sent_message_start && !has_sent_message_delta {
|
|
550
|
+
let event = anthropic::StreamEvent::MessageDelta {
|
|
551
|
+
delta: anthropic::MessageDeltaData {
|
|
552
|
+
stop_reason: Some("end_turn".to_string()),
|
|
553
|
+
stop_sequence: (),
|
|
554
|
+
},
|
|
555
|
+
usage: None,
|
|
556
|
+
};
|
|
557
|
+
yield Ok(Bytes::from(sse_event("message_delta", &event)));
|
|
558
|
+
}
|
|
559
|
+
if has_sent_message_start && !has_sent_message_stop {
|
|
560
|
+
yield Ok(Bytes::from(message_stop_sse()));
|
|
561
|
+
}
|
|
393
562
|
}
|
|
394
563
|
}
|
|
395
564
|
|
|
396
565
|
pub struct Config {
|
|
397
566
|
pub backend_url: String,
|
|
398
567
|
pub backend_profile: BackendProfile,
|
|
399
|
-
pub
|
|
568
|
+
pub compat_mode: CompatMode,
|
|
569
|
+
pub primary_model: String,
|
|
400
570
|
pub reasoning_model: Option<String>,
|
|
571
|
+
pub fallback_models: Vec<String>,
|
|
401
572
|
pub api_key: Option<String>,
|
|
402
573
|
pub ingress_api_key: Option<String>,
|
|
403
574
|
pub allow_origins: Vec<String>,
|
|
@@ -406,6 +577,33 @@ pub struct Config {
|
|
|
406
577
|
|
|
407
578
|
impl Config {
|
|
408
579
|
pub fn from_env() -> Self {
|
|
580
|
+
let legacy_model = std::env::var("ANTHMORPH_MODEL").ok();
|
|
581
|
+
let primary_model = std::env::var("ANTHMORPH_PRIMARY_MODEL")
|
|
582
|
+
.ok()
|
|
583
|
+
.or_else(|| {
|
|
584
|
+
legacy_model.as_ref().and_then(|value| {
|
|
585
|
+
value
|
|
586
|
+
.split(',')
|
|
587
|
+
.next()
|
|
588
|
+
.map(str::trim)
|
|
589
|
+
.map(ToOwned::to_owned)
|
|
590
|
+
})
|
|
591
|
+
})
|
|
592
|
+
.unwrap_or_else(|| "Qwen/Qwen3.5-397B-A17B-TEE".to_string());
|
|
593
|
+
let fallback_models = std::env::var("ANTHMORPH_FALLBACK_MODELS")
|
|
594
|
+
.ok()
|
|
595
|
+
.or_else(|| legacy_model.clone())
|
|
596
|
+
.map(|value| {
|
|
597
|
+
value
|
|
598
|
+
.split(',')
|
|
599
|
+
.map(str::trim)
|
|
600
|
+
.filter(|s| !s.is_empty())
|
|
601
|
+
.filter(|s| *s != primary_model)
|
|
602
|
+
.map(ToOwned::to_owned)
|
|
603
|
+
.collect()
|
|
604
|
+
})
|
|
605
|
+
.unwrap_or_default();
|
|
606
|
+
|
|
409
607
|
Self {
|
|
410
608
|
backend_url: std::env::var("ANTHMORPH_BACKEND_URL")
|
|
411
609
|
.unwrap_or_else(|_| "https://llm.chutes.ai/v1".to_string()),
|
|
@@ -413,9 +611,13 @@ impl Config {
|
|
|
413
611
|
.ok()
|
|
414
612
|
.and_then(|v| v.parse().ok())
|
|
415
613
|
.unwrap_or(BackendProfile::Chutes),
|
|
416
|
-
|
|
417
|
-
.
|
|
614
|
+
compat_mode: std::env::var("ANTHMORPH_COMPAT_MODE")
|
|
615
|
+
.ok()
|
|
616
|
+
.and_then(|v| v.parse().ok())
|
|
617
|
+
.unwrap_or(CompatMode::Compat),
|
|
618
|
+
primary_model,
|
|
418
619
|
reasoning_model: std::env::var("ANTHMORPH_REASONING_MODEL").ok(),
|
|
620
|
+
fallback_models,
|
|
419
621
|
api_key: std::env::var("ANTHMORPH_API_KEY").ok(),
|
|
420
622
|
ingress_api_key: std::env::var("ANTHMORPH_INGRESS_API_KEY").ok(),
|
|
421
623
|
allow_origins: std::env::var("ANTHMORPH_ALLOWED_ORIGINS")
|
|
@@ -441,6 +643,10 @@ impl Config {
|
|
|
441
643
|
self.backend_url.trim_end_matches('/')
|
|
442
644
|
)
|
|
443
645
|
}
|
|
646
|
+
|
|
647
|
+
pub fn models_url(&self) -> String {
|
|
648
|
+
format!("{}/models", self.backend_url.trim_end_matches('/'))
|
|
649
|
+
}
|
|
444
650
|
}
|
|
445
651
|
|
|
446
652
|
impl fmt::Debug for Config {
|
|
@@ -448,8 +654,10 @@ impl fmt::Debug for Config {
|
|
|
448
654
|
f.debug_struct("Config")
|
|
449
655
|
.field("backend_url", &self.backend_url)
|
|
450
656
|
.field("backend_profile", &self.backend_profile.as_str())
|
|
451
|
-
.field("
|
|
657
|
+
.field("compat_mode", &self.compat_mode.as_str())
|
|
658
|
+
.field("primary_model", &self.primary_model)
|
|
452
659
|
.field("reasoning_model", &self.reasoning_model)
|
|
660
|
+
.field("fallback_models", &self.fallback_models)
|
|
453
661
|
.field("api_key", &"<hidden>")
|
|
454
662
|
.field("ingress_api_key", &"<hidden>")
|
|
455
663
|
.field("allow_origins", &self.allow_origins)
|
|
@@ -481,6 +689,80 @@ struct ToolCallState {
|
|
|
481
689
|
content_index: Option<usize>,
|
|
482
690
|
}
|
|
483
691
|
|
|
692
|
+
#[derive(Debug, Default)]
|
|
693
|
+
struct ThinkTagStreamFilter {
|
|
694
|
+
carry: String,
|
|
695
|
+
in_think: bool,
|
|
696
|
+
}
|
|
697
|
+
|
|
698
|
+
impl ThinkTagStreamFilter {
|
|
699
|
+
fn push(&mut self, chunk: &str) -> (Vec<String>, String) {
|
|
700
|
+
let mut reasoning = Vec::new();
|
|
701
|
+
let mut visible = String::new();
|
|
702
|
+
let mut work = format!("{}{}", self.carry, chunk);
|
|
703
|
+
self.carry.clear();
|
|
704
|
+
|
|
705
|
+
loop {
|
|
706
|
+
if self.in_think {
|
|
707
|
+
if let Some(end) = work.find("</think>") {
|
|
708
|
+
let think_text = &work[..end];
|
|
709
|
+
if !think_text.is_empty() {
|
|
710
|
+
reasoning.push(think_text.to_string());
|
|
711
|
+
}
|
|
712
|
+
work = work[end + "</think>".len()..].to_string();
|
|
713
|
+
self.in_think = false;
|
|
714
|
+
continue;
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
let split_at = partial_tag_suffix_start(&work, &["</think>"]);
|
|
718
|
+
if split_at > 0 {
|
|
719
|
+
reasoning.push(work[..split_at].to_string());
|
|
720
|
+
}
|
|
721
|
+
self.carry = work[split_at..].to_string();
|
|
722
|
+
break;
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
if let Some(start) = work.find("<think>") {
|
|
726
|
+
visible.push_str(&work[..start]);
|
|
727
|
+
work = work[start + "<think>".len()..].to_string();
|
|
728
|
+
self.in_think = true;
|
|
729
|
+
continue;
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
let split_at = partial_tag_suffix_start(&work, &["<think>", "</think>"]);
|
|
733
|
+
visible.push_str(&work[..split_at]);
|
|
734
|
+
self.carry = work[split_at..].to_string();
|
|
735
|
+
break;
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
(reasoning, visible)
|
|
739
|
+
}
|
|
740
|
+
|
|
741
|
+
fn finish(&mut self) -> (Vec<String>, String) {
|
|
742
|
+
if self.carry.is_empty() {
|
|
743
|
+
return (Vec::new(), String::new());
|
|
744
|
+
}
|
|
745
|
+
|
|
746
|
+
let leftover = std::mem::take(&mut self.carry);
|
|
747
|
+
if self.in_think {
|
|
748
|
+
self.in_think = false;
|
|
749
|
+
(vec![leftover], String::new())
|
|
750
|
+
} else {
|
|
751
|
+
(Vec::new(), leftover)
|
|
752
|
+
}
|
|
753
|
+
}
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
fn partial_tag_suffix_start(value: &str, tags: &[&str]) -> usize {
|
|
757
|
+
for (start, _) in value.char_indices().rev() {
|
|
758
|
+
let suffix = &value[start..];
|
|
759
|
+
if tags.iter().any(|tag| tag.starts_with(suffix)) {
|
|
760
|
+
return start;
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
value.len()
|
|
764
|
+
}
|
|
765
|
+
|
|
484
766
|
fn transition_to_thinking(
|
|
485
767
|
active_block: &mut Option<ActiveBlock>,
|
|
486
768
|
next_content_index: &mut usize,
|
|
@@ -729,6 +1011,7 @@ mod tests {
|
|
|
729
1011
|
stream::iter(chunks),
|
|
730
1012
|
"fallback".to_string(),
|
|
731
1013
|
BackendProfile::Chutes,
|
|
1014
|
+
CompatMode::Strict,
|
|
732
1015
|
);
|
|
733
1016
|
tokio::pin!(sse);
|
|
734
1017
|
|
|
@@ -744,6 +1027,102 @@ mod tests {
|
|
|
744
1027
|
assert_eq!(joined.matches("event: content_block_start").count(), 1);
|
|
745
1028
|
}
|
|
746
1029
|
|
|
1030
|
+
#[tokio::test]
|
|
1031
|
+
async fn create_sse_stream_strips_think_tags_for_generic_compat() {
|
|
1032
|
+
let first = serde_json::to_string(&json!({
|
|
1033
|
+
"id": "abc",
|
|
1034
|
+
"model": "minimax",
|
|
1035
|
+
"choices": [{
|
|
1036
|
+
"index": 0,
|
|
1037
|
+
"delta": {
|
|
1038
|
+
"content": "<think>secret</think>visible"
|
|
1039
|
+
},
|
|
1040
|
+
"finish_reason": "stop"
|
|
1041
|
+
}],
|
|
1042
|
+
"usage": {
|
|
1043
|
+
"completion_tokens": 4
|
|
1044
|
+
}
|
|
1045
|
+
}))
|
|
1046
|
+
.unwrap();
|
|
1047
|
+
|
|
1048
|
+
let chunks = vec![
|
|
1049
|
+
Ok(Bytes::from(format!("data: {first}\n\n"))),
|
|
1050
|
+
Ok(Bytes::from("data: [DONE]\n\n")),
|
|
1051
|
+
];
|
|
1052
|
+
|
|
1053
|
+
let mut output = Vec::new();
|
|
1054
|
+
let sse = create_sse_stream(
|
|
1055
|
+
stream::iter(chunks),
|
|
1056
|
+
"fallback".to_string(),
|
|
1057
|
+
BackendProfile::OpenaiGeneric,
|
|
1058
|
+
CompatMode::Compat,
|
|
1059
|
+
);
|
|
1060
|
+
tokio::pin!(sse);
|
|
1061
|
+
|
|
1062
|
+
while let Some(item) = sse.next().await {
|
|
1063
|
+
output.push(String::from_utf8(item.unwrap().to_vec()).unwrap());
|
|
1064
|
+
}
|
|
1065
|
+
|
|
1066
|
+
let joined = output.join("");
|
|
1067
|
+
assert!(joined.contains("visible"));
|
|
1068
|
+
assert!(!joined.contains("secret"));
|
|
1069
|
+
}
|
|
1070
|
+
|
|
1071
|
+
#[test]
|
|
1072
|
+
fn message_start_sse_includes_required_anthropic_fields() {
|
|
1073
|
+
let event = anthropic::StreamEvent::MessageStart {
|
|
1074
|
+
message: anthropic::MessageStartData {
|
|
1075
|
+
id: "msg_test".to_string(),
|
|
1076
|
+
message_type: "message".to_string(),
|
|
1077
|
+
role: "assistant".to_string(),
|
|
1078
|
+
content: vec![],
|
|
1079
|
+
model: "glm-5.1".to_string(),
|
|
1080
|
+
stop_reason: None,
|
|
1081
|
+
stop_sequence: None,
|
|
1082
|
+
usage: anthropic::Usage {
|
|
1083
|
+
input_tokens: 0,
|
|
1084
|
+
output_tokens: 0,
|
|
1085
|
+
},
|
|
1086
|
+
},
|
|
1087
|
+
};
|
|
1088
|
+
|
|
1089
|
+
let serialized = sse_event("message_start", &event);
|
|
1090
|
+
let payload = serialized
|
|
1091
|
+
.lines()
|
|
1092
|
+
.find_map(|line| line.strip_prefix("data: "))
|
|
1093
|
+
.expect("message_start data line");
|
|
1094
|
+
let parsed: serde_json::Value = serde_json::from_str(payload).expect("valid json");
|
|
1095
|
+
|
|
1096
|
+
assert_eq!(parsed["message"]["type"], "message");
|
|
1097
|
+
assert_eq!(parsed["message"]["role"], "assistant");
|
|
1098
|
+
assert_eq!(parsed["message"]["content"], json!([]));
|
|
1099
|
+
assert!(parsed["message"]["stop_reason"].is_null());
|
|
1100
|
+
assert!(parsed["message"]["stop_sequence"].is_null());
|
|
1101
|
+
}
|
|
1102
|
+
|
|
1103
|
+
#[test]
|
|
1104
|
+
fn content_block_start_tool_use_has_flat_anthropic_shape() {
|
|
1105
|
+
let payload = start_block_sse(
|
|
1106
|
+
0,
|
|
1107
|
+
anthropic::ContentBlockStartData::ToolUse {
|
|
1108
|
+
id: "toolu_123".to_string(),
|
|
1109
|
+
name: "mcp__memory__memory_read".to_string(),
|
|
1110
|
+
input: json!({}),
|
|
1111
|
+
},
|
|
1112
|
+
)
|
|
1113
|
+
.lines()
|
|
1114
|
+
.find_map(|line| line.strip_prefix("data: "))
|
|
1115
|
+
.expect("content_block_start data line")
|
|
1116
|
+
.to_string();
|
|
1117
|
+
|
|
1118
|
+
let parsed: serde_json::Value = serde_json::from_str(&payload).expect("valid json");
|
|
1119
|
+
assert_eq!(parsed["content_block"]["type"], "tool_use");
|
|
1120
|
+
assert_eq!(parsed["content_block"]["id"], "toolu_123");
|
|
1121
|
+
assert_eq!(parsed["content_block"]["name"], "mcp__memory__memory_read");
|
|
1122
|
+
assert_eq!(parsed["content_block"]["input"], json!({}));
|
|
1123
|
+
assert!(parsed["content_block"].get("content_block").is_none());
|
|
1124
|
+
}
|
|
1125
|
+
|
|
747
1126
|
#[test]
|
|
748
1127
|
fn extracts_multi_line_sse_data() {
|
|
749
1128
|
let block = "event: message\ndata: first\ndata: second\n";
|
|
@@ -755,8 +1134,10 @@ mod tests {
|
|
|
755
1134
|
let config = Config {
|
|
756
1135
|
backend_url: "https://example.com".to_string(),
|
|
757
1136
|
backend_profile: BackendProfile::OpenaiGeneric,
|
|
758
|
-
|
|
1137
|
+
compat_mode: CompatMode::Strict,
|
|
1138
|
+
primary_model: "model".to_string(),
|
|
759
1139
|
reasoning_model: None,
|
|
1140
|
+
fallback_models: Vec::new(),
|
|
760
1141
|
api_key: None,
|
|
761
1142
|
ingress_api_key: Some("secret".to_string()),
|
|
762
1143
|
allow_origins: Vec::new(),
|
|
@@ -783,8 +1164,10 @@ mod tests {
|
|
|
783
1164
|
let config = Config {
|
|
784
1165
|
backend_url: "https://example.com".to_string(),
|
|
785
1166
|
backend_profile: BackendProfile::OpenaiGeneric,
|
|
786
|
-
|
|
1167
|
+
compat_mode: CompatMode::Strict,
|
|
1168
|
+
primary_model: "model".to_string(),
|
|
787
1169
|
reasoning_model: None,
|
|
1170
|
+
fallback_models: Vec::new(),
|
|
788
1171
|
api_key: None,
|
|
789
1172
|
ingress_api_key: Some("secret".to_string()),
|
|
790
1173
|
allow_origins: Vec::new(),
|
|
@@ -801,8 +1184,10 @@ mod tests {
|
|
|
801
1184
|
let config = Config {
|
|
802
1185
|
backend_url: "https://example.com".to_string(),
|
|
803
1186
|
backend_profile: BackendProfile::OpenaiGeneric,
|
|
804
|
-
|
|
1187
|
+
compat_mode: CompatMode::Strict,
|
|
1188
|
+
primary_model: "model".to_string(),
|
|
805
1189
|
reasoning_model: None,
|
|
1190
|
+
fallback_models: Vec::new(),
|
|
806
1191
|
api_key: None,
|
|
807
1192
|
ingress_api_key: None,
|
|
808
1193
|
allow_origins: vec!["https://allowed.example".to_string()],
|