@mmmbuto/anthmorph 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,11 +23,29 @@ pub struct AnthropicRequest {
23
23
  #[serde(default)]
24
24
  pub tools: Option<Vec<Tool>>,
25
25
  #[serde(default)]
26
+ pub thinking: Option<ThinkingConfig>,
27
+ #[serde(default)]
28
+ pub output_config: Option<OutputConfig>,
29
+ #[serde(default)]
26
30
  pub stop_sequences: Option<Vec<String>>,
27
31
  #[serde(flatten)]
28
32
  pub extra: serde_json::Map<String, serde_json::Value>,
29
33
  }
30
34
 
35
/// Deserialized form of the Anthropic `thinking` request parameter.
#[derive(Debug, Clone, Deserialize)]
pub struct ThinkingConfig {
    // Wire field `type`; compared case-insensitively against "disabled"
    // elsewhere to decide whether extended thinking is requested.
    #[serde(rename = "type")]
    pub thinking_type: String,
    // Optional thinking token budget; the alias also accepts the camelCase
    // `budgetTokens` spelling some clients send.
    #[serde(default, alias = "budgetTokens")]
    pub budget_tokens: Option<usize>,
}
42
+
43
/// Deserialized form of the Anthropic `output_config` request parameter.
#[derive(Debug, Clone, Deserialize)]
pub struct OutputConfig {
    // Optional effort hint; free-form string. NOTE(review): not interpreted
    // anywhere in the visible code — presumably forwarded or reserved for a
    // later transform step; confirm against the transform module.
    #[serde(default)]
    pub effort: Option<String>,
}
48
+
31
49
  #[derive(Debug, Clone)]
32
50
  pub enum SystemPrompt {
33
51
  Single(String),
@@ -115,6 +133,8 @@ pub enum ContentBlock {
115
133
  Text { text: String },
116
134
  #[serde(rename = "image")]
117
135
  Image { source: ImageSource },
136
+ #[serde(rename = "document")]
137
+ Document { source: serde_json::Value },
118
138
  #[serde(rename = "tool_use")]
119
139
  ToolUse {
120
140
  id: String,
@@ -130,6 +150,20 @@ pub enum ContentBlock {
130
150
  },
131
151
  #[serde(rename = "thinking")]
132
152
  Thinking { thinking: String },
153
+ #[serde(rename = "server_tool_use")]
154
+ ServerToolUse {
155
+ #[serde(default)]
156
+ name: Option<String>,
157
+ #[serde(default)]
158
+ input: Option<serde_json::Value>,
159
+ },
160
+ #[serde(rename = "search_result")]
161
+ SearchResult {
162
+ #[serde(default)]
163
+ query: Option<String>,
164
+ #[serde(default)]
165
+ content: Vec<serde_json::Value>,
166
+ },
133
167
  #[serde(other)]
134
168
  Other,
135
169
  }
@@ -281,3 +315,9 @@ pub struct MessageDeltaUsage {
281
315
  #[serde(rename = "output_tokens")]
282
316
  pub output_tokens: usize,
283
317
  }
318
+
319
/// Response body for the token-counting endpoint: the estimated number of
/// input tokens for a request.
#[derive(Debug, Clone, Serialize)]
pub struct CountTokensResponse {
    // Serialized as `input_tokens`; the rename is redundant with the field
    // name but matches the explicit-rename style used by sibling structs.
    #[serde(rename = "input_tokens")]
    pub input_tokens: usize,
}
package/src/proxy.rs CHANGED
@@ -1,4 +1,4 @@
1
- use crate::config::BackendProfile;
1
+ use crate::config::{BackendProfile, CompatMode};
2
2
  use crate::error::{ProxyError, ProxyResult};
3
3
  use crate::models::{anthropic, openai};
4
4
  use crate::transform::{self, generate_message_id};
@@ -21,12 +21,24 @@ use tower_http::cors::{AllowOrigin, CorsLayer};
21
21
 
22
22
  fn map_model(client_model: &str, config: &Config) -> String {
23
23
  match client_model {
24
- m if m.is_empty() || m == "default" => config.model.clone(),
25
- m if m.starts_with("claude-") => config.model.clone(),
24
+ m if m.is_empty() || m == "default" => config.primary_model.clone(),
25
+ m if m.starts_with("claude-") => config.primary_model.clone(),
26
26
  other => other.to_string(),
27
27
  }
28
28
  }
29
29
 
30
+ fn request_has_thinking(req: &anthropic::AnthropicRequest) -> bool {
31
+ if let Some(thinking) = &req.thinking {
32
+ return !thinking.thinking_type.eq_ignore_ascii_case("disabled");
33
+ }
34
+
35
+ req.extra
36
+ .get("thinking")
37
+ .and_then(|value| value.get("type").and_then(|type_value| type_value.as_str()))
38
+ .map(|value| !value.eq_ignore_ascii_case("disabled"))
39
+ .is_some()
40
+ }
41
+
30
42
  pub async fn proxy_handler(
31
43
  headers: HeaderMap,
32
44
  Extension(config): Extension<Arc<Config>>,
@@ -49,9 +61,12 @@ pub async fn proxy_handler(
49
61
  match &blocks[0] {
50
62
  anthropic::ContentBlock::Text { .. } => "text_block",
51
63
  anthropic::ContentBlock::Image { .. } => "image_block",
64
+ anthropic::ContentBlock::Document { .. } => "document_block",
52
65
  anthropic::ContentBlock::ToolUse { .. } => "tool_use_block",
53
66
  anthropic::ContentBlock::ToolResult { .. } => "tool_result_block",
54
67
  anthropic::ContentBlock::Thinking { .. } => "thinking_block",
68
+ anthropic::ContentBlock::ServerToolUse { .. } => "server_tool_use_block",
69
+ anthropic::ContentBlock::SearchResult { .. } => "search_result_block",
55
70
  anthropic::ContentBlock::Other => "unknown_block",
56
71
  }
57
72
  }
@@ -61,21 +76,17 @@ pub async fn proxy_handler(
61
76
  }
62
77
  tracing::debug!("Streaming: {}", is_streaming);
63
78
 
64
- let model = if req
65
- .extra
66
- .get("thinking")
67
- .and_then(|v| v.get("type"))
68
- .is_some()
69
- {
79
+ let model = if request_has_thinking(&req) {
70
80
  config
71
81
  .reasoning_model
72
82
  .clone()
73
- .unwrap_or_else(|| config.model.clone())
83
+ .unwrap_or_else(|| config.primary_model.clone())
74
84
  } else {
75
85
  map_model(&req.model, &config)
76
86
  };
77
87
 
78
- let openai_req = transform::anthropic_to_openai(req, &model, config.backend_profile)?;
88
+ let openai_req =
89
+ transform::anthropic_to_openai(req, &model, config.backend_profile, config.compat_mode)?;
79
90
 
80
91
  if is_streaming {
81
92
  handle_streaming(config, client, openai_req).await
@@ -84,6 +95,58 @@ pub async fn proxy_handler(
84
95
  }
85
96
  }
86
97
 
98
+ pub async fn count_tokens_handler(
99
+ Extension(config): Extension<Arc<Config>>,
100
+ Json(req): Json<anthropic::AnthropicRequest>,
101
+ ) -> ProxyResult<Json<anthropic::CountTokensResponse>> {
102
+ let model = if request_has_thinking(&req) {
103
+ config
104
+ .reasoning_model
105
+ .clone()
106
+ .unwrap_or_else(|| config.primary_model.clone())
107
+ } else {
108
+ map_model(&req.model, &config)
109
+ };
110
+ let openai_req =
111
+ transform::anthropic_to_openai(req, &model, config.backend_profile, config.compat_mode)?;
112
+ let serialized = serde_json::to_string(&openai_req)?;
113
+ let estimated = std::cmp::max(1, serialized.chars().count() / 4);
114
+ Ok(Json(anthropic::CountTokensResponse {
115
+ input_tokens: estimated,
116
+ }))
117
+ }
118
+
119
+ pub async fn models_handler(
120
+ Extension(config): Extension<Arc<Config>>,
121
+ Extension(client): Extension<Client>,
122
+ ) -> ProxyResult<Response> {
123
+ let url = config.models_url();
124
+ let mut req_builder = client.get(&url).timeout(Duration::from_secs(60));
125
+
126
+ if let Some(api_key) = &config.api_key {
127
+ req_builder = req_builder.header("Authorization", format!("Bearer {}", api_key));
128
+ }
129
+
130
+ let response = req_builder.send().await.map_err(ProxyError::Http)?;
131
+ let status = response.status();
132
+ let body = response.bytes().await.map_err(ProxyError::Http)?;
133
+
134
+ if !status.is_success() {
135
+ return Err(ProxyError::Upstream(format!(
136
+ "Upstream returned {}: {}",
137
+ status,
138
+ String::from_utf8_lossy(&body)
139
+ )));
140
+ }
141
+
142
+ let mut headers = HeaderMap::new();
143
+ headers.insert(
144
+ header::CONTENT_TYPE,
145
+ HeaderValue::from_static("application/json"),
146
+ );
147
+ Ok((headers, body).into_response())
148
+ }
149
+
87
150
  async fn handle_non_streaming(
88
151
  config: Arc<Config>,
89
152
  client: Client,
@@ -120,8 +183,12 @@ async fn handle_non_streaming(
120
183
  }
121
184
 
122
185
  let openai_resp: openai::OpenAIResponse = response.json().await?;
123
- let anthropic_resp =
124
- transform::openai_to_anthropic(openai_resp, &openai_req.model, config.backend_profile)?;
186
+ let anthropic_resp = transform::openai_to_anthropic(
187
+ openai_resp,
188
+ &openai_req.model,
189
+ config.backend_profile,
190
+ config.compat_mode,
191
+ )?;
125
192
 
126
193
  Ok(Json(anthropic_resp).into_response())
127
194
  }
@@ -162,7 +229,12 @@ async fn handle_streaming(
162
229
  }
163
230
 
164
231
  let stream = response.bytes_stream();
165
- let sse_stream = create_sse_stream(stream, openai_req.model.clone(), config.backend_profile);
232
+ let sse_stream = create_sse_stream(
233
+ stream,
234
+ openai_req.model.clone(),
235
+ config.backend_profile,
236
+ config.compat_mode,
237
+ );
166
238
 
167
239
  let mut headers = HeaderMap::new();
168
240
  headers.insert(
@@ -179,6 +251,7 @@ fn create_sse_stream(
179
251
  stream: impl Stream<Item = Result<Bytes, reqwest::Error>> + Send + 'static,
180
252
  fallback_model: String,
181
253
  profile: BackendProfile,
254
+ compat_mode: CompatMode,
182
255
  ) -> impl Stream<Item = Result<Bytes, std::io::Error>> + Send {
183
256
  async_stream::stream! {
184
257
  let mut buffer = String::new();
@@ -186,8 +259,11 @@ fn create_sse_stream(
186
259
  let mut current_model = None;
187
260
  let mut next_content_index = 0usize;
188
261
  let mut has_sent_message_start = false;
262
+ let mut has_sent_message_delta = false;
263
+ let mut has_sent_message_stop = false;
189
264
  let mut active_block: Option<ActiveBlock> = None;
190
265
  let mut tool_states: BTreeMap<usize, ToolCallState> = BTreeMap::new();
266
+ let mut think_filter = ThinkTagStreamFilter::default();
191
267
 
192
268
  pin!(stream);
193
269
 
@@ -232,7 +308,24 @@ fn create_sse_stream(
232
308
  };
233
309
 
234
310
  if data.trim() == "[DONE]" {
235
- yield Ok(Bytes::from(message_stop_sse()));
311
+ if let Some(previous) = active_block.take() {
312
+ yield Ok(Bytes::from(stop_block_sse(previous.index())));
313
+ }
314
+ if has_sent_message_start && !has_sent_message_delta {
315
+ let event = anthropic::StreamEvent::MessageDelta {
316
+ delta: anthropic::MessageDeltaData {
317
+ stop_reason: Some("end_turn".to_string()),
318
+ stop_sequence: (),
319
+ },
320
+ usage: None,
321
+ };
322
+ yield Ok(Bytes::from(sse_event("message_delta", &event)));
323
+ has_sent_message_delta = true;
324
+ }
325
+ if has_sent_message_start && !has_sent_message_stop {
326
+ yield Ok(Bytes::from(message_stop_sse()));
327
+ has_sent_message_stop = true;
328
+ }
236
329
  continue;
237
330
  }
238
331
 
@@ -273,44 +366,68 @@ fn create_sse_stream(
273
366
 
274
367
  if let Some(reasoning) = &choice.delta.reasoning {
275
368
  if !reasoning.is_empty() {
276
- if !profile.supports_reasoning() {
369
+ if !profile.supports_reasoning() && compat_mode.is_strict() {
277
370
  yield Ok(Bytes::from(stream_error_sse(
278
371
  "reasoning deltas are not supported by the active backend profile",
279
372
  )));
280
373
  break;
281
374
  }
282
375
 
283
- let (idx, transitions) = transition_to_thinking(
284
- &mut active_block,
285
- &mut next_content_index,
286
- );
287
- for event in transitions {
288
- yield Ok(Bytes::from(event));
376
+ if profile.supports_reasoning() {
377
+ let (idx, transitions) = transition_to_thinking(
378
+ &mut active_block,
379
+ &mut next_content_index,
380
+ );
381
+ for event in transitions {
382
+ yield Ok(Bytes::from(event));
383
+ }
384
+ yield Ok(Bytes::from(delta_block_sse(
385
+ idx,
386
+ anthropic::ContentBlockDeltaData::ThinkingDelta {
387
+ thinking: reasoning.clone(),
388
+ },
389
+ )));
289
390
  }
290
- yield Ok(Bytes::from(delta_block_sse(
291
- idx,
292
- anthropic::ContentBlockDeltaData::ThinkingDelta {
293
- thinking: reasoning.clone(),
294
- },
295
- )));
296
391
  }
297
392
  }
298
393
 
299
394
  if let Some(content) = &choice.delta.content {
300
395
  if !content.is_empty() {
301
- let (idx, transitions) = transition_to_text(
302
- &mut active_block,
303
- &mut next_content_index,
304
- );
305
- for event in transitions {
306
- yield Ok(Bytes::from(event));
396
+ let (embedded_reasoning, visible_text) = think_filter.push(content);
397
+
398
+ if profile.supports_reasoning() {
399
+ for reasoning in embedded_reasoning {
400
+ let (idx, transitions) = transition_to_thinking(
401
+ &mut active_block,
402
+ &mut next_content_index,
403
+ );
404
+ for event in transitions {
405
+ yield Ok(Bytes::from(event));
406
+ }
407
+ yield Ok(Bytes::from(delta_block_sse(
408
+ idx,
409
+ anthropic::ContentBlockDeltaData::ThinkingDelta {
410
+ thinking: reasoning,
411
+ },
412
+ )));
413
+ }
414
+ }
415
+
416
+ if !visible_text.is_empty() {
417
+ let (idx, transitions) = transition_to_text(
418
+ &mut active_block,
419
+ &mut next_content_index,
420
+ );
421
+ for event in transitions {
422
+ yield Ok(Bytes::from(event));
423
+ }
424
+ yield Ok(Bytes::from(delta_block_sse(
425
+ idx,
426
+ anthropic::ContentBlockDeltaData::TextDelta {
427
+ text: visible_text,
428
+ },
429
+ )));
307
430
  }
308
- yield Ok(Bytes::from(delta_block_sse(
309
- idx,
310
- anthropic::ContentBlockDeltaData::TextDelta {
311
- text: content.clone(),
312
- },
313
- )));
314
431
  }
315
432
  }
316
433
 
@@ -343,6 +460,9 @@ fn create_sse_stream(
343
460
  }
344
461
  }
345
462
  } else if active_block != Some(ActiveBlock::ToolUse(tool_index, state.content_index.unwrap())) {
463
+ if !compat_mode.is_strict() {
464
+ continue;
465
+ }
346
466
  yield Ok(Bytes::from(stream_error_sse(
347
467
  "interleaved tool call deltas are not supported safely",
348
468
  )));
@@ -381,6 +501,11 @@ fn create_sse_stream(
381
501
  }),
382
502
  };
383
503
  yield Ok(Bytes::from(sse_event("message_delta", &event)));
504
+ has_sent_message_delta = true;
505
+ if !has_sent_message_stop {
506
+ yield Ok(Bytes::from(message_stop_sse()));
507
+ has_sent_message_stop = true;
508
+ }
384
509
  }
385
510
  }
386
511
  }
@@ -393,14 +518,57 @@ fn create_sse_stream(
393
518
  }
394
519
  }
395
520
  }
521
+
522
+ let (embedded_reasoning, visible_tail) = think_filter.finish();
523
+ if profile.supports_reasoning() {
524
+ for reasoning in embedded_reasoning {
525
+ let (idx, transitions) =
526
+ transition_to_thinking(&mut active_block, &mut next_content_index);
527
+ for event in transitions {
528
+ yield Ok(Bytes::from(event));
529
+ }
530
+ yield Ok(Bytes::from(delta_block_sse(
531
+ idx,
532
+ anthropic::ContentBlockDeltaData::ThinkingDelta { thinking: reasoning },
533
+ )));
534
+ }
535
+ }
536
+ if !visible_tail.is_empty() {
537
+ let (idx, transitions) = transition_to_text(&mut active_block, &mut next_content_index);
538
+ for event in transitions {
539
+ yield Ok(Bytes::from(event));
540
+ }
541
+ yield Ok(Bytes::from(delta_block_sse(
542
+ idx,
543
+ anthropic::ContentBlockDeltaData::TextDelta { text: visible_tail },
544
+ )));
545
+ }
546
+ if let Some(previous) = active_block.take() {
547
+ yield Ok(Bytes::from(stop_block_sse(previous.index())));
548
+ }
549
+ if has_sent_message_start && !has_sent_message_delta {
550
+ let event = anthropic::StreamEvent::MessageDelta {
551
+ delta: anthropic::MessageDeltaData {
552
+ stop_reason: Some("end_turn".to_string()),
553
+ stop_sequence: (),
554
+ },
555
+ usage: None,
556
+ };
557
+ yield Ok(Bytes::from(sse_event("message_delta", &event)));
558
+ }
559
+ if has_sent_message_start && !has_sent_message_stop {
560
+ yield Ok(Bytes::from(message_stop_sse()));
561
+ }
396
562
  }
397
563
  }
398
564
 
399
565
  pub struct Config {
400
566
  pub backend_url: String,
401
567
  pub backend_profile: BackendProfile,
402
- pub model: String,
568
+ pub compat_mode: CompatMode,
569
+ pub primary_model: String,
403
570
  pub reasoning_model: Option<String>,
571
+ pub fallback_models: Vec<String>,
404
572
  pub api_key: Option<String>,
405
573
  pub ingress_api_key: Option<String>,
406
574
  pub allow_origins: Vec<String>,
@@ -409,6 +577,33 @@ pub struct Config {
409
577
 
410
578
  impl Config {
411
579
  pub fn from_env() -> Self {
580
+ let legacy_model = std::env::var("ANTHMORPH_MODEL").ok();
581
+ let primary_model = std::env::var("ANTHMORPH_PRIMARY_MODEL")
582
+ .ok()
583
+ .or_else(|| {
584
+ legacy_model.as_ref().and_then(|value| {
585
+ value
586
+ .split(',')
587
+ .next()
588
+ .map(str::trim)
589
+ .map(ToOwned::to_owned)
590
+ })
591
+ })
592
+ .unwrap_or_else(|| "Qwen/Qwen3.5-397B-A17B-TEE".to_string());
593
+ let fallback_models = std::env::var("ANTHMORPH_FALLBACK_MODELS")
594
+ .ok()
595
+ .or_else(|| legacy_model.clone())
596
+ .map(|value| {
597
+ value
598
+ .split(',')
599
+ .map(str::trim)
600
+ .filter(|s| !s.is_empty())
601
+ .filter(|s| *s != primary_model)
602
+ .map(ToOwned::to_owned)
603
+ .collect()
604
+ })
605
+ .unwrap_or_default();
606
+
412
607
  Self {
413
608
  backend_url: std::env::var("ANTHMORPH_BACKEND_URL")
414
609
  .unwrap_or_else(|_| "https://llm.chutes.ai/v1".to_string()),
@@ -416,11 +611,13 @@ impl Config {
416
611
  .ok()
417
612
  .and_then(|v| v.parse().ok())
418
613
  .unwrap_or(BackendProfile::Chutes),
419
- model: std::env::var("ANTHMORPH_MODEL").unwrap_or_else(|_| {
420
- "Qwen/Qwen3.5-397B-A17B-TEE,zai-org/GLM-5-TEE,deepseek-ai/DeepSeek-V3.2-TEE"
421
- .to_string()
422
- }),
614
+ compat_mode: std::env::var("ANTHMORPH_COMPAT_MODE")
615
+ .ok()
616
+ .and_then(|v| v.parse().ok())
617
+ .unwrap_or(CompatMode::Compat),
618
+ primary_model,
423
619
  reasoning_model: std::env::var("ANTHMORPH_REASONING_MODEL").ok(),
620
+ fallback_models,
424
621
  api_key: std::env::var("ANTHMORPH_API_KEY").ok(),
425
622
  ingress_api_key: std::env::var("ANTHMORPH_INGRESS_API_KEY").ok(),
426
623
  allow_origins: std::env::var("ANTHMORPH_ALLOWED_ORIGINS")
@@ -446,6 +643,10 @@ impl Config {
446
643
  self.backend_url.trim_end_matches('/')
447
644
  )
448
645
  }
646
+
647
+ pub fn models_url(&self) -> String {
648
+ format!("{}/models", self.backend_url.trim_end_matches('/'))
649
+ }
449
650
  }
450
651
 
451
652
  impl fmt::Debug for Config {
@@ -453,8 +654,10 @@ impl fmt::Debug for Config {
453
654
  f.debug_struct("Config")
454
655
  .field("backend_url", &self.backend_url)
455
656
  .field("backend_profile", &self.backend_profile.as_str())
456
- .field("model", &self.model)
657
+ .field("compat_mode", &self.compat_mode.as_str())
658
+ .field("primary_model", &self.primary_model)
457
659
  .field("reasoning_model", &self.reasoning_model)
660
+ .field("fallback_models", &self.fallback_models)
458
661
  .field("api_key", &"<hidden>")
459
662
  .field("ingress_api_key", &"<hidden>")
460
663
  .field("allow_origins", &self.allow_origins)
@@ -486,6 +689,80 @@ struct ToolCallState {
486
689
  content_index: Option<usize>,
487
690
  }
488
691
 
692
+ #[derive(Debug, Default)]
693
+ struct ThinkTagStreamFilter {
694
+ carry: String,
695
+ in_think: bool,
696
+ }
697
+
698
+ impl ThinkTagStreamFilter {
699
+ fn push(&mut self, chunk: &str) -> (Vec<String>, String) {
700
+ let mut reasoning = Vec::new();
701
+ let mut visible = String::new();
702
+ let mut work = format!("{}{}", self.carry, chunk);
703
+ self.carry.clear();
704
+
705
+ loop {
706
+ if self.in_think {
707
+ if let Some(end) = work.find("</think>") {
708
+ let think_text = &work[..end];
709
+ if !think_text.is_empty() {
710
+ reasoning.push(think_text.to_string());
711
+ }
712
+ work = work[end + "</think>".len()..].to_string();
713
+ self.in_think = false;
714
+ continue;
715
+ }
716
+
717
+ let split_at = partial_tag_suffix_start(&work, &["</think>"]);
718
+ if split_at > 0 {
719
+ reasoning.push(work[..split_at].to_string());
720
+ }
721
+ self.carry = work[split_at..].to_string();
722
+ break;
723
+ }
724
+
725
+ if let Some(start) = work.find("<think>") {
726
+ visible.push_str(&work[..start]);
727
+ work = work[start + "<think>".len()..].to_string();
728
+ self.in_think = true;
729
+ continue;
730
+ }
731
+
732
+ let split_at = partial_tag_suffix_start(&work, &["<think>", "</think>"]);
733
+ visible.push_str(&work[..split_at]);
734
+ self.carry = work[split_at..].to_string();
735
+ break;
736
+ }
737
+
738
+ (reasoning, visible)
739
+ }
740
+
741
+ fn finish(&mut self) -> (Vec<String>, String) {
742
+ if self.carry.is_empty() {
743
+ return (Vec::new(), String::new());
744
+ }
745
+
746
+ let leftover = std::mem::take(&mut self.carry);
747
+ if self.in_think {
748
+ self.in_think = false;
749
+ (vec![leftover], String::new())
750
+ } else {
751
+ (Vec::new(), leftover)
752
+ }
753
+ }
754
+ }
755
+
756
+ fn partial_tag_suffix_start(value: &str, tags: &[&str]) -> usize {
757
+ for (start, _) in value.char_indices().rev() {
758
+ let suffix = &value[start..];
759
+ if tags.iter().any(|tag| tag.starts_with(suffix)) {
760
+ return start;
761
+ }
762
+ }
763
+ value.len()
764
+ }
765
+
489
766
  fn transition_to_thinking(
490
767
  active_block: &mut Option<ActiveBlock>,
491
768
  next_content_index: &mut usize,
@@ -734,6 +1011,7 @@ mod tests {
734
1011
  stream::iter(chunks),
735
1012
  "fallback".to_string(),
736
1013
  BackendProfile::Chutes,
1014
+ CompatMode::Strict,
737
1015
  );
738
1016
  tokio::pin!(sse);
739
1017
 
@@ -749,6 +1027,47 @@ mod tests {
749
1027
  assert_eq!(joined.matches("event: content_block_start").count(), 1);
750
1028
  }
751
1029
 
1030
+ #[tokio::test]
1031
+ async fn create_sse_stream_strips_think_tags_for_generic_compat() {
1032
+ let first = serde_json::to_string(&json!({
1033
+ "id": "abc",
1034
+ "model": "minimax",
1035
+ "choices": [{
1036
+ "index": 0,
1037
+ "delta": {
1038
+ "content": "<think>secret</think>visible"
1039
+ },
1040
+ "finish_reason": "stop"
1041
+ }],
1042
+ "usage": {
1043
+ "completion_tokens": 4
1044
+ }
1045
+ }))
1046
+ .unwrap();
1047
+
1048
+ let chunks = vec![
1049
+ Ok(Bytes::from(format!("data: {first}\n\n"))),
1050
+ Ok(Bytes::from("data: [DONE]\n\n")),
1051
+ ];
1052
+
1053
+ let mut output = Vec::new();
1054
+ let sse = create_sse_stream(
1055
+ stream::iter(chunks),
1056
+ "fallback".to_string(),
1057
+ BackendProfile::OpenaiGeneric,
1058
+ CompatMode::Compat,
1059
+ );
1060
+ tokio::pin!(sse);
1061
+
1062
+ while let Some(item) = sse.next().await {
1063
+ output.push(String::from_utf8(item.unwrap().to_vec()).unwrap());
1064
+ }
1065
+
1066
+ let joined = output.join("");
1067
+ assert!(joined.contains("visible"));
1068
+ assert!(!joined.contains("secret"));
1069
+ }
1070
+
752
1071
  #[test]
753
1072
  fn message_start_sse_includes_required_anthropic_fields() {
754
1073
  let event = anthropic::StreamEvent::MessageStart {
@@ -815,8 +1134,10 @@ mod tests {
815
1134
  let config = Config {
816
1135
  backend_url: "https://example.com".to_string(),
817
1136
  backend_profile: BackendProfile::OpenaiGeneric,
818
- model: "model".to_string(),
1137
+ compat_mode: CompatMode::Strict,
1138
+ primary_model: "model".to_string(),
819
1139
  reasoning_model: None,
1140
+ fallback_models: Vec::new(),
820
1141
  api_key: None,
821
1142
  ingress_api_key: Some("secret".to_string()),
822
1143
  allow_origins: Vec::new(),
@@ -843,8 +1164,10 @@ mod tests {
843
1164
  let config = Config {
844
1165
  backend_url: "https://example.com".to_string(),
845
1166
  backend_profile: BackendProfile::OpenaiGeneric,
846
- model: "model".to_string(),
1167
+ compat_mode: CompatMode::Strict,
1168
+ primary_model: "model".to_string(),
847
1169
  reasoning_model: None,
1170
+ fallback_models: Vec::new(),
848
1171
  api_key: None,
849
1172
  ingress_api_key: Some("secret".to_string()),
850
1173
  allow_origins: Vec::new(),
@@ -861,8 +1184,10 @@ mod tests {
861
1184
  let config = Config {
862
1185
  backend_url: "https://example.com".to_string(),
863
1186
  backend_profile: BackendProfile::OpenaiGeneric,
864
- model: "model".to_string(),
1187
+ compat_mode: CompatMode::Strict,
1188
+ primary_model: "model".to_string(),
865
1189
  reasoning_model: None,
1190
+ fallback_models: Vec::new(),
866
1191
  api_key: None,
867
1192
  ingress_api_key: None,
868
1193
  allow_origins: vec!["https://allowed.example".to_string()],