@mmmbuto/anthmorph 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/proxy.rs CHANGED
@@ -1,4 +1,4 @@
1
- use crate::config::BackendProfile;
1
+ use crate::config::{BackendProfile, CompatMode};
2
2
  use crate::error::{ProxyError, ProxyResult};
3
3
  use crate::models::{anthropic, openai};
4
4
  use crate::transform::{self, generate_message_id};
@@ -21,12 +21,24 @@ use tower_http::cors::{AllowOrigin, CorsLayer};
21
21
 
22
22
  fn map_model(client_model: &str, config: &Config) -> String {
23
23
  match client_model {
24
- m if m.is_empty() || m == "default" => config.model.clone(),
25
- m if m.starts_with("claude-") => config.model.clone(),
24
+ m if m.is_empty() || m == "default" => config.primary_model.clone(),
25
+ m if m.starts_with("claude-") => config.primary_model.clone(),
26
26
  other => other.to_string(),
27
27
  }
28
28
  }
29
29
 
30
+ fn request_has_thinking(req: &anthropic::AnthropicRequest) -> bool {
31
+ if let Some(thinking) = &req.thinking {
32
+ return !thinking.thinking_type.eq_ignore_ascii_case("disabled");
33
+ }
34
+
35
+ req.extra
36
+ .get("thinking")
37
+ .and_then(|value| value.get("type").and_then(|type_value| type_value.as_str()))
38
+ .map(|value| !value.eq_ignore_ascii_case("disabled"))
39
+ .is_some()
40
+ }
41
+
30
42
  pub async fn proxy_handler(
31
43
  headers: HeaderMap,
32
44
  Extension(config): Extension<Arc<Config>>,
@@ -49,9 +61,12 @@ pub async fn proxy_handler(
49
61
  match &blocks[0] {
50
62
  anthropic::ContentBlock::Text { .. } => "text_block",
51
63
  anthropic::ContentBlock::Image { .. } => "image_block",
64
+ anthropic::ContentBlock::Document { .. } => "document_block",
52
65
  anthropic::ContentBlock::ToolUse { .. } => "tool_use_block",
53
66
  anthropic::ContentBlock::ToolResult { .. } => "tool_result_block",
54
67
  anthropic::ContentBlock::Thinking { .. } => "thinking_block",
68
+ anthropic::ContentBlock::ServerToolUse { .. } => "server_tool_use_block",
69
+ anthropic::ContentBlock::SearchResult { .. } => "search_result_block",
55
70
  anthropic::ContentBlock::Other => "unknown_block",
56
71
  }
57
72
  }
@@ -61,21 +76,17 @@ pub async fn proxy_handler(
61
76
  }
62
77
  tracing::debug!("Streaming: {}", is_streaming);
63
78
 
64
- let model = if req
65
- .extra
66
- .get("thinking")
67
- .and_then(|v| v.get("type"))
68
- .is_some()
69
- {
79
+ let model = if request_has_thinking(&req) {
70
80
  config
71
81
  .reasoning_model
72
82
  .clone()
73
- .unwrap_or_else(|| config.model.clone())
83
+ .unwrap_or_else(|| config.primary_model.clone())
74
84
  } else {
75
85
  map_model(&req.model, &config)
76
86
  };
77
87
 
78
- let openai_req = transform::anthropic_to_openai(req, &model, config.backend_profile)?;
88
+ let openai_req =
89
+ transform::anthropic_to_openai(req, &model, config.backend_profile, config.compat_mode)?;
79
90
 
80
91
  if is_streaming {
81
92
  handle_streaming(config, client, openai_req).await
@@ -84,6 +95,58 @@ pub async fn proxy_handler(
84
95
  }
85
96
  }
86
97
 
98
+ pub async fn count_tokens_handler(
99
+ Extension(config): Extension<Arc<Config>>,
100
+ Json(req): Json<anthropic::AnthropicRequest>,
101
+ ) -> ProxyResult<Json<anthropic::CountTokensResponse>> {
102
+ let model = if request_has_thinking(&req) {
103
+ config
104
+ .reasoning_model
105
+ .clone()
106
+ .unwrap_or_else(|| config.primary_model.clone())
107
+ } else {
108
+ map_model(&req.model, &config)
109
+ };
110
+ let openai_req =
111
+ transform::anthropic_to_openai(req, &model, config.backend_profile, config.compat_mode)?;
112
+ let serialized = serde_json::to_string(&openai_req)?;
113
+ let estimated = std::cmp::max(1, serialized.chars().count() / 4);
114
+ Ok(Json(anthropic::CountTokensResponse {
115
+ input_tokens: estimated,
116
+ }))
117
+ }
118
+
119
+ pub async fn models_handler(
120
+ Extension(config): Extension<Arc<Config>>,
121
+ Extension(client): Extension<Client>,
122
+ ) -> ProxyResult<Response> {
123
+ let url = config.models_url();
124
+ let mut req_builder = client.get(&url).timeout(Duration::from_secs(60));
125
+
126
+ if let Some(api_key) = &config.api_key {
127
+ req_builder = req_builder.header("Authorization", format!("Bearer {}", api_key));
128
+ }
129
+
130
+ let response = req_builder.send().await.map_err(ProxyError::Http)?;
131
+ let status = response.status();
132
+ let body = response.bytes().await.map_err(ProxyError::Http)?;
133
+
134
+ if !status.is_success() {
135
+ return Err(ProxyError::Upstream(format!(
136
+ "Upstream returned {}: {}",
137
+ status,
138
+ String::from_utf8_lossy(&body)
139
+ )));
140
+ }
141
+
142
+ let mut headers = HeaderMap::new();
143
+ headers.insert(
144
+ header::CONTENT_TYPE,
145
+ HeaderValue::from_static("application/json"),
146
+ );
147
+ Ok((headers, body).into_response())
148
+ }
149
+
87
150
  async fn handle_non_streaming(
88
151
  config: Arc<Config>,
89
152
  client: Client,
@@ -120,8 +183,12 @@ async fn handle_non_streaming(
120
183
  }
121
184
 
122
185
  let openai_resp: openai::OpenAIResponse = response.json().await?;
123
- let anthropic_resp =
124
- transform::openai_to_anthropic(openai_resp, &openai_req.model, config.backend_profile)?;
186
+ let anthropic_resp = transform::openai_to_anthropic(
187
+ openai_resp,
188
+ &openai_req.model,
189
+ config.backend_profile,
190
+ config.compat_mode,
191
+ )?;
125
192
 
126
193
  Ok(Json(anthropic_resp).into_response())
127
194
  }
@@ -162,7 +229,12 @@ async fn handle_streaming(
162
229
  }
163
230
 
164
231
  let stream = response.bytes_stream();
165
- let sse_stream = create_sse_stream(stream, openai_req.model.clone(), config.backend_profile);
232
+ let sse_stream = create_sse_stream(
233
+ stream,
234
+ openai_req.model.clone(),
235
+ config.backend_profile,
236
+ config.compat_mode,
237
+ );
166
238
 
167
239
  let mut headers = HeaderMap::new();
168
240
  headers.insert(
@@ -179,6 +251,7 @@ fn create_sse_stream(
179
251
  stream: impl Stream<Item = Result<Bytes, reqwest::Error>> + Send + 'static,
180
252
  fallback_model: String,
181
253
  profile: BackendProfile,
254
+ compat_mode: CompatMode,
182
255
  ) -> impl Stream<Item = Result<Bytes, std::io::Error>> + Send {
183
256
  async_stream::stream! {
184
257
  let mut buffer = String::new();
@@ -186,8 +259,11 @@ fn create_sse_stream(
186
259
  let mut current_model = None;
187
260
  let mut next_content_index = 0usize;
188
261
  let mut has_sent_message_start = false;
262
+ let mut has_sent_message_delta = false;
263
+ let mut has_sent_message_stop = false;
189
264
  let mut active_block: Option<ActiveBlock> = None;
190
265
  let mut tool_states: BTreeMap<usize, ToolCallState> = BTreeMap::new();
266
+ let mut think_filter = ThinkTagStreamFilter::default();
191
267
 
192
268
  pin!(stream);
193
269
 
@@ -232,7 +308,24 @@ fn create_sse_stream(
232
308
  };
233
309
 
234
310
  if data.trim() == "[DONE]" {
235
- yield Ok(Bytes::from(message_stop_sse()));
311
+ if let Some(previous) = active_block.take() {
312
+ yield Ok(Bytes::from(stop_block_sse(previous.index())));
313
+ }
314
+ if has_sent_message_start && !has_sent_message_delta {
315
+ let event = anthropic::StreamEvent::MessageDelta {
316
+ delta: anthropic::MessageDeltaData {
317
+ stop_reason: Some("end_turn".to_string()),
318
+ stop_sequence: (),
319
+ },
320
+ usage: None,
321
+ };
322
+ yield Ok(Bytes::from(sse_event("message_delta", &event)));
323
+ has_sent_message_delta = true;
324
+ }
325
+ if has_sent_message_start && !has_sent_message_stop {
326
+ yield Ok(Bytes::from(message_stop_sse()));
327
+ has_sent_message_stop = true;
328
+ }
236
329
  continue;
237
330
  }
238
331
 
@@ -255,9 +348,12 @@ fn create_sse_stream(
255
348
  id: message_id.clone().unwrap_or_else(generate_message_id),
256
349
  message_type: "message".to_string(),
257
350
  role: "assistant".to_string(),
351
+ content: vec![],
258
352
  model: current_model
259
353
  .clone()
260
354
  .unwrap_or_else(|| fallback_model.clone()),
355
+ stop_reason: None,
356
+ stop_sequence: None,
261
357
  usage: anthropic::Usage {
262
358
  input_tokens: 0,
263
359
  output_tokens: 0,
@@ -270,44 +366,68 @@ fn create_sse_stream(
270
366
 
271
367
  if let Some(reasoning) = &choice.delta.reasoning {
272
368
  if !reasoning.is_empty() {
273
- if !profile.supports_reasoning() {
369
+ if !profile.supports_reasoning() && compat_mode.is_strict() {
274
370
  yield Ok(Bytes::from(stream_error_sse(
275
371
  "reasoning deltas are not supported by the active backend profile",
276
372
  )));
277
373
  break;
278
374
  }
279
375
 
280
- let (idx, transitions) = transition_to_thinking(
281
- &mut active_block,
282
- &mut next_content_index,
283
- );
284
- for event in transitions {
285
- yield Ok(Bytes::from(event));
376
+ if profile.supports_reasoning() {
377
+ let (idx, transitions) = transition_to_thinking(
378
+ &mut active_block,
379
+ &mut next_content_index,
380
+ );
381
+ for event in transitions {
382
+ yield Ok(Bytes::from(event));
383
+ }
384
+ yield Ok(Bytes::from(delta_block_sse(
385
+ idx,
386
+ anthropic::ContentBlockDeltaData::ThinkingDelta {
387
+ thinking: reasoning.clone(),
388
+ },
389
+ )));
286
390
  }
287
- yield Ok(Bytes::from(delta_block_sse(
288
- idx,
289
- anthropic::ContentBlockDeltaData::ThinkingDelta {
290
- thinking: reasoning.clone(),
291
- },
292
- )));
293
391
  }
294
392
  }
295
393
 
296
394
  if let Some(content) = &choice.delta.content {
297
395
  if !content.is_empty() {
298
- let (idx, transitions) = transition_to_text(
299
- &mut active_block,
300
- &mut next_content_index,
301
- );
302
- for event in transitions {
303
- yield Ok(Bytes::from(event));
396
+ let (embedded_reasoning, visible_text) = think_filter.push(content);
397
+
398
+ if profile.supports_reasoning() {
399
+ for reasoning in embedded_reasoning {
400
+ let (idx, transitions) = transition_to_thinking(
401
+ &mut active_block,
402
+ &mut next_content_index,
403
+ );
404
+ for event in transitions {
405
+ yield Ok(Bytes::from(event));
406
+ }
407
+ yield Ok(Bytes::from(delta_block_sse(
408
+ idx,
409
+ anthropic::ContentBlockDeltaData::ThinkingDelta {
410
+ thinking: reasoning,
411
+ },
412
+ )));
413
+ }
414
+ }
415
+
416
+ if !visible_text.is_empty() {
417
+ let (idx, transitions) = transition_to_text(
418
+ &mut active_block,
419
+ &mut next_content_index,
420
+ );
421
+ for event in transitions {
422
+ yield Ok(Bytes::from(event));
423
+ }
424
+ yield Ok(Bytes::from(delta_block_sse(
425
+ idx,
426
+ anthropic::ContentBlockDeltaData::TextDelta {
427
+ text: visible_text,
428
+ },
429
+ )));
304
430
  }
305
- yield Ok(Bytes::from(delta_block_sse(
306
- idx,
307
- anthropic::ContentBlockDeltaData::TextDelta {
308
- text: content.clone(),
309
- },
310
- )));
311
431
  }
312
432
  }
313
433
 
@@ -340,6 +460,9 @@ fn create_sse_stream(
340
460
  }
341
461
  }
342
462
  } else if active_block != Some(ActiveBlock::ToolUse(tool_index, state.content_index.unwrap())) {
463
+ if !compat_mode.is_strict() {
464
+ continue;
465
+ }
343
466
  yield Ok(Bytes::from(stream_error_sse(
344
467
  "interleaved tool call deltas are not supported safely",
345
468
  )));
@@ -378,6 +501,11 @@ fn create_sse_stream(
378
501
  }),
379
502
  };
380
503
  yield Ok(Bytes::from(sse_event("message_delta", &event)));
504
+ has_sent_message_delta = true;
505
+ if !has_sent_message_stop {
506
+ yield Ok(Bytes::from(message_stop_sse()));
507
+ has_sent_message_stop = true;
508
+ }
381
509
  }
382
510
  }
383
511
  }
@@ -390,14 +518,57 @@ fn create_sse_stream(
390
518
  }
391
519
  }
392
520
  }
521
+
522
+ let (embedded_reasoning, visible_tail) = think_filter.finish();
523
+ if profile.supports_reasoning() {
524
+ for reasoning in embedded_reasoning {
525
+ let (idx, transitions) =
526
+ transition_to_thinking(&mut active_block, &mut next_content_index);
527
+ for event in transitions {
528
+ yield Ok(Bytes::from(event));
529
+ }
530
+ yield Ok(Bytes::from(delta_block_sse(
531
+ idx,
532
+ anthropic::ContentBlockDeltaData::ThinkingDelta { thinking: reasoning },
533
+ )));
534
+ }
535
+ }
536
+ if !visible_tail.is_empty() {
537
+ let (idx, transitions) = transition_to_text(&mut active_block, &mut next_content_index);
538
+ for event in transitions {
539
+ yield Ok(Bytes::from(event));
540
+ }
541
+ yield Ok(Bytes::from(delta_block_sse(
542
+ idx,
543
+ anthropic::ContentBlockDeltaData::TextDelta { text: visible_tail },
544
+ )));
545
+ }
546
+ if let Some(previous) = active_block.take() {
547
+ yield Ok(Bytes::from(stop_block_sse(previous.index())));
548
+ }
549
+ if has_sent_message_start && !has_sent_message_delta {
550
+ let event = anthropic::StreamEvent::MessageDelta {
551
+ delta: anthropic::MessageDeltaData {
552
+ stop_reason: Some("end_turn".to_string()),
553
+ stop_sequence: (),
554
+ },
555
+ usage: None,
556
+ };
557
+ yield Ok(Bytes::from(sse_event("message_delta", &event)));
558
+ }
559
+ if has_sent_message_start && !has_sent_message_stop {
560
+ yield Ok(Bytes::from(message_stop_sse()));
561
+ }
393
562
  }
394
563
  }
395
564
 
396
565
  pub struct Config {
397
566
  pub backend_url: String,
398
567
  pub backend_profile: BackendProfile,
399
- pub model: String,
568
+ pub compat_mode: CompatMode,
569
+ pub primary_model: String,
400
570
  pub reasoning_model: Option<String>,
571
+ pub fallback_models: Vec<String>,
401
572
  pub api_key: Option<String>,
402
573
  pub ingress_api_key: Option<String>,
403
574
  pub allow_origins: Vec<String>,
@@ -406,6 +577,33 @@ pub struct Config {
406
577
 
407
578
  impl Config {
408
579
  pub fn from_env() -> Self {
580
+ let legacy_model = std::env::var("ANTHMORPH_MODEL").ok();
581
+ let primary_model = std::env::var("ANTHMORPH_PRIMARY_MODEL")
582
+ .ok()
583
+ .or_else(|| {
584
+ legacy_model.as_ref().and_then(|value| {
585
+ value
586
+ .split(',')
587
+ .next()
588
+ .map(str::trim)
589
+ .map(ToOwned::to_owned)
590
+ })
591
+ })
592
+ .unwrap_or_else(|| "Qwen/Qwen3.5-397B-A17B-TEE".to_string());
593
+ let fallback_models = std::env::var("ANTHMORPH_FALLBACK_MODELS")
594
+ .ok()
595
+ .or_else(|| legacy_model.clone())
596
+ .map(|value| {
597
+ value
598
+ .split(',')
599
+ .map(str::trim)
600
+ .filter(|s| !s.is_empty())
601
+ .filter(|s| *s != primary_model)
602
+ .map(ToOwned::to_owned)
603
+ .collect()
604
+ })
605
+ .unwrap_or_default();
606
+
409
607
  Self {
410
608
  backend_url: std::env::var("ANTHMORPH_BACKEND_URL")
411
609
  .unwrap_or_else(|_| "https://llm.chutes.ai/v1".to_string()),
@@ -413,9 +611,13 @@ impl Config {
413
611
  .ok()
414
612
  .and_then(|v| v.parse().ok())
415
613
  .unwrap_or(BackendProfile::Chutes),
416
- model: std::env::var("ANTHMORPH_MODEL")
417
- .unwrap_or_else(|_| "Qwen/Qwen3-Coder-Next-TEE".to_string()),
614
+ compat_mode: std::env::var("ANTHMORPH_COMPAT_MODE")
615
+ .ok()
616
+ .and_then(|v| v.parse().ok())
617
+ .unwrap_or(CompatMode::Compat),
618
+ primary_model,
418
619
  reasoning_model: std::env::var("ANTHMORPH_REASONING_MODEL").ok(),
620
+ fallback_models,
419
621
  api_key: std::env::var("ANTHMORPH_API_KEY").ok(),
420
622
  ingress_api_key: std::env::var("ANTHMORPH_INGRESS_API_KEY").ok(),
421
623
  allow_origins: std::env::var("ANTHMORPH_ALLOWED_ORIGINS")
@@ -441,6 +643,10 @@ impl Config {
441
643
  self.backend_url.trim_end_matches('/')
442
644
  )
443
645
  }
646
+
647
+ pub fn models_url(&self) -> String {
648
+ format!("{}/models", self.backend_url.trim_end_matches('/'))
649
+ }
444
650
  }
445
651
 
446
652
  impl fmt::Debug for Config {
@@ -448,8 +654,10 @@ impl fmt::Debug for Config {
448
654
  f.debug_struct("Config")
449
655
  .field("backend_url", &self.backend_url)
450
656
  .field("backend_profile", &self.backend_profile.as_str())
451
- .field("model", &self.model)
657
+ .field("compat_mode", &self.compat_mode.as_str())
658
+ .field("primary_model", &self.primary_model)
452
659
  .field("reasoning_model", &self.reasoning_model)
660
+ .field("fallback_models", &self.fallback_models)
453
661
  .field("api_key", &"<hidden>")
454
662
  .field("ingress_api_key", &"<hidden>")
455
663
  .field("allow_origins", &self.allow_origins)
@@ -481,6 +689,80 @@ struct ToolCallState {
481
689
  content_index: Option<usize>,
482
690
  }
483
691
 
692
+ #[derive(Debug, Default)]
693
+ struct ThinkTagStreamFilter {
694
+ carry: String,
695
+ in_think: bool,
696
+ }
697
+
698
+ impl ThinkTagStreamFilter {
699
+ fn push(&mut self, chunk: &str) -> (Vec<String>, String) {
700
+ let mut reasoning = Vec::new();
701
+ let mut visible = String::new();
702
+ let mut work = format!("{}{}", self.carry, chunk);
703
+ self.carry.clear();
704
+
705
+ loop {
706
+ if self.in_think {
707
+ if let Some(end) = work.find("</think>") {
708
+ let think_text = &work[..end];
709
+ if !think_text.is_empty() {
710
+ reasoning.push(think_text.to_string());
711
+ }
712
+ work = work[end + "</think>".len()..].to_string();
713
+ self.in_think = false;
714
+ continue;
715
+ }
716
+
717
+ let split_at = partial_tag_suffix_start(&work, &["</think>"]);
718
+ if split_at > 0 {
719
+ reasoning.push(work[..split_at].to_string());
720
+ }
721
+ self.carry = work[split_at..].to_string();
722
+ break;
723
+ }
724
+
725
+ if let Some(start) = work.find("<think>") {
726
+ visible.push_str(&work[..start]);
727
+ work = work[start + "<think>".len()..].to_string();
728
+ self.in_think = true;
729
+ continue;
730
+ }
731
+
732
+ let split_at = partial_tag_suffix_start(&work, &["<think>", "</think>"]);
733
+ visible.push_str(&work[..split_at]);
734
+ self.carry = work[split_at..].to_string();
735
+ break;
736
+ }
737
+
738
+ (reasoning, visible)
739
+ }
740
+
741
+ fn finish(&mut self) -> (Vec<String>, String) {
742
+ if self.carry.is_empty() {
743
+ return (Vec::new(), String::new());
744
+ }
745
+
746
+ let leftover = std::mem::take(&mut self.carry);
747
+ if self.in_think {
748
+ self.in_think = false;
749
+ (vec![leftover], String::new())
750
+ } else {
751
+ (Vec::new(), leftover)
752
+ }
753
+ }
754
+ }
755
+
756
/// Returns the byte index where a possibly-incomplete tag begins at the end
/// of `value`, or `value.len()` when the tail cannot be the start of any of
/// `tags`.
///
/// Used by the streaming `<think>` filter to decide how much of a chunk is
/// safe to emit now and how much must be carried until more bytes arrive.
fn partial_tag_suffix_start(value: &str, tags: &[&str]) -> usize {
    // No suffix longer than the longest tag can be a tag prefix, and moving
    // left only lengthens the suffix — so bound the backward scan by the tag
    // length instead of walking the whole chunk (chunks can be arbitrarily
    // long; tags are a handful of bytes).
    let max_tag_len = tags.iter().map(|tag| tag.len()).max().unwrap_or(0);

    for (start, _) in value.char_indices().rev() {
        let suffix = &value[start..];
        if suffix.len() > max_tag_len {
            break;
        }
        if tags.iter().any(|tag| tag.starts_with(suffix)) {
            return start;
        }
    }

    value.len()
}
765
+
484
766
  fn transition_to_thinking(
485
767
  active_block: &mut Option<ActiveBlock>,
486
768
  next_content_index: &mut usize,
@@ -729,6 +1011,7 @@ mod tests {
729
1011
  stream::iter(chunks),
730
1012
  "fallback".to_string(),
731
1013
  BackendProfile::Chutes,
1014
+ CompatMode::Strict,
732
1015
  );
733
1016
  tokio::pin!(sse);
734
1017
 
@@ -744,6 +1027,102 @@ mod tests {
744
1027
  assert_eq!(joined.matches("event: content_block_start").count(), 1);
745
1028
  }
746
1029
 
1030
// A Compat-mode stream on a profile without reasoning support must strip
// inline <think> spans from content deltas so clients only see visible text.
#[tokio::test]
async fn create_sse_stream_strips_think_tags_for_generic_compat() {
    // One OpenAI-style chunk whose content embeds an inline think span,
    // followed by the [DONE] sentinel.
    let first = serde_json::to_string(&json!({
        "id": "abc",
        "model": "minimax",
        "choices": [{
            "index": 0,
            "delta": {
                "content": "<think>secret</think>visible"
            },
            "finish_reason": "stop"
        }],
        "usage": {
            "completion_tokens": 4
        }
    }))
    .unwrap();

    let chunks = vec![
        Ok(Bytes::from(format!("data: {first}\n\n"))),
        Ok(Bytes::from("data: [DONE]\n\n")),
    ];

    let mut output = Vec::new();
    let sse = create_sse_stream(
        stream::iter(chunks),
        "fallback".to_string(),
        BackendProfile::OpenaiGeneric,
        CompatMode::Compat,
    );
    tokio::pin!(sse);

    // Drain the translated SSE stream into one string for inspection.
    while let Some(item) = sse.next().await {
        output.push(String::from_utf8(item.unwrap().to_vec()).unwrap());
    }

    let joined = output.join("");
    // The visible tail survives; the think-span interior must not leak.
    assert!(joined.contains("visible"));
    assert!(!joined.contains("secret"));
}
1070
+
1071
// The serialized message_start event must carry every field Anthropic
// clients require (type, role, empty content, null stop_reason/stop_sequence).
#[test]
fn message_start_sse_includes_required_anthropic_fields() {
    let event = anthropic::StreamEvent::MessageStart {
        message: anthropic::MessageStartData {
            id: "msg_test".to_string(),
            message_type: "message".to_string(),
            role: "assistant".to_string(),
            content: vec![],
            model: "glm-5.1".to_string(),
            stop_reason: None,
            stop_sequence: None,
            usage: anthropic::Usage {
                input_tokens: 0,
                output_tokens: 0,
            },
        },
    };

    // Pull the JSON payload out of the "data: ..." line of the SSE frame.
    let serialized = sse_event("message_start", &event);
    let payload = serialized
        .lines()
        .find_map(|line| line.strip_prefix("data: "))
        .expect("message_start data line");
    let parsed: serde_json::Value = serde_json::from_str(payload).expect("valid json");

    assert_eq!(parsed["message"]["type"], "message");
    assert_eq!(parsed["message"]["role"], "assistant");
    assert_eq!(parsed["message"]["content"], json!([]));
    // Serialized as explicit nulls, not omitted.
    assert!(parsed["message"]["stop_reason"].is_null());
    assert!(parsed["message"]["stop_sequence"].is_null());
}
1102
+
1103
// content_block_start for a tool_use block must serialize the block fields
// flat under "content_block" (no accidental extra nesting level).
#[test]
fn content_block_start_tool_use_has_flat_anthropic_shape() {
    let payload = start_block_sse(
        0,
        anthropic::ContentBlockStartData::ToolUse {
            id: "toolu_123".to_string(),
            name: "mcp__memory__memory_read".to_string(),
            input: json!({}),
        },
    )
    .lines()
    .find_map(|line| line.strip_prefix("data: "))
    .expect("content_block_start data line")
    .to_string();

    let parsed: serde_json::Value = serde_json::from_str(&payload).expect("valid json");
    assert_eq!(parsed["content_block"]["type"], "tool_use");
    assert_eq!(parsed["content_block"]["id"], "toolu_123");
    assert_eq!(parsed["content_block"]["name"], "mcp__memory__memory_read");
    assert_eq!(parsed["content_block"]["input"], json!({}));
    // Guard against a doubly-nested {"content_block":{"content_block":...}}.
    assert!(parsed["content_block"].get("content_block").is_none());
}
1125
+
747
1126
  #[test]
748
1127
  fn extracts_multi_line_sse_data() {
749
1128
  let block = "event: message\ndata: first\ndata: second\n";
@@ -755,8 +1134,10 @@ mod tests {
755
1134
  let config = Config {
756
1135
  backend_url: "https://example.com".to_string(),
757
1136
  backend_profile: BackendProfile::OpenaiGeneric,
758
- model: "model".to_string(),
1137
+ compat_mode: CompatMode::Strict,
1138
+ primary_model: "model".to_string(),
759
1139
  reasoning_model: None,
1140
+ fallback_models: Vec::new(),
760
1141
  api_key: None,
761
1142
  ingress_api_key: Some("secret".to_string()),
762
1143
  allow_origins: Vec::new(),
@@ -783,8 +1164,10 @@ mod tests {
783
1164
  let config = Config {
784
1165
  backend_url: "https://example.com".to_string(),
785
1166
  backend_profile: BackendProfile::OpenaiGeneric,
786
- model: "model".to_string(),
1167
+ compat_mode: CompatMode::Strict,
1168
+ primary_model: "model".to_string(),
787
1169
  reasoning_model: None,
1170
+ fallback_models: Vec::new(),
788
1171
  api_key: None,
789
1172
  ingress_api_key: Some("secret".to_string()),
790
1173
  allow_origins: Vec::new(),
@@ -801,8 +1184,10 @@ mod tests {
801
1184
  let config = Config {
802
1185
  backend_url: "https://example.com".to_string(),
803
1186
  backend_profile: BackendProfile::OpenaiGeneric,
804
- model: "model".to_string(),
1187
+ compat_mode: CompatMode::Strict,
1188
+ primary_model: "model".to_string(),
805
1189
  reasoning_model: None,
1190
+ fallback_models: Vec::new(),
806
1191
  api_key: None,
807
1192
  ingress_api_key: None,
808
1193
  allow_origins: vec!["https://allowed.example".to_string()],