anveesa 0.2.7 → 0.2.8

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/Cargo.lock CHANGED
@@ -54,7 +54,7 @@ dependencies = [
54
54
 
55
55
  [[package]]
56
56
  name = "anveesa"
57
- version = "0.2.7"
57
+ version = "0.2.8"
58
58
  dependencies = [
59
59
  "anyhow",
60
60
  "base64",
package/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "anveesa"
3
- version = "0.2.7"
3
+ version = "0.2.8"
4
4
  edition = "2024"
5
5
  default-run = "anveesa"
6
6
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "anveesa",
3
- "version": "0.2.7",
3
+ "version": "0.2.8",
4
4
  "description": "A terminal CLI that wraps AI providers (OpenAI-compatible APIs and local CLIs) into a single unified command",
5
5
  "main": "bin/anveesa.js",
6
6
  "bin": {
package/src/config.rs CHANGED
@@ -22,6 +22,9 @@ kind = "openai-compatible"
22
22
  base_url = "https://openrouter.ai/api/v1"
23
23
  api_key_env = "OPENROUTER_API_KEY"
24
24
  # default_model = "openai/gpt-4.1-mini"
25
+ # Raise the per-response output cap to reduce truncation on long answers.
26
+ # Anveesa continues truncated answers automatically either way.
27
+ # max_tokens = 8192
25
28
 
26
29
  [providers.sumopod]
27
30
  kind = "openai-compatible"
@@ -436,6 +439,12 @@ pub struct OpenAiCompatibleProviderConfig {
436
439
  /// For Anthropic models this also sends the `anthropic-beta: prompt-caching-2024-07-31` header.
437
440
  #[serde(default, skip_serializing_if = "Option::is_none")]
438
441
  pub prompt_cache: Option<bool>,
442
+
443
+ /// Upper bound on tokens the model may generate per response. When unset the
444
+ /// provider default applies. Raising this reduces how often long answers are
445
+ /// truncated by the output limit (Anveesa continues truncated answers either way).
446
+ #[serde(default, skip_serializing_if = "Option::is_none")]
447
+ pub max_tokens: Option<u32>,
439
448
  }
440
449
 
441
450
  #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -472,6 +481,7 @@ fn insert_openai_provider(
472
481
  default_model: None,
473
482
  headers: BTreeMap::new(),
474
483
  prompt_cache: None,
484
+ max_tokens: None,
475
485
  }),
476
486
  );
477
487
  }
@@ -22,6 +22,9 @@ const CONNECT_TIMEOUT: Duration = Duration::from_secs(15);
22
22
  /// How many times the model may call the exact same (tool, arguments) pair before we refuse.
23
23
  const MAX_IDENTICAL_CALLS: usize = 3;
24
24
  const MAX_TOOL_INTENT_REPROMPTS: usize = 2;
25
+ /// How many times we ask the model to continue after its output was cut off by the
26
+ /// provider's token limit (`finish_reason == "length"`) before giving up.
27
+ const MAX_LENGTH_CONTINUATIONS: usize = 8;
25
28
 
26
29
  pub async fn ask(
27
30
  provider_name: &str,
@@ -60,6 +63,7 @@ pub async fn ask(
60
63
  let mut full_text = String::new();
61
64
  let mut last_usage: Option<Usage> = None;
62
65
  let mut tool_intent_reprompts = 0usize;
66
+ let mut length_continuations = 0usize;
63
67
 
64
68
  loop {
65
69
  let _ = events.send(StreamEvent::Status {
@@ -78,6 +82,9 @@ pub async fn ask(
78
82
  if usage_requested {
79
83
  body["stream_options"] = json!({ "include_usage": true });
80
84
  }
85
+ if let Some(max_tokens) = config.max_tokens {
86
+ body["max_tokens"] = json!(max_tokens);
87
+ }
81
88
  if tools_enabled {
82
89
  body["tools"] = json!(tools::definitions(policy.allows_write_tools()));
83
90
  body["tool_choice"] = json!("auto");
@@ -108,6 +115,31 @@ pub async fn ask(
108
115
  last_usage = Some(usage);
109
116
  }
110
117
 
118
+ // The provider cut the response off at its output-token limit. Treating the
119
+ // partial text (or partial tool call) as final is what makes Anveesa appear to
120
+ // "stop suddenly" mid-task — instead, keep what we have and ask it to continue.
121
+ if state.finish_reason.as_deref() == Some("length")
122
+ && length_continuations < MAX_LENGTH_CONTINUATIONS
123
+ {
124
+ length_continuations += 1;
125
+ full_text.push_str(&state.content);
126
+ let _ = events.send(StreamEvent::Status {
127
+ message: "Response hit the output token limit; asking the model to continue"
128
+ .to_string(),
129
+ });
130
+ // Drop any partial tool call: a length-truncated call has incomplete
131
+ // arguments and can't be dispatched. The continuation nudge tells the
132
+ // model to re-issue it.
133
+ if !state.content.is_empty() {
134
+ messages.push(json!({
135
+ "role": "assistant",
136
+ "content": state.content,
137
+ }));
138
+ }
139
+ messages.push(length_continuation_message());
140
+ continue;
141
+ }
142
+
111
143
  if state.tool_calls.is_empty() {
112
144
  if tools_enabled
113
145
  && tool_intent_reprompts < MAX_TOOL_INTENT_REPROMPTS
@@ -457,6 +489,13 @@ fn tool_limit_message(max_tool_rounds: usize) -> Value {
457
489
  })
458
490
  }
459
491
 
492
+ fn length_continuation_message() -> Value {
493
+ json!({
494
+ "role": "system",
495
+ "content": "Your previous response was cut off because it reached the output token limit. Continue from exactly where you left off. Do not repeat text you already produced and do not restart the answer. If you were in the middle of a tool call, re-issue that complete tool call now."
496
+ })
497
+ }
498
+
460
499
  fn tool_intent_reprompt_message() -> Value {
461
500
  json!({
462
501
  "role": "system",
@@ -819,6 +858,7 @@ struct StreamState {
819
858
  content: String,
820
859
  tool_calls: Vec<PartialToolCall>,
821
860
  usage: Option<Usage>,
861
+ finish_reason: Option<String>,
822
862
  done: bool,
823
863
  }
824
864
 
@@ -861,6 +901,13 @@ impl StreamState {
861
901
  let Some(first_choice) = choices.get(0) else {
862
902
  return None;
863
903
  };
904
+
905
+ // `finish_reason` is a sibling of `delta` and only carries a string on the
906
+ // final chunk for the choice (it's null on every intermediate chunk).
907
+ if let Some(reason) = first_choice.get("finish_reason").and_then(Value::as_str) {
908
+ self.finish_reason = Some(reason.to_string());
909
+ }
910
+
864
911
  let Some(delta) = first_choice.get("delta") else {
865
912
  return None;
866
913
  };
@@ -1016,6 +1063,31 @@ mod tests {
1016
1063
  assert_eq!(state.tool_calls[0].arguments, "{\"path\":\"x\"}");
1017
1064
  }
1018
1065
 
1066
+ #[test]
1067
+ fn captures_finish_reason_from_final_chunk() {
1068
+ let mut state = StreamState::default();
1069
+ // Intermediate chunk: finish_reason is null and must not be recorded.
1070
+ state.apply_chunk(&json!({
1071
+ "choices": [{ "delta": { "content": "partial" }, "finish_reason": null }]
1072
+ }));
1073
+ assert_eq!(state.finish_reason, None);
1074
+ // Final chunk reports truncation.
1075
+ state.apply_chunk(&json!({
1076
+ "choices": [{ "delta": {}, "finish_reason": "length" }]
1077
+ }));
1078
+ assert_eq!(state.finish_reason.as_deref(), Some("length"));
1079
+ assert_eq!(state.content, "partial");
1080
+ }
1081
+
1082
+ #[test]
1083
+ fn length_continuation_message_asks_to_resume_without_repeating() {
1084
+ let message = length_continuation_message();
1085
+ assert_eq!(message["role"], json!("system"));
1086
+ let content = message["content"].as_str().unwrap();
1087
+ assert!(content.contains("cut off"));
1088
+ assert!(content.contains("Do not repeat"));
1089
+ }
1090
+
1019
1091
  #[test]
1020
1092
  fn parses_usage_chunk() {
1021
1093
  let mut state = StreamState::default();