npm - anveesa - Versions diffs - 0.2.7 → 0.2.8 - Mend

anveesa 0.2.7 → 0.2.8

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/Cargo.lock +1 -1
package/Cargo.toml +1 -1
package/package.json +1 -1
package/src/config.rs +10 -0
package/src/provider/openai_compatible.rs +72 -0

package/Cargo.lock CHANGED Viewed

@@ -54,7 +54,7 @@ dependencies = [
 [[package]]
 name = "anveesa"
-version = "0.2.7"
+version = "0.2.8"
 dependencies = [
  "anyhow",
  "base64",

package/Cargo.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "anveesa"
-version = "0.2.7"
+version = "0.2.8"
 edition = "2024"
 default-run = "anveesa"

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "anveesa",
-  "version": "0.2.7",
+  "version": "0.2.8",
   "description": "A terminal CLI that wraps AI providers (OpenAI-compatible APIs and local CLIs) into a single unified command",
   "main": "bin/anveesa.js",
   "bin": {

package/src/config.rs CHANGED Viewed

@@ -22,6 +22,9 @@ kind = "openai-compatible"
 base_url = "https://openrouter.ai/api/v1"
 api_key_env = "OPENROUTER_API_KEY"
 # default_model = "openai/gpt-4.1-mini"
+# Raise the per-response output cap to reduce truncation on long answers.
+# Anveesa continues truncated answers automatically either way.
+# max_tokens = 8192
 [providers.sumopod]
 kind = "openai-compatible"
@@ -436,6 +439,12 @@ pub struct OpenAiCompatibleProviderConfig {
     /// For Anthropic models this also sends the `anthropic-beta: prompt-caching-2024-07-31` header.
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub prompt_cache: Option<bool>,
+    /// Upper bound on tokens the model may generate per response. When unset the
+    /// provider default applies. Raising this reduces how often long answers are
+    /// truncated by the output limit (Anveesa continues truncated answers either way).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub max_tokens: Option<u32>,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -472,6 +481,7 @@ fn insert_openai_provider(
             default_model: None,
             headers: BTreeMap::new(),
             prompt_cache: None,
+            max_tokens: None,
         }),
     );
 }

package/src/provider/openai_compatible.rs CHANGED Viewed

@@ -22,6 +22,9 @@ const CONNECT_TIMEOUT: Duration = Duration::from_secs(15);
 /// How many times the model may call the exact same (tool, arguments) pair before we refuse.
 const MAX_IDENTICAL_CALLS: usize = 3;
 const MAX_TOOL_INTENT_REPROMPTS: usize = 2;
+/// How many times we ask the model to continue after its output was cut off by the
+/// provider's token limit (`finish_reason == "length"`) before giving up.
+const MAX_LENGTH_CONTINUATIONS: usize = 8;
 pub async fn ask(
     provider_name: &str,
@@ -60,6 +63,7 @@ pub async fn ask(
     let mut full_text = String::new();
     let mut last_usage: Option<Usage> = None;
     let mut tool_intent_reprompts = 0usize;
+    let mut length_continuations = 0usize;
     loop {
         let _ = events.send(StreamEvent::Status {
@@ -78,6 +82,9 @@ pub async fn ask(
         if usage_requested {
             body["stream_options"] = json!({ "include_usage": true });
         }
+        if let Some(max_tokens) = config.max_tokens {
+            body["max_tokens"] = json!(max_tokens);
+        }
         if tools_enabled {
             body["tools"] = json!(tools::definitions(policy.allows_write_tools()));
             body["tool_choice"] = json!("auto");
@@ -108,6 +115,31 @@ pub async fn ask(
             last_usage = Some(usage);
         }
+        // The provider cut the response off at its output-token limit. Treating the
+        // partial text (or partial tool call) as final is what makes Anveesa appear to
+        // "stop suddenly" mid-task — instead, keep what we have and ask it to continue.
+        if state.finish_reason.as_deref() == Some("length")
+            && length_continuations < MAX_LENGTH_CONTINUATIONS
+        {
+            length_continuations += 1;
+            full_text.push_str(&state.content);
+            let _ = events.send(StreamEvent::Status {
+                message: "Response hit the output token limit; asking the model to continue"
+                    .to_string(),
+            });
+            // Drop any partial tool call: a length-truncated call has incomplete
+            // arguments and can't be dispatched. The continuation nudge tells the
+            // model to re-issue it.
+            if !state.content.is_empty() {
+                messages.push(json!({
+                    "role": "assistant",
+                    "content": state.content,
+                }));
+            }
+            messages.push(length_continuation_message());
+            continue;
+        }
         if state.tool_calls.is_empty() {
             if tools_enabled
                 && tool_intent_reprompts < MAX_TOOL_INTENT_REPROMPTS
@@ -457,6 +489,13 @@ fn tool_limit_message(max_tool_rounds: usize) -> Value {
     })
 }
+fn length_continuation_message() -> Value {
+    json!({
+        "role": "system",
+        "content": "Your previous response was cut off because it reached the output token limit. Continue from exactly where you left off. Do not repeat text you already produced and do not restart the answer. If you were in the middle of a tool call, re-issue that complete tool call now."
+    })
+}
 fn tool_intent_reprompt_message() -> Value {
     json!({
         "role": "system",
@@ -819,6 +858,7 @@ struct StreamState {
     content: String,
     tool_calls: Vec<PartialToolCall>,
     usage: Option<Usage>,
+    finish_reason: Option<String>,
     done: bool,
 }
@@ -861,6 +901,13 @@ impl StreamState {
         let Some(first_choice) = choices.get(0) else {
             return None;
         };
+        // `finish_reason` is a sibling of `delta` and only carries a string on the
+        // final chunk for the choice (it's null on every intermediate chunk).
+        if let Some(reason) = first_choice.get("finish_reason").and_then(Value::as_str) {
+            self.finish_reason = Some(reason.to_string());
+        }
         let Some(delta) = first_choice.get("delta") else {
             return None;
         };
@@ -1016,6 +1063,31 @@ mod tests {
         assert_eq!(state.tool_calls[0].arguments, "{\"path\":\"x\"}");
     }
+    #[test]
+    fn captures_finish_reason_from_final_chunk() {
+        let mut state = StreamState::default();
+        // Intermediate chunk: finish_reason is null and must not be recorded.
+        state.apply_chunk(&json!({
+            "choices": [{ "delta": { "content": "partial" }, "finish_reason": null }]
+        }));
+        assert_eq!(state.finish_reason, None);
+        // Final chunk reports truncation.
+        state.apply_chunk(&json!({
+            "choices": [{ "delta": {}, "finish_reason": "length" }]
+        }));
+        assert_eq!(state.finish_reason.as_deref(), Some("length"));
+        assert_eq!(state.content, "partial");
+    }
+    #[test]
+    fn length_continuation_message_asks_to_resume_without_repeating() {
+        let message = length_continuation_message();
+        assert_eq!(message["role"], json!("system"));
+        let content = message["content"].as_str().unwrap();
+        assert!(content.contains("cut off"));
+        assert!(content.contains("Do not repeat"));
+    }
     #[test]
     fn parses_usage_chunk() {
         let mut state = StreamState::default();