npm - anveesa - Versions diffs - 0.2.6 → 0.2.8 - Mend

anveesa 0.2.6 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/Cargo.lock +1 -1
package/Cargo.toml +1 -1
package/package.json +1 -1
package/src/config.rs +10 -0
package/src/lib.rs +97 -2
package/src/provider/command.rs +4 -0
package/src/provider/mod.rs +9 -0
package/src/provider/openai_compatible.rs +128 -2

package/Cargo.lock CHANGED Viewed

@@ -54,7 +54,7 @@ dependencies = [
 [[package]]
 name = "anveesa"
-version = "0.2.6"
+version = "0.2.8"
 dependencies = [
  "anyhow",
  "base64",

package/Cargo.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [package]
 name = "anveesa"
-version = "0.2.6"
+version = "0.2.8"
 edition = "2024"
 default-run = "anveesa"

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "anveesa",
-  "version": "0.2.6",
+  "version": "0.2.8",
   "description": "A terminal CLI that wraps AI providers (OpenAI-compatible APIs and local CLIs) into a single unified command",
   "main": "bin/anveesa.js",
   "bin": {

package/src/config.rs CHANGED Viewed

@@ -22,6 +22,9 @@ kind = "openai-compatible"
 base_url = "https://openrouter.ai/api/v1"
 api_key_env = "OPENROUTER_API_KEY"
 # default_model = "openai/gpt-4.1-mini"
+# Raise the per-response output cap to reduce truncation on long answers.
+# Anveesa continues truncated answers automatically either way.
+# max_tokens = 8192
 [providers.sumopod]
 kind = "openai-compatible"
@@ -436,6 +439,12 @@ pub struct OpenAiCompatibleProviderConfig {
     /// For Anthropic models this also sends the `anthropic-beta: prompt-caching-2024-07-31` header.
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub prompt_cache: Option<bool>,
+    /// Upper bound on tokens the model may generate per response. When unset the
+    /// provider default applies. Raising this reduces how often long answers are
+    /// truncated by the output limit (Anveesa continues truncated answers either way).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub max_tokens: Option<u32>,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -472,6 +481,7 @@ fn insert_openai_provider(
             default_model: None,
             headers: BTreeMap::new(),
             prompt_cache: None,
+            max_tokens: None,
         }),
     );
 }

package/src/lib.rs CHANGED Viewed

@@ -326,6 +326,7 @@ async fn render_stream(
     let mut usage: Option<Usage> = None;
     let mut plan_tasks: Vec<String> = vec![];
     let mut plan_done: Vec<bool> = vec![];
+    let mut status_message = "Waiting for provider response".to_string();
     static TIPS: &[&str] = &[
         "Tip: type /clear to reset context",
@@ -337,6 +338,18 @@ async fn render_stream(
     loop {
         tokio::select! {
             maybe = rx.recv() => match maybe {
+                Some(StreamEvent::Status { message }) => {
+                    clear_spinner(spinner, spinner_active);
+                    spinner_active = false;
+                    if line_open {
+                        println!();
+                        line_open = false;
+                    }
+                    status_message = message;
+                    print_status(&status_message, spinner);
+                    first_token = true;
+                    frame = 0;
+                }
                 Some(StreamEvent::Token(text)) => {
                     if first_token {
                         clear_spinner(spinner, spinner_active);
@@ -359,10 +372,27 @@ async fn render_stream(
                         println!();
                         line_open = false;
                     }
+                    status_message = format!("Running {summary}");
                     print_tool_call(&summary, spinner);
                     first_token = true;
                     frame = 0;
                 }
+                Some(StreamEvent::ToolResult { summary, ok, elapsed_ms, error }) => {
+                    clear_spinner(spinner, spinner_active);
+                    spinner_active = false;
+                    if line_open {
+                        println!();
+                        line_open = false;
+                    }
+                    print_tool_result(&summary, ok, elapsed_ms, error.as_deref(), spinner);
+                    status_message = if ok {
+                        "Waiting for the model to continue".to_string()
+                    } else {
+                        "Waiting for the model to handle the tool failure".to_string()
+                    };
+                    first_token = true;
+                    frame = 0;
+                }
                 Some(StreamEvent::Confirm { preview, reply }) => {
                     clear_spinner(spinner, spinner_active);
                     spinner_active = false;
@@ -374,6 +404,20 @@ async fn render_stream(
                         show_confirm_preview(&preview, spinner);
                         prompt_confirm_decision(spinner)
                     });
+                    match decision {
+                        ApprovalDecision::AllowOnce => {
+                            print_status("Approved; applying action", spinner);
+                            status_message = "Applying approved action".to_string();
+                        }
+                        ApprovalDecision::AllowForTurn => {
+                            print_status("Approved all actions for this turn; applying action", spinner);
+                            status_message = "Applying approved action".to_string();
+                        }
+                        ApprovalDecision::Deny => {
+                            print_status("Declined action; returning decision to model", spinner);
+                            status_message = "Waiting for the model to continue".to_string();
+                        }
+                    }
                     let _ = reply.send(decision);
                     // Re-arm the spinner for the next API round.
                     first_token = true;
@@ -428,17 +472,18 @@ async fn render_stream(
                 let dots = ["", ".", "..", "…"][frame % 4];
                 // Tip rotates every 40 frames (~4 s)
                 let tip = TIPS[(frame / 40) % TIPS.len()];
+                let status = truncate_for_status(&status_message, 76);
                 if !spinner_active {
                     // First paint — just print 2 lines (no overwrite needed).
                     eprint!(
-                        "\x1b[1;32m+\x1b[0m Thinking{dots} \x1b[2m({time_str})\x1b[0m\n  \x1b[90m└\x1b[0m \x1b[2m{tip}\x1b[0m"
+                        "\x1b[1;32m+\x1b[0m {status}{dots} \x1b[2m({time_str})\x1b[0m\n  \x1b[90m└\x1b[0m \x1b[2m{tip}\x1b[0m"
                     );
                     spinner_active = true;
                 } else {
                     // Overwrite: move up 1 line, clear both lines, reprint.
                     eprint!(
-                        "\r\x1b[2K\x1b[1A\x1b[2K\r\x1b[1;32m+\x1b[0m Thinking{dots} \x1b[2m({time_str})\x1b[0m\n  \x1b[90m└\x1b[0m \x1b[2m{tip}\x1b[0m"
+                        "\r\x1b[2K\x1b[1A\x1b[2K\r\x1b[1;32m+\x1b[0m {status}{dots} \x1b[2m({time_str})\x1b[0m\n  \x1b[90m└\x1b[0m \x1b[2m{tip}\x1b[0m"
                     );
                 }
                 let _ = io::stderr().flush();
@@ -483,6 +528,33 @@ fn print_tool_call(summary: &str, is_tty: bool) {
     }
 }
+fn print_status(message: &str, is_tty: bool) {
+    if is_tty {
+        eprintln!("\x1b[90m  · {message}\x1b[0m");
+    } else {
+        eprintln!("status: {message}");
+    }
+}
+fn print_tool_result(summary: &str, ok: bool, elapsed_ms: u128, error: Option<&str>, is_tty: bool) {
+    let elapsed = format_duration_ms(elapsed_ms);
+    if is_tty {
+        if ok {
+            eprintln!("\x1b[1;32m  ✓\x1b[0m \x1b[90m{summary} completed in {elapsed}\x1b[0m");
+        } else if let Some(error) = error {
+            eprintln!("\x1b[1;31m  ✗\x1b[0m \x1b[90m{summary} failed in {elapsed}: {error}\x1b[0m");
+        } else {
+            eprintln!("\x1b[1;31m  ✗\x1b[0m \x1b[90m{summary} failed in {elapsed}\x1b[0m");
+        }
+    } else if ok {
+        eprintln!("tool ok: {summary} ({elapsed})");
+    } else if let Some(error) = error {
+        eprintln!("tool failed: {summary} ({elapsed}): {error}");
+    } else {
+        eprintln!("tool failed: {summary} ({elapsed})");
+    }
+}
 fn print_file_op(
     verb: &str,
     path: &str,
@@ -590,6 +662,29 @@ fn format_elapsed(secs: f32) -> String {
     }
 }
+fn format_duration_ms(ms: u128) -> String {
+    if ms >= 1000 {
+        format!("{:.1}s", ms as f64 / 1000.0)
+    } else {
+        format!("{ms}ms")
+    }
+}
+fn truncate_for_status(value: &str, max_chars: usize) -> String {
+    let mut chars = value.chars();
+    let mut output = String::new();
+    for _ in 0..max_chars {
+        let Some(ch) = chars.next() else {
+            return output;
+        };
+        output.push(ch);
+    }
+    if chars.next().is_some() {
+        output.push('…');
+    }
+    output
+}
 fn show_confirm_preview(preview: &ToolConfirmPreview, is_tty: bool) {
     match preview {
         ToolConfirmPreview::FileOp {

package/src/provider/command.rs CHANGED Viewed

@@ -37,6 +37,10 @@ pub async fn ask(
         command.stdin(Stdio::piped());
     }
+    let _ = events.send(StreamEvent::Status {
+        message: format!("Running command provider `{}`", config.command),
+    });
     let mut child = command.spawn().with_context(|| {
         format!(
             "failed to spawn command provider '{}' at {}",

package/src/provider/mod.rs CHANGED Viewed

@@ -112,12 +112,21 @@ pub enum ToolConfirmPreview {
 /// Events streamed from a provider back to the renderer, which owns the terminal.
 #[derive(Debug)]
 pub enum StreamEvent {
+    /// Durable progress/status message for long waits between model/tool phases.
+    Status { message: String },
     /// A chunk of assistant text to display as it arrives.
     Token(String),
     /// Final token accounting for the turn.
     Usage(Usage),
     /// A read-only tool is running. Used to make multi-round inspection visible.
     ToolCall { summary: String },
+    /// A tool finished running. Used to show explicit success/failure after approval.
+    ToolResult {
+        summary: String,
+        ok: bool,
+        elapsed_ms: u128,
+        error: Option<String>,
+    },
     /// A write/run tool needs the user's approval. The renderer shows the
     /// preview, prompts for a decision, and sends it back through the reply channel.
     Confirm {

package/src/provider/openai_compatible.rs CHANGED Viewed

@@ -1,4 +1,4 @@
-use std::time::Duration;
+use std::time::{Duration, Instant};
 use anyhow::{Context, Result, bail};
 use reqwest::header::{AUTHORIZATION, CONTENT_TYPE, HeaderMap, HeaderName, HeaderValue};
@@ -22,6 +22,9 @@ const CONNECT_TIMEOUT: Duration = Duration::from_secs(15);
 /// How many times the model may call the exact same (tool, arguments) pair before we refuse.
 const MAX_IDENTICAL_CALLS: usize = 3;
 const MAX_TOOL_INTENT_REPROMPTS: usize = 2;
+/// How many times we ask the model to continue after its output was cut off by the
+/// provider's token limit (`finish_reason == "length"`) before giving up.
+const MAX_LENGTH_CONTINUATIONS: usize = 8;
 pub async fn ask(
     provider_name: &str,
@@ -60,8 +63,17 @@ pub async fn ask(
     let mut full_text = String::new();
     let mut last_usage: Option<Usage> = None;
     let mut tool_intent_reprompts = 0usize;
+    let mut length_continuations = 0usize;
     loop {
+        let _ = events.send(StreamEvent::Status {
+            message: if tool_rounds == 0 {
+                format!("Waiting for {provider_name} response")
+            } else {
+                format!("Sending tool results to {provider_name}")
+            },
+        });
         let mut body = json!({
             "model": model,
             "messages": messages,
@@ -70,6 +82,9 @@ pub async fn ask(
         if usage_requested {
             body["stream_options"] = json!({ "include_usage": true });
         }
+        if let Some(max_tokens) = config.max_tokens {
+            body["max_tokens"] = json!(max_tokens);
+        }
         if tools_enabled {
             body["tools"] = json!(tools::definitions(policy.allows_write_tools()));
             body["tool_choice"] = json!("auto");
@@ -100,6 +115,31 @@ pub async fn ask(
             last_usage = Some(usage);
         }
+        // The provider cut the response off at its output-token limit. Treating the
+        // partial text (or partial tool call) as final is what makes Anveesa appear to
+        // "stop suddenly" mid-task — instead, keep what we have and ask it to continue.
+        if state.finish_reason.as_deref() == Some("length")
+            && length_continuations < MAX_LENGTH_CONTINUATIONS
+        {
+            length_continuations += 1;
+            full_text.push_str(&state.content);
+            let _ = events.send(StreamEvent::Status {
+                message: "Response hit the output token limit; asking the model to continue"
+                    .to_string(),
+            });
+            // Drop any partial tool call: a length-truncated call has incomplete
+            // arguments and can't be dispatched. The continuation nudge tells the
+            // model to re-issue it.
+            if !state.content.is_empty() {
+                messages.push(json!({
+                    "role": "assistant",
+                    "content": state.content,
+                }));
+            }
+            messages.push(length_continuation_message());
+            continue;
+        }
         if state.tool_calls.is_empty() {
             if tools_enabled
                 && tool_intent_reprompts < MAX_TOOL_INTENT_REPROMPTS
@@ -134,6 +174,10 @@ pub async fn ask(
             }));
         }
+        let _ = events.send(StreamEvent::Status {
+            message: "Tool results sent; waiting for the next model response".to_string(),
+        });
         if tool_rounds >= max_tool_rounds {
             tools_enabled = false;
             messages.push(tool_limit_message(max_tool_rounds));
@@ -163,6 +207,8 @@ async fn dispatch_tool(
     approval_state: &mut ToolApprovalState,
     events: &UnboundedSender<StreamEvent>,
 ) -> String {
+    let summary = tools::describe_call(&call.name, &call.arguments);
     // Plan tools — display only, no approval or filesystem access needed.
     if call.name == "set_plan" {
         if let Ok(args) = serde_json::from_str::<serde_json::Value>(&call.arguments) {
@@ -213,7 +259,7 @@ async fn dispatch_tool(
         }
     } else {
         let _ = events.send(StreamEvent::ToolCall {
-            summary: tools::describe_call(&call.name, &call.arguments),
+            summary: summary.clone(),
         });
     }
@@ -233,9 +279,24 @@ async fn dispatch_tool(
             ApprovalDecision::AllowForTurn => approval_state.allow_for_turn = true,
             ApprovalDecision::Deny => return denied_message("user declined this action"),
         }
+        let _ = events.send(StreamEvent::Status {
+            message: format!("Applying approved action: {summary}"),
+        });
+    } else if tools::is_write_tool(&call.name) {
+        let _ = events.send(StreamEvent::ToolCall {
+            summary: summary.clone(),
+        });
     }
+    let tool_started = Instant::now();
     let result = tools::run(&call.name, &call.arguments).await;
+    let (ok, error) = parse_tool_result_status(&result);
+    let _ = events.send(StreamEvent::ToolResult {
+        summary: summary.clone(),
+        ok,
+        elapsed_ms: tool_started.elapsed().as_millis(),
+        error,
+    });
     // When the user already reviewed the diff in the approval preview, skip the
     // post-run FileOp so the same diff isn't printed twice.
@@ -252,6 +313,18 @@ async fn dispatch_tool(
     result
 }
+fn parse_tool_result_status(result: &str) -> (bool, Option<String>) {
+    let Ok(json) = serde_json::from_str::<Value>(result) else {
+        return (true, None);
+    };
+    let ok = json.get("ok").and_then(Value::as_bool).unwrap_or(true);
+    let error = json
+        .get("error")
+        .and_then(Value::as_str)
+        .map(str::to_string);
+    (ok, error)
+}
 // ── File-op diff helpers ──────────────────────────────────────────────────────
 enum FileOpSnapshot {
@@ -416,6 +489,13 @@ fn tool_limit_message(max_tool_rounds: usize) -> Value {
     })
 }
+fn length_continuation_message() -> Value {
+    json!({
+        "role": "system",
+        "content": "Your previous response was cut off because it reached the output token limit. Continue from exactly where you left off. Do not repeat text you already produced and do not restart the answer. If you were in the middle of a tool call, re-issue that complete tool call now."
+    })
+}
 fn tool_intent_reprompt_message() -> Value {
     json!({
         "role": "system",
@@ -778,6 +858,7 @@ struct StreamState {
     content: String,
     tool_calls: Vec<PartialToolCall>,
     usage: Option<Usage>,
+    finish_reason: Option<String>,
     done: bool,
 }
@@ -820,6 +901,13 @@ impl StreamState {
         let Some(first_choice) = choices.get(0) else {
             return None;
         };
+        // `finish_reason` is a sibling of `delta` and only carries a string on the
+        // final chunk for the choice (it's null on every intermediate chunk).
+        if let Some(reason) = first_choice.get("finish_reason").and_then(Value::as_str) {
+            self.finish_reason = Some(reason.to_string());
+        }
         let Some(delta) = first_choice.get("delta") else {
             return None;
         };
@@ -975,6 +1063,31 @@ mod tests {
         assert_eq!(state.tool_calls[0].arguments, "{\"path\":\"x\"}");
     }
+    #[test]
+    fn captures_finish_reason_from_final_chunk() {
+        let mut state = StreamState::default();
+        // Intermediate chunk: finish_reason is null and must not be recorded.
+        state.apply_chunk(&json!({
+            "choices": [{ "delta": { "content": "partial" }, "finish_reason": null }]
+        }));
+        assert_eq!(state.finish_reason, None);
+        // Final chunk reports truncation.
+        state.apply_chunk(&json!({
+            "choices": [{ "delta": {}, "finish_reason": "length" }]
+        }));
+        assert_eq!(state.finish_reason.as_deref(), Some("length"));
+        assert_eq!(state.content, "partial");
+    }
+    #[test]
+    fn length_continuation_message_asks_to_resume_without_repeating() {
+        let message = length_continuation_message();
+        assert_eq!(message["role"], json!("system"));
+        let content = message["content"].as_str().unwrap();
+        assert!(content.contains("cut off"));
+        assert!(content.contains("Do not repeat"));
+    }
     #[test]
     fn parses_usage_chunk() {
         let mut state = StreamState::default();
@@ -1048,4 +1161,17 @@ mod tests {
         ));
         assert!(!looks_like_unfinished_tool_intent(""));
     }
+    #[test]
+    fn parses_tool_result_status() {
+        assert_eq!(parse_tool_result_status(r#"{"ok":true}"#), (true, None));
+        assert_eq!(
+            parse_tool_result_status(r#"{"ok":false,"error":"boom"}"#),
+            (false, Some("boom".to_string()))
+        );
+        assert_eq!(
+            parse_tool_result_status(r#"{"content":"no explicit ok"}"#),
+            (true, None)
+        );
+    }
 }