anveesa 0.2.6 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.lock CHANGED
@@ -54,7 +54,7 @@ dependencies = [
54
54
 
55
55
  [[package]]
56
56
  name = "anveesa"
57
- version = "0.2.6"
57
+ version = "0.2.8"
58
58
  dependencies = [
59
59
  "anyhow",
60
60
  "base64",
package/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "anveesa"
3
- version = "0.2.6"
3
+ version = "0.2.8"
4
4
  edition = "2024"
5
5
  default-run = "anveesa"
6
6
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "anveesa",
3
- "version": "0.2.6",
3
+ "version": "0.2.8",
4
4
  "description": "A terminal CLI that wraps AI providers (OpenAI-compatible APIs and local CLIs) into a single unified command",
5
5
  "main": "bin/anveesa.js",
6
6
  "bin": {
package/src/config.rs CHANGED
@@ -22,6 +22,9 @@ kind = "openai-compatible"
22
22
  base_url = "https://openrouter.ai/api/v1"
23
23
  api_key_env = "OPENROUTER_API_KEY"
24
24
  # default_model = "openai/gpt-4.1-mini"
25
+ # Raise the per-response output cap to reduce truncation on long answers.
26
+ # Anveesa continues truncated answers automatically either way.
27
+ # max_tokens = 8192
25
28
 
26
29
  [providers.sumopod]
27
30
  kind = "openai-compatible"
@@ -436,6 +439,12 @@ pub struct OpenAiCompatibleProviderConfig {
436
439
  /// For Anthropic models this also sends the `anthropic-beta: prompt-caching-2024-07-31` header.
437
440
  #[serde(default, skip_serializing_if = "Option::is_none")]
438
441
  pub prompt_cache: Option<bool>,
442
+
443
+ /// Upper bound on tokens the model may generate per response. When unset the
444
+ /// provider default applies. Raising this reduces how often long answers are
445
+ /// truncated by the output limit (Anveesa continues truncated answers either way).
446
+ #[serde(default, skip_serializing_if = "Option::is_none")]
447
+ pub max_tokens: Option<u32>,
439
448
  }
440
449
 
441
450
  #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -472,6 +481,7 @@ fn insert_openai_provider(
472
481
  default_model: None,
473
482
  headers: BTreeMap::new(),
474
483
  prompt_cache: None,
484
+ max_tokens: None,
475
485
  }),
476
486
  );
477
487
  }
package/src/lib.rs CHANGED
@@ -326,6 +326,7 @@ async fn render_stream(
326
326
  let mut usage: Option<Usage> = None;
327
327
  let mut plan_tasks: Vec<String> = vec![];
328
328
  let mut plan_done: Vec<bool> = vec![];
329
+ let mut status_message = "Waiting for provider response".to_string();
329
330
 
330
331
  static TIPS: &[&str] = &[
331
332
  "Tip: type /clear to reset context",
@@ -337,6 +338,18 @@ async fn render_stream(
337
338
  loop {
338
339
  tokio::select! {
339
340
  maybe = rx.recv() => match maybe {
341
+ Some(StreamEvent::Status { message }) => {
342
+ clear_spinner(spinner, spinner_active);
343
+ spinner_active = false;
344
+ if line_open {
345
+ println!();
346
+ line_open = false;
347
+ }
348
+ status_message = message;
349
+ print_status(&status_message, spinner);
350
+ first_token = true;
351
+ frame = 0;
352
+ }
340
353
  Some(StreamEvent::Token(text)) => {
341
354
  if first_token {
342
355
  clear_spinner(spinner, spinner_active);
@@ -359,10 +372,27 @@ async fn render_stream(
359
372
  println!();
360
373
  line_open = false;
361
374
  }
375
+ status_message = format!("Running {summary}");
362
376
  print_tool_call(&summary, spinner);
363
377
  first_token = true;
364
378
  frame = 0;
365
379
  }
380
+ Some(StreamEvent::ToolResult { summary, ok, elapsed_ms, error }) => {
381
+ clear_spinner(spinner, spinner_active);
382
+ spinner_active = false;
383
+ if line_open {
384
+ println!();
385
+ line_open = false;
386
+ }
387
+ print_tool_result(&summary, ok, elapsed_ms, error.as_deref(), spinner);
388
+ status_message = if ok {
389
+ "Waiting for the model to continue".to_string()
390
+ } else {
391
+ "Waiting for the model to handle the tool failure".to_string()
392
+ };
393
+ first_token = true;
394
+ frame = 0;
395
+ }
366
396
  Some(StreamEvent::Confirm { preview, reply }) => {
367
397
  clear_spinner(spinner, spinner_active);
368
398
  spinner_active = false;
@@ -374,6 +404,20 @@ async fn render_stream(
374
404
  show_confirm_preview(&preview, spinner);
375
405
  prompt_confirm_decision(spinner)
376
406
  });
407
+ match decision {
408
+ ApprovalDecision::AllowOnce => {
409
+ print_status("Approved; applying action", spinner);
410
+ status_message = "Applying approved action".to_string();
411
+ }
412
+ ApprovalDecision::AllowForTurn => {
413
+ print_status("Approved all actions for this turn; applying action", spinner);
414
+ status_message = "Applying approved action".to_string();
415
+ }
416
+ ApprovalDecision::Deny => {
417
+ print_status("Declined action; returning decision to model", spinner);
418
+ status_message = "Waiting for the model to continue".to_string();
419
+ }
420
+ }
377
421
  let _ = reply.send(decision);
378
422
  // Re-arm the spinner for the next API round.
379
423
  first_token = true;
@@ -428,17 +472,18 @@ async fn render_stream(
428
472
  let dots = ["", ".", "..", "…"][frame % 4];
429
473
  // Tip rotates every 40 frames (~4 s)
430
474
  let tip = TIPS[(frame / 40) % TIPS.len()];
475
+ let status = truncate_for_status(&status_message, 76);
431
476
 
432
477
  if !spinner_active {
433
478
  // First paint — just print 2 lines (no overwrite needed).
434
479
  eprint!(
435
- "\x1b[1;32m+\x1b[0m Thinking{dots} \x1b[2m({time_str})\x1b[0m\n \x1b[90m└\x1b[0m \x1b[2m{tip}\x1b[0m"
480
+ "\x1b[1;32m+\x1b[0m {status}{dots} \x1b[2m({time_str})\x1b[0m\n \x1b[90m└\x1b[0m \x1b[2m{tip}\x1b[0m"
436
481
  );
437
482
  spinner_active = true;
438
483
  } else {
439
484
  // Overwrite: move up 1 line, clear both lines, reprint.
440
485
  eprint!(
441
- "\r\x1b[2K\x1b[1A\x1b[2K\r\x1b[1;32m+\x1b[0m Thinking{dots} \x1b[2m({time_str})\x1b[0m\n \x1b[90m└\x1b[0m \x1b[2m{tip}\x1b[0m"
486
+ "\r\x1b[2K\x1b[1A\x1b[2K\r\x1b[1;32m+\x1b[0m {status}{dots} \x1b[2m({time_str})\x1b[0m\n \x1b[90m└\x1b[0m \x1b[2m{tip}\x1b[0m"
442
487
  );
443
488
  }
444
489
  let _ = io::stderr().flush();
@@ -483,6 +528,33 @@ fn print_tool_call(summary: &str, is_tty: bool) {
483
528
  }
484
529
  }
485
530
 
531
+ fn print_status(message: &str, is_tty: bool) {
532
+ if is_tty {
533
+ eprintln!("\x1b[90m · {message}\x1b[0m");
534
+ } else {
535
+ eprintln!("status: {message}");
536
+ }
537
+ }
538
+
539
+ fn print_tool_result(summary: &str, ok: bool, elapsed_ms: u128, error: Option<&str>, is_tty: bool) {
540
+ let elapsed = format_duration_ms(elapsed_ms);
541
+ if is_tty {
542
+ if ok {
543
+ eprintln!("\x1b[1;32m ✓\x1b[0m \x1b[90m{summary} completed in {elapsed}\x1b[0m");
544
+ } else if let Some(error) = error {
545
+ eprintln!("\x1b[1;31m ✗\x1b[0m \x1b[90m{summary} failed in {elapsed}: {error}\x1b[0m");
546
+ } else {
547
+ eprintln!("\x1b[1;31m ✗\x1b[0m \x1b[90m{summary} failed in {elapsed}\x1b[0m");
548
+ }
549
+ } else if ok {
550
+ eprintln!("tool ok: {summary} ({elapsed})");
551
+ } else if let Some(error) = error {
552
+ eprintln!("tool failed: {summary} ({elapsed}): {error}");
553
+ } else {
554
+ eprintln!("tool failed: {summary} ({elapsed})");
555
+ }
556
+ }
557
+
486
558
  fn print_file_op(
487
559
  verb: &str,
488
560
  path: &str,
@@ -590,6 +662,29 @@ fn format_elapsed(secs: f32) -> String {
590
662
  }
591
663
  }
592
664
 
665
+ fn format_duration_ms(ms: u128) -> String {
666
+ if ms >= 1000 {
667
+ format!("{:.1}s", ms as f64 / 1000.0)
668
+ } else {
669
+ format!("{ms}ms")
670
+ }
671
+ }
672
+
673
+ fn truncate_for_status(value: &str, max_chars: usize) -> String {
674
+ let mut chars = value.chars();
675
+ let mut output = String::new();
676
+ for _ in 0..max_chars {
677
+ let Some(ch) = chars.next() else {
678
+ return output;
679
+ };
680
+ output.push(ch);
681
+ }
682
+ if chars.next().is_some() {
683
+ output.push('…');
684
+ }
685
+ output
686
+ }
687
+
593
688
  fn show_confirm_preview(preview: &ToolConfirmPreview, is_tty: bool) {
594
689
  match preview {
595
690
  ToolConfirmPreview::FileOp {
@@ -37,6 +37,10 @@ pub async fn ask(
37
37
  command.stdin(Stdio::piped());
38
38
  }
39
39
 
40
+ let _ = events.send(StreamEvent::Status {
41
+ message: format!("Running command provider `{}`", config.command),
42
+ });
43
+
40
44
  let mut child = command.spawn().with_context(|| {
41
45
  format!(
42
46
  "failed to spawn command provider '{}' at {}",
@@ -112,12 +112,21 @@ pub enum ToolConfirmPreview {
112
112
  /// Events streamed from a provider back to the renderer, which owns the terminal.
113
113
  #[derive(Debug)]
114
114
  pub enum StreamEvent {
115
+ /// Durable progress/status message for long waits between model/tool phases.
116
+ Status { message: String },
115
117
  /// A chunk of assistant text to display as it arrives.
116
118
  Token(String),
117
119
  /// Final token accounting for the turn.
118
120
  Usage(Usage),
119
121
  /// A read-only tool is running. Used to make multi-round inspection visible.
120
122
  ToolCall { summary: String },
123
+ /// A tool finished running. Used to show explicit success/failure after approval.
124
+ ToolResult {
125
+ summary: String,
126
+ ok: bool,
127
+ elapsed_ms: u128,
128
+ error: Option<String>,
129
+ },
121
130
  /// A write/run tool needs the user's approval. The renderer shows the
122
131
  /// preview, prompts for a decision, and sends it back through the reply channel.
123
132
  Confirm {
@@ -1,4 +1,4 @@
1
- use std::time::Duration;
1
+ use std::time::{Duration, Instant};
2
2
 
3
3
  use anyhow::{Context, Result, bail};
4
4
  use reqwest::header::{AUTHORIZATION, CONTENT_TYPE, HeaderMap, HeaderName, HeaderValue};
@@ -22,6 +22,9 @@ const CONNECT_TIMEOUT: Duration = Duration::from_secs(15);
22
22
  /// How many times the model may call the exact same (tool, arguments) pair before we refuse.
23
23
  const MAX_IDENTICAL_CALLS: usize = 3;
24
24
  const MAX_TOOL_INTENT_REPROMPTS: usize = 2;
25
+ /// How many times we ask the model to continue after its output was cut off by the
26
+ /// provider's token limit (`finish_reason == "length"`) before giving up.
27
+ const MAX_LENGTH_CONTINUATIONS: usize = 8;
25
28
 
26
29
  pub async fn ask(
27
30
  provider_name: &str,
@@ -60,8 +63,17 @@ pub async fn ask(
60
63
  let mut full_text = String::new();
61
64
  let mut last_usage: Option<Usage> = None;
62
65
  let mut tool_intent_reprompts = 0usize;
66
+ let mut length_continuations = 0usize;
63
67
 
64
68
  loop {
69
+ let _ = events.send(StreamEvent::Status {
70
+ message: if tool_rounds == 0 {
71
+ format!("Waiting for {provider_name} response")
72
+ } else {
73
+ format!("Sending tool results to {provider_name}")
74
+ },
75
+ });
76
+
65
77
  let mut body = json!({
66
78
  "model": model,
67
79
  "messages": messages,
@@ -70,6 +82,9 @@ pub async fn ask(
70
82
  if usage_requested {
71
83
  body["stream_options"] = json!({ "include_usage": true });
72
84
  }
85
+ if let Some(max_tokens) = config.max_tokens {
86
+ body["max_tokens"] = json!(max_tokens);
87
+ }
73
88
  if tools_enabled {
74
89
  body["tools"] = json!(tools::definitions(policy.allows_write_tools()));
75
90
  body["tool_choice"] = json!("auto");
@@ -100,6 +115,31 @@ pub async fn ask(
100
115
  last_usage = Some(usage);
101
116
  }
102
117
 
118
+ // The provider cut the response off at its output-token limit. Treating the
119
+ // partial text (or partial tool call) as final is what makes Anveesa appear to
120
+ // "stop suddenly" mid-task — instead, keep what we have and ask it to continue.
121
+ if state.finish_reason.as_deref() == Some("length")
122
+ && length_continuations < MAX_LENGTH_CONTINUATIONS
123
+ {
124
+ length_continuations += 1;
125
+ full_text.push_str(&state.content);
126
+ let _ = events.send(StreamEvent::Status {
127
+ message: "Response hit the output token limit; asking the model to continue"
128
+ .to_string(),
129
+ });
130
+ // Drop any partial tool call: a length-truncated call has incomplete
131
+ // arguments and can't be dispatched. The continuation nudge tells the
132
+ // model to re-issue it.
133
+ if !state.content.is_empty() {
134
+ messages.push(json!({
135
+ "role": "assistant",
136
+ "content": state.content,
137
+ }));
138
+ }
139
+ messages.push(length_continuation_message());
140
+ continue;
141
+ }
142
+
103
143
  if state.tool_calls.is_empty() {
104
144
  if tools_enabled
105
145
  && tool_intent_reprompts < MAX_TOOL_INTENT_REPROMPTS
@@ -134,6 +174,10 @@ pub async fn ask(
134
174
  }));
135
175
  }
136
176
 
177
+ let _ = events.send(StreamEvent::Status {
178
+ message: "Tool results sent; waiting for the next model response".to_string(),
179
+ });
180
+
137
181
  if tool_rounds >= max_tool_rounds {
138
182
  tools_enabled = false;
139
183
  messages.push(tool_limit_message(max_tool_rounds));
@@ -163,6 +207,8 @@ async fn dispatch_tool(
163
207
  approval_state: &mut ToolApprovalState,
164
208
  events: &UnboundedSender<StreamEvent>,
165
209
  ) -> String {
210
+ let summary = tools::describe_call(&call.name, &call.arguments);
211
+
166
212
  // Plan tools — display only, no approval or filesystem access needed.
167
213
  if call.name == "set_plan" {
168
214
  if let Ok(args) = serde_json::from_str::<serde_json::Value>(&call.arguments) {
@@ -213,7 +259,7 @@ async fn dispatch_tool(
213
259
  }
214
260
  } else {
215
261
  let _ = events.send(StreamEvent::ToolCall {
216
- summary: tools::describe_call(&call.name, &call.arguments),
262
+ summary: summary.clone(),
217
263
  });
218
264
  }
219
265
 
@@ -233,9 +279,24 @@ async fn dispatch_tool(
233
279
  ApprovalDecision::AllowForTurn => approval_state.allow_for_turn = true,
234
280
  ApprovalDecision::Deny => return denied_message("user declined this action"),
235
281
  }
282
+ let _ = events.send(StreamEvent::Status {
283
+ message: format!("Applying approved action: {summary}"),
284
+ });
285
+ } else if tools::is_write_tool(&call.name) {
286
+ let _ = events.send(StreamEvent::ToolCall {
287
+ summary: summary.clone(),
288
+ });
236
289
  }
237
290
 
291
+ let tool_started = Instant::now();
238
292
  let result = tools::run(&call.name, &call.arguments).await;
293
+ let (ok, error) = parse_tool_result_status(&result);
294
+ let _ = events.send(StreamEvent::ToolResult {
295
+ summary: summary.clone(),
296
+ ok,
297
+ elapsed_ms: tool_started.elapsed().as_millis(),
298
+ error,
299
+ });
239
300
 
240
301
  // When the user already reviewed the diff in the approval preview, skip the
241
302
  // post-run FileOp so the same diff isn't printed twice.
@@ -252,6 +313,18 @@ async fn dispatch_tool(
252
313
  result
253
314
  }
254
315
 
316
+ fn parse_tool_result_status(result: &str) -> (bool, Option<String>) {
317
+ let Ok(json) = serde_json::from_str::<Value>(result) else {
318
+ return (true, None);
319
+ };
320
+ let ok = json.get("ok").and_then(Value::as_bool).unwrap_or(true);
321
+ let error = json
322
+ .get("error")
323
+ .and_then(Value::as_str)
324
+ .map(str::to_string);
325
+ (ok, error)
326
+ }
327
+
255
328
  // ── File-op diff helpers ──────────────────────────────────────────────────────
256
329
 
257
330
  enum FileOpSnapshot {
@@ -416,6 +489,13 @@ fn tool_limit_message(max_tool_rounds: usize) -> Value {
416
489
  })
417
490
  }
418
491
 
492
+ fn length_continuation_message() -> Value {
493
+ json!({
494
+ "role": "system",
495
+ "content": "Your previous response was cut off because it reached the output token limit. Continue from exactly where you left off. Do not repeat text you already produced and do not restart the answer. If you were in the middle of a tool call, re-issue that complete tool call now."
496
+ })
497
+ }
498
+
419
499
  fn tool_intent_reprompt_message() -> Value {
420
500
  json!({
421
501
  "role": "system",
@@ -778,6 +858,7 @@ struct StreamState {
778
858
  content: String,
779
859
  tool_calls: Vec<PartialToolCall>,
780
860
  usage: Option<Usage>,
861
+ finish_reason: Option<String>,
781
862
  done: bool,
782
863
  }
783
864
 
@@ -820,6 +901,13 @@ impl StreamState {
820
901
  let Some(first_choice) = choices.get(0) else {
821
902
  return None;
822
903
  };
904
+
905
+ // `finish_reason` is a sibling of `delta` and only carries a string on the
906
+ // final chunk for the choice (it's null on every intermediate chunk).
907
+ if let Some(reason) = first_choice.get("finish_reason").and_then(Value::as_str) {
908
+ self.finish_reason = Some(reason.to_string());
909
+ }
910
+
823
911
  let Some(delta) = first_choice.get("delta") else {
824
912
  return None;
825
913
  };
@@ -975,6 +1063,31 @@ mod tests {
975
1063
  assert_eq!(state.tool_calls[0].arguments, "{\"path\":\"x\"}");
976
1064
  }
977
1065
 
1066
+ #[test]
1067
+ fn captures_finish_reason_from_final_chunk() {
1068
+ let mut state = StreamState::default();
1069
+ // Intermediate chunk: finish_reason is null and must not be recorded.
1070
+ state.apply_chunk(&json!({
1071
+ "choices": [{ "delta": { "content": "partial" }, "finish_reason": null }]
1072
+ }));
1073
+ assert_eq!(state.finish_reason, None);
1074
+ // Final chunk reports truncation.
1075
+ state.apply_chunk(&json!({
1076
+ "choices": [{ "delta": {}, "finish_reason": "length" }]
1077
+ }));
1078
+ assert_eq!(state.finish_reason.as_deref(), Some("length"));
1079
+ assert_eq!(state.content, "partial");
1080
+ }
1081
+
1082
+ #[test]
1083
+ fn length_continuation_message_asks_to_resume_without_repeating() {
1084
+ let message = length_continuation_message();
1085
+ assert_eq!(message["role"], json!("system"));
1086
+ let content = message["content"].as_str().unwrap();
1087
+ assert!(content.contains("cut off"));
1088
+ assert!(content.contains("Do not repeat"));
1089
+ }
1090
+
978
1091
  #[test]
979
1092
  fn parses_usage_chunk() {
980
1093
  let mut state = StreamState::default();
@@ -1048,4 +1161,17 @@ mod tests {
1048
1161
  ));
1049
1162
  assert!(!looks_like_unfinished_tool_intent(""));
1050
1163
  }
1164
+
1165
+ #[test]
1166
+ fn parses_tool_result_status() {
1167
+ assert_eq!(parse_tool_result_status(r#"{"ok":true}"#), (true, None));
1168
+ assert_eq!(
1169
+ parse_tool_result_status(r#"{"ok":false,"error":"boom"}"#),
1170
+ (false, Some("boom".to_string()))
1171
+ );
1172
+ assert_eq!(
1173
+ parse_tool_result_status(r#"{"content":"no explicit ok"}"#),
1174
+ (true, None)
1175
+ );
1176
+ }
1051
1177
  }