aru-code 0.19.0__tar.gz → 0.19.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {aru_code-0.19.0/aru_code.egg-info → aru_code-0.19.2}/PKG-INFO +1 -1
  2. aru_code-0.19.2/aru/__init__.py +1 -0
  3. {aru_code-0.19.0 → aru_code-0.19.2}/aru/context.py +53 -15
  4. {aru_code-0.19.0 → aru_code-0.19.2}/aru/providers.py +162 -21
  5. {aru_code-0.19.0 → aru_code-0.19.2/aru_code.egg-info}/PKG-INFO +1 -1
  6. {aru_code-0.19.0 → aru_code-0.19.2}/pyproject.toml +1 -1
  7. aru_code-0.19.0/aru/__init__.py +0 -1
  8. {aru_code-0.19.0 → aru_code-0.19.2}/LICENSE +0 -0
  9. {aru_code-0.19.0 → aru_code-0.19.2}/README.md +0 -0
  10. {aru_code-0.19.0 → aru_code-0.19.2}/aru/agent_factory.py +0 -0
  11. {aru_code-0.19.0 → aru_code-0.19.2}/aru/agents/__init__.py +0 -0
  12. {aru_code-0.19.0 → aru_code-0.19.2}/aru/agents/base.py +0 -0
  13. {aru_code-0.19.0 → aru_code-0.19.2}/aru/agents/executor.py +0 -0
  14. {aru_code-0.19.0 → aru_code-0.19.2}/aru/agents/planner.py +0 -0
  15. {aru_code-0.19.0 → aru_code-0.19.2}/aru/cache_patch.py +0 -0
  16. {aru_code-0.19.0 → aru_code-0.19.2}/aru/cli.py +0 -0
  17. {aru_code-0.19.0 → aru_code-0.19.2}/aru/commands.py +0 -0
  18. {aru_code-0.19.0 → aru_code-0.19.2}/aru/completers.py +0 -0
  19. {aru_code-0.19.0 → aru_code-0.19.2}/aru/config.py +0 -0
  20. {aru_code-0.19.0 → aru_code-0.19.2}/aru/display.py +0 -0
  21. {aru_code-0.19.0 → aru_code-0.19.2}/aru/history_blocks.py +0 -0
  22. {aru_code-0.19.0 → aru_code-0.19.2}/aru/permissions.py +0 -0
  23. {aru_code-0.19.0 → aru_code-0.19.2}/aru/runner.py +0 -0
  24. {aru_code-0.19.0 → aru_code-0.19.2}/aru/runtime.py +0 -0
  25. {aru_code-0.19.0 → aru_code-0.19.2}/aru/session.py +0 -0
  26. {aru_code-0.19.0 → aru_code-0.19.2}/aru/tools/__init__.py +0 -0
  27. {aru_code-0.19.0 → aru_code-0.19.2}/aru/tools/ast_tools.py +0 -0
  28. {aru_code-0.19.0 → aru_code-0.19.2}/aru/tools/codebase.py +0 -0
  29. {aru_code-0.19.0 → aru_code-0.19.2}/aru/tools/gitignore.py +0 -0
  30. {aru_code-0.19.0 → aru_code-0.19.2}/aru/tools/mcp_client.py +0 -0
  31. {aru_code-0.19.0 → aru_code-0.19.2}/aru/tools/ranker.py +0 -0
  32. {aru_code-0.19.0 → aru_code-0.19.2}/aru/tools/tasklist.py +0 -0
  33. {aru_code-0.19.0 → aru_code-0.19.2}/aru_code.egg-info/SOURCES.txt +0 -0
  34. {aru_code-0.19.0 → aru_code-0.19.2}/aru_code.egg-info/dependency_links.txt +0 -0
  35. {aru_code-0.19.0 → aru_code-0.19.2}/aru_code.egg-info/entry_points.txt +0 -0
  36. {aru_code-0.19.0 → aru_code-0.19.2}/aru_code.egg-info/requires.txt +0 -0
  37. {aru_code-0.19.0 → aru_code-0.19.2}/aru_code.egg-info/top_level.txt +0 -0
  38. {aru_code-0.19.0 → aru_code-0.19.2}/setup.cfg +0 -0
  39. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_agents_base.py +0 -0
  40. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_cli.py +0 -0
  41. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_cli_advanced.py +0 -0
  42. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_cli_base.py +0 -0
  43. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_cli_completers.py +0 -0
  44. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_cli_new.py +0 -0
  45. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_cli_run_cli.py +0 -0
  46. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_cli_session.py +0 -0
  47. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_cli_shell.py +0 -0
  48. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_codebase.py +0 -0
  49. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_confabulation_regression.py +0 -0
  50. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_config.py +0 -0
  51. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_context.py +0 -0
  52. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_executor.py +0 -0
  53. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_gitignore.py +0 -0
  54. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_main.py +0 -0
  55. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_mcp_client.py +0 -0
  56. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_permissions.py +0 -0
  57. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_planner.py +0 -0
  58. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_providers.py +0 -0
  59. {aru_code-0.19.0 → aru_code-0.19.2}/tests/test_ranker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aru-code
3
- Version: 0.19.0
3
+ Version: 0.19.2
4
4
  Summary: A Claude Code clone built with Agno agents
5
5
  Author-email: Estevao <estevaofon@gmail.com>
6
6
  License-Expression: MIT
@@ -0,0 +1 @@
1
+ __version__ = "0.19.2"
@@ -47,6 +47,23 @@ TRUNCATE_MAX_LINE_LENGTH = 1500 # chars per individual line (prevents minified
47
47
  # Directory for saving full truncated outputs (like opencode pattern)
48
48
  TRUNCATE_SAVE_DIR = ".aru/truncated"
49
49
 
50
+ # Compaction: chars of recent conversation preserved verbatim post-compact.
51
+ #
52
+ # Separate from the prune protect window (160K) because they measure
53
+ # different things:
54
+ # - Prune protect: "how much tool_result content stays intact"
55
+ # - Compact recent: "how much full-message history stays verbatim after
56
+ # the summary replaces the older portion"
57
+ #
58
+ # Set to 80K chars (~20K tokens) — half the prune window. Rationale:
59
+ # with the compactor now running on the main model (not a small one),
60
+ # summaries are faithful enough that we don't need 40K of recent overlap
61
+ # as a safety net. 20K still covers 3-6 recent turns verbatim, which
62
+ # mirrors the "last few exchanges" a human would re-read to resume work.
63
+ # Going to zero would match opencode exactly but requires the reactive
64
+ # overflow replay flow we haven't implemented yet.
65
+ COMPACT_RECENT_CHARS = 80_000
66
+
50
67
  # Compaction: trigger when per-call input tokens approach real overflow.
51
68
  # Matches opencode's philosophy: only fire near the model's actual context
52
69
  # limit, not routinely. Routine context reduction is handled by prune_history
@@ -491,19 +508,29 @@ def would_prune(history: list[dict], model_id: str = "default") -> bool:
491
508
  def _split_history(history: list[dict], model_id: str = "default") -> tuple[list[dict], list[dict]]:
492
509
  """Split history into old (to summarize) and recent (to keep intact).
493
510
 
494
- Uses the same protection window as pruning. Defensively, the first
495
- user turn (index 0) is always pulled into `recent` so the original
496
- ask survives literal even through a full compaction the compactor
497
- extracts it into the `## Goal` section of the summary, but keeping
498
- it in recent too means the agent can quote it verbatim afterward.
511
+ Uses `COMPACT_RECENT_CHARS` (80K chars ≈ 20K tokens) as the "recent"
512
+ budget — half of the prune protect window. Rationale: the compactor
513
+ now runs on the main model and produces high-fidelity summaries, so
514
+ we don't need 40K of recent overlap as a safety net. 20K covers 3-6
515
+ recent turns verbatim, which is enough to absorb the gap between
516
+ the last summarized state and the next turn.
517
+
518
+ Defensively, the first user turn (index 0) is always pulled into
519
+ `recent` so the original ask survives literal even through a full
520
+ compaction — the compactor extracts it into the `## Goal` section
521
+ of the summary, but keeping it in recent too means the agent can
522
+ quote it verbatim afterward.
523
+
524
+ The `model_id` parameter is retained for signature compatibility;
525
+ the recent budget is a flat value not scaled by model context.
499
526
  """
527
+ del model_id # unused — recent budget is flat across models
500
528
  from aru.history_blocks import item_char_len
501
- protect_chars = _get_prune_protect_chars(model_id)
502
529
  protected = 0
503
530
  split_idx = len(history)
504
531
  for i in range(len(history) - 1, -1, -1):
505
532
  msg_len = item_char_len(history[i])
506
- if protected + msg_len <= protect_chars:
533
+ if protected + msg_len <= COMPACT_RECENT_CHARS:
507
534
  protected += msg_len
508
535
  split_idx = i
509
536
  else:
@@ -617,10 +644,20 @@ async def compact_conversation(
617
644
  ) -> list[dict[str, str]]:
618
645
  """Run the compaction agent to summarize and replace history.
619
646
 
620
- Uses a small/fast model for the summarization to minimize cost.
621
- Falls back to simple truncation if the agent call fails.
647
+ Uses the **same model** as the main session (`model_ref`), not a
648
+ cheaper small model. Rationale:
649
+
650
+ - Compaction is rare (only on real overflow, ~0-2× per long session).
651
+ - The summary is the *only* persistent record of pre-window history.
652
+ - A weaker compactor risks dropping subtle decisions that the main
653
+ model would have caught — and once dropped, they cannot be recovered
654
+ mid-session.
655
+ - The marginal cost (Sonnet: ~$0.20-0.40 per session; Opus: a few
656
+ dollars) is justified by the fidelity gain on a non-recoverable
657
+ step.
658
+
659
+ Falls back to a mechanical summary if the agent call fails.
622
660
  """
623
- from aru.runtime import get_ctx
624
661
  from aru.providers import create_model
625
662
 
626
663
  prompt = build_compaction_prompt(history, plan_task, model_id=model_id)
@@ -628,16 +665,17 @@ async def compact_conversation(
628
665
  try:
629
666
  from agno.agent import Agent
630
667
 
631
- small_ref = get_ctx().small_model_ref
632
668
  compactor = Agent(
633
669
  name="Compactor",
634
- model=create_model(small_ref, max_tokens=4096),
670
+ model=create_model(model_ref, max_tokens=4096),
635
671
  instructions=(
636
672
  "You summarize coding conversations concisely. Output ONLY the requested sections, no preamble. "
637
673
  "Preserve: user goals, explicit instructions/preferences, file paths with line numbers, "
638
- "function/class names that were modified, what remains to be done, AND verbatim excerpts "
639
- "from any file contents shown in the conversation (signatures, critical constants, "
640
- "bug-related lines) under the '## File contents (key excerpts)' section. "
674
+ "function/class names that were modified, what remains to be done. "
675
+ "For the '## File contents (key excerpts)' section, use your judgment: "
676
+ "if a file was central to the work (being debugged, actively edited, or referenced "
677
+ "in a decision), include the critical lines verbatim; if a file was only briefly "
678
+ "read for context, just list the path. Do not mechanically copy everything. "
641
679
  "Drop: greetings, reasoning chains, redundant tool output, transient status messages."
642
680
  ),
643
681
  markdown=True,
@@ -330,35 +330,162 @@ def create_model(
330
330
  )
331
331
 
332
332
 
333
- def _make_cached_openai_chat_class():
334
- """Create a CachedOpenAIChat subclass (lazy import to avoid top-level dependency)."""
333
+ def _apply_cache_control(formatted_msg: dict) -> bool:
334
+ """Attach `cache_control: ephemeral` to a formatted OpenAI message.
335
+
336
+ Returns True if the marker was applied (i.e., the message had cacheable
337
+ content and wasn't already tagged). Skips messages whose content is not
338
+ a string or block list, messages already marked, and empty content.
339
+
340
+ Used by `CachedOpenAIChat` to tag system + last N user/assistant messages
341
+ for providers that honor OpenAI-style content blocks with `cache_control`
342
+ (DashScope/Qwen, and any OpenAI-compatible endpoint that mirrors the
343
+ Anthropic cache_control convention).
344
+ """
345
+ content = formatted_msg.get("content")
346
+ cache_tag = {"type": "ephemeral"}
347
+ if isinstance(content, str):
348
+ if not content:
349
+ return False
350
+ formatted_msg["content"] = [
351
+ {"type": "text", "text": content, "cache_control": cache_tag}
352
+ ]
353
+ return True
354
+ if isinstance(content, list) and content:
355
+ last = content[-1]
356
+ if isinstance(last, dict) and "cache_control" not in last:
357
+ last["cache_control"] = cache_tag
358
+ return True
359
+ return False
360
+
361
+
362
+ def _make_cached_openai_chat_class(mark_recent_messages: bool = False):
363
+ """Create a CachedOpenAIChat subclass that injects prompt-cache markers.
364
+
365
+ DashScope (Qwen) and other OpenAI-compatible APIs support explicit prompt
366
+ caching via `cache_control: {"type": "ephemeral"}` on content blocks. This
367
+ subclass tags:
368
+
369
+ 1. The **system message** — always. This is the minimum cache coverage
370
+ and is safe for any OpenAI-compatible provider that supports the marker
371
+ (unknown fields are ignored by providers that don't).
372
+
373
+ 2. The **last 2 non-system / non-tool messages** — only when
374
+ `mark_recent_messages=True`. This unlocks prefix caching for the growing
375
+ conversation history (the big win: 5-8× cost reduction on multi-turn
376
+ sessions), but is gated because OpenAI's own API may not accept the
377
+ marker on user/assistant messages. The flag is wired from
378
+ `_create_provider_model` based on whether the provider has a custom
379
+ `base_url` — a strong signal that we're talking to a non-official
380
+ OpenAI endpoint (Qwen/DashScope/custom) that mirrors the Anthropic
381
+ convention.
382
+
383
+ Implementation: each of the 4 invoke methods (invoke/ainvoke plus stream
384
+ variants) pre-formats the full batch using the parent's `_format_message`,
385
+ tags the target messages via `_apply_cache_control`, stores the tagged
386
+ versions in `self._current_cache_tag_map` keyed by `id(original)`, and
387
+ then delegates to `super().<method>()`. The overridden `_format_message`
388
+ consults the map and returns the pre-tagged version when present.
389
+ """
335
390
  from agno.models.openai import OpenAIChat
336
391
  from agno.models.message import Message
337
392
 
338
393
  class CachedOpenAIChat(OpenAIChat):
339
- """OpenAIChat subclass that injects cache_control into system messages.
394
+ _cache_recent_messages: bool = mark_recent_messages
340
395
 
341
- DashScope (Qwen) and other OpenAI-compatible APIs support explicit prompt caching
342
- via cache_control: {"type": "ephemeral"} on content blocks. This subclass
343
- automatically adds that marker to system messages so the provider can cache
344
- the system prompt between turns (up to 90% cost reduction on cached tokens).
345
- """
396
+ # --- core hook ------------------------------------------------------
346
397
 
347
398
  def _format_message(self, message: Message, compress_tool_results: bool = False):
399
+ # If an invoke-level pre-tag map is active, use the tagged version
400
+ tag_map = getattr(self, "_current_cache_tag_map", None)
401
+ if tag_map is not None:
402
+ pre = tag_map.get(id(message))
403
+ if pre is not None:
404
+ return pre
405
+
406
+ # Otherwise fall back to parent format + always-tag system
348
407
  formatted = super()._format_message(message, compress_tool_results)
349
-
350
- if message.role == "system" and isinstance(formatted.get("content"), str):
351
- text = formatted["content"]
352
- formatted["content"] = [
353
- {
354
- "type": "text",
355
- "text": text,
356
- "cache_control": {"type": "ephemeral"},
357
- }
358
- ]
359
-
408
+ if message.role == "system":
409
+ _apply_cache_control(formatted)
360
410
  return formatted
361
411
 
412
+ # --- batch pre-tagging ---------------------------------------------
413
+
414
+ def _build_cache_tag_map(self, messages, compress_tool_results: bool) -> dict:
415
+ """Format all messages up-front and tag system + last 2 recent.
416
+
417
+ Returns id(original_message) -> tagged formatted dict so the
418
+ overridden `_format_message` can substitute during super's
419
+ inline list comprehension.
420
+
421
+ Note: `OpenAIChat._format_message` rewrites `system` → `developer`
422
+ for newer OpenAI models. We check `Message.role` on the ORIGINAL
423
+ message (not the formatted dict) so the logic works regardless of
424
+ that rewrite.
425
+ """
426
+ # Use OpenAIChat's format directly (not self's) so the tag_map
427
+ # we're building doesn't cause recursive substitution.
428
+ base = [
429
+ OpenAIChat._format_message(self, m, compress_tool_results)
430
+ for m in messages
431
+ ]
432
+
433
+ # Tag the first system message (first Message with role=="system")
434
+ for orig, fmt in zip(messages, base):
435
+ if orig.role == "system":
436
+ _apply_cache_control(fmt)
437
+ break
438
+
439
+ # Optionally tag the last 2 non-system / non-tool messages.
440
+ # Iterate original+formatted in reverse so role checks stay
441
+ # on the unmodified Message role.
442
+ if self._cache_recent_messages:
443
+ marked = 0
444
+ for orig, fmt in zip(reversed(messages), reversed(base)):
445
+ if marked >= 2:
446
+ break
447
+ if orig.role in ("system", "tool"):
448
+ continue
449
+ if _apply_cache_control(fmt):
450
+ marked += 1
451
+
452
+ return {id(orig): fmt for orig, fmt in zip(messages, base)}
453
+
454
+ # --- invoke overrides: set up tag map, delegate to parent -----------
455
+
456
+ def invoke(self, messages, assistant_message, **kwargs):
457
+ compress = kwargs.get("compress_tool_results", False)
458
+ self._current_cache_tag_map = self._build_cache_tag_map(messages, compress)
459
+ try:
460
+ return super().invoke(messages, assistant_message, **kwargs)
461
+ finally:
462
+ self._current_cache_tag_map = None
463
+
464
+ async def ainvoke(self, messages, assistant_message, **kwargs):
465
+ compress = kwargs.get("compress_tool_results", False)
466
+ self._current_cache_tag_map = self._build_cache_tag_map(messages, compress)
467
+ try:
468
+ return await super().ainvoke(messages, assistant_message, **kwargs)
469
+ finally:
470
+ self._current_cache_tag_map = None
471
+
472
+ def invoke_stream(self, messages, assistant_message, **kwargs):
473
+ compress = kwargs.get("compress_tool_results", False)
474
+ self._current_cache_tag_map = self._build_cache_tag_map(messages, compress)
475
+ try:
476
+ yield from super().invoke_stream(messages, assistant_message, **kwargs)
477
+ finally:
478
+ self._current_cache_tag_map = None
479
+
480
+ async def ainvoke_stream(self, messages, assistant_message, **kwargs):
481
+ compress = kwargs.get("compress_tool_results", False)
482
+ self._current_cache_tag_map = self._build_cache_tag_map(messages, compress)
483
+ try:
484
+ async for item in super().ainvoke_stream(messages, assistant_message, **kwargs):
485
+ yield item
486
+ finally:
487
+ self._current_cache_tag_map = None
488
+
362
489
  return CachedOpenAIChat
363
490
 
364
491
 
@@ -400,7 +527,14 @@ def _create_provider_model(
400
527
  }
401
528
  params.update(kwargs)
402
529
  if cache_system_prompt:
403
- CachedOpenAIChat = _make_cached_openai_chat_class()
530
+ # Only mark recent messages with cache_control when the provider
531
+ # has a custom base_url (DashScope/Qwen/custom OpenAI-compat).
532
+ # Official OpenAI's API may reject the marker on user/assistant
533
+ # messages — for them, keep system-only caching.
534
+ mark_recent = bool(provider.base_url)
535
+ CachedOpenAIChat = _make_cached_openai_chat_class(
536
+ mark_recent_messages=mark_recent
537
+ )
404
538
  return CachedOpenAIChat(**params)
405
539
  from agno.models.openai import OpenAIChat
406
540
  return OpenAIChat(**params)
@@ -463,7 +597,14 @@ def _create_provider_model(
463
597
  }
464
598
  params.update(kwargs)
465
599
  if cache_system_prompt:
466
- CachedOpenAIChat = _make_cached_openai_chat_class()
600
+ # Fallback branch always means "unknown OpenAI-compat provider"
601
+ # — if there's a base_url it's a custom endpoint that may honor
602
+ # the cache_control marker. Without base_url we're in an odd
603
+ # state (unknown type, no endpoint) — default to system-only.
604
+ mark_recent = bool(provider.base_url)
605
+ CachedOpenAIChat = _make_cached_openai_chat_class(
606
+ mark_recent_messages=mark_recent
607
+ )
467
608
  return CachedOpenAIChat(**params)
468
609
  from agno.models.openai import OpenAIChat
469
610
  return OpenAIChat(**params)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: aru-code
3
- Version: 0.19.0
3
+ Version: 0.19.2
4
4
  Summary: A Claude Code clone built with Agno agents
5
5
  Author-email: Estevao <estevaofon@gmail.com>
6
6
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "aru-code"
7
- version = "0.19.0"
7
+ version = "0.19.2"
8
8
  description = "A Claude Code clone built with Agno agents"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -1 +0,0 @@
1
- __version__ = "0.19.0"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes