nexcoder 0.1.4__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nexcoder-0.1.4 → nexcoder-0.1.6}/CHANGELOG.md +23 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/PKG-INFO +1 -1
- {nexcoder-0.1.4 → nexcoder-0.1.6}/pyproject.toml +1 -1
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/__init__.py +1 -3
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/agent.py +359 -9
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/api_client.py +42 -11
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/cli.py +9 -1
- {nexcoder-0.1.4 → nexcoder-0.1.6}/.github/workflows/ci.yml +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/.github/workflows/publish.yml +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/.gitignore +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/LICENSE +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/README.md +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/config.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/context.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/exceptions.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/indexer/__init__.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/indexer/index.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/indexer/parser.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/indexer/scanner.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/memory/__init__.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/memory/decisions.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/memory/errors.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/memory/project.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/planner.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/py.typed +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/reviewer.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/safety.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/test_runner.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/tools/__init__.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/tools/file_ops.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/tools/git_ops.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/tools/search.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/tools/shell.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/conftest.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_agent.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_chat.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_cli.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_context.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_errors.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_indexer.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_memory.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_rate_limiter.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_safety.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_test_runner.py +0 -0
- {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_tools.py +0 -0
|
@@ -5,6 +5,29 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.1.6] — 2026-02-25
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- Subtask decomposition in `nex chat` — each user turn is decomposed via the planner with scoped context per subtask
|
|
13
|
+
- Rate limiting for `nex chat` — all API calls (planner, subtask loops, memory updates) go through the rate limiter
|
|
14
|
+
- Memory updates in `nex chat` — Session Log written after each subtask, with pruning
|
|
15
|
+
- Fallback to direct mode in chat if the planner fails
|
|
16
|
+
|
|
17
|
+
### Changed
|
|
18
|
+
|
|
19
|
+
- Default `token_rate_limit` set to 20,000 (was 0) — rate limiting enabled out of the box
|
|
20
|
+
- `token_rate_limit = 20000` included in `nex init` config template
|
|
21
|
+
- 429 rate limit retries now wait for `retry-after` header (default 60s) instead of short 2/4/8s backoff
|
|
22
|
+
- `max_retries` increased from 3 to 5 for better rate limit recovery
|
|
23
|
+
- Planner API call now goes through the rate limiter (was ungated)
|
|
24
|
+
- Memory update API call now goes through the rate limiter (was ungated)
|
|
25
|
+
|
|
26
|
+
### Fixed
|
|
27
|
+
|
|
28
|
+
- 429 rate limit errors crashing the agent — retries were too short (14s total) for a 60s rate limit window
|
|
29
|
+
- Memory updates silently failing due to rate limits — now warns visibly instead of `except: pass`
|
|
30
|
+
|
|
8
31
|
## [0.1.1] — 2026-02-25
|
|
9
32
|
|
|
10
33
|
### Changed
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nexcoder
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: The coding agent that remembers — AI coding assistant with persistent memory and error learning.
|
|
5
5
|
Project-URL: Homepage, https://github.com/nex-ai/nex-ai
|
|
6
6
|
Project-URL: Repository, https://github.com/nex-ai/nex-ai
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "nexcoder"
|
|
7
|
-
version = "0.1.4"
|
|
7
|
+
version = "0.1.6"
|
|
8
8
|
description = "The coding agent that remembers — AI coding assistant with persistent memory and error learning."
|
|
9
9
|
readme = {file = "README.md", content-type = "text/markdown"}
|
|
10
10
|
license = "MIT"
|
|
@@ -800,11 +800,12 @@ class Agent:
|
|
|
800
800
|
|
|
801
801
|
|
|
802
802
|
class ChatSession:
|
|
803
|
-
"""Interactive chat session with
|
|
803
|
+
"""Interactive chat session with subtask decomposition.
|
|
804
804
|
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
805
|
+
Each user turn is decomposed into subtasks via the planner.
|
|
806
|
+
Each subtask runs with scoped context in its own mini loop,
|
|
807
|
+
keeping the main chat history clean. Falls back to direct
|
|
808
|
+
execution if the planner fails.
|
|
808
809
|
"""
|
|
809
810
|
|
|
810
811
|
def __init__(
|
|
@@ -815,6 +816,13 @@ class ChatSession:
|
|
|
815
816
|
safety: SafetyLayer,
|
|
816
817
|
dry_run: bool = False,
|
|
817
818
|
max_iterations: int = 25,
|
|
819
|
+
rate_limiter: RateLimiter | None = None,
|
|
820
|
+
memory: ProjectMemory | None = None,
|
|
821
|
+
haiku_model: str = "claude-haiku-4-5-20251001",
|
|
822
|
+
assembler: ContextAssembler | None = None,
|
|
823
|
+
error_patterns: list[Any] | None = None,
|
|
824
|
+
index: Any | None = None,
|
|
825
|
+
subtask_token_budget: int = 20_000,
|
|
818
826
|
) -> None:
|
|
819
827
|
"""Initialize a chat session.
|
|
820
828
|
|
|
@@ -825,6 +833,13 @@ class ChatSession:
|
|
|
825
833
|
safety: Safety layer for command approval.
|
|
826
834
|
dry_run: If True, skip destructive operations.
|
|
827
835
|
max_iterations: Max tool calls per user turn.
|
|
836
|
+
rate_limiter: Optional rate limiter for API calls.
|
|
837
|
+
memory: Optional project memory for session log updates.
|
|
838
|
+
haiku_model: Model for memory summary and planning calls.
|
|
839
|
+
assembler: Context assembler for building scoped subtask prompts.
|
|
840
|
+
error_patterns: Past error patterns for subtask context.
|
|
841
|
+
index: Code index for relevant code selection.
|
|
842
|
+
subtask_token_budget: Token budget per subtask context.
|
|
828
843
|
"""
|
|
829
844
|
self._client = api_client
|
|
830
845
|
self._system_prompt = system_prompt
|
|
@@ -832,6 +847,13 @@ class ChatSession:
|
|
|
832
847
|
self._safety = safety
|
|
833
848
|
self._dry_run = dry_run
|
|
834
849
|
self._max_iterations = max_iterations
|
|
850
|
+
self._rate_limiter = rate_limiter
|
|
851
|
+
self._memory = memory
|
|
852
|
+
self._haiku_model = haiku_model
|
|
853
|
+
self._assembler = assembler
|
|
854
|
+
self._error_patterns = error_patterns or []
|
|
855
|
+
self._index = index
|
|
856
|
+
self._subtask_token_budget = subtask_token_budget
|
|
835
857
|
self._messages: list[dict[str, Any]] = []
|
|
836
858
|
self._turn_count = 0
|
|
837
859
|
self._files_modified = False
|
|
@@ -852,7 +874,11 @@ class ChatSession:
|
|
|
852
874
|
return self._files_modified
|
|
853
875
|
|
|
854
876
|
async def send(self, user_message: str) -> str:
|
|
855
|
-
"""Process a
|
|
877
|
+
"""Process a user turn via subtask decomposition.
|
|
878
|
+
|
|
879
|
+
Decomposes the message into subtasks, executes each with
|
|
880
|
+
scoped context, and appends a summary to chat history. Falls
|
|
881
|
+
back to direct execution if the planner fails.
|
|
856
882
|
|
|
857
883
|
Args:
|
|
858
884
|
user_message: The user's message text.
|
|
@@ -861,28 +887,254 @@ class ChatSession:
|
|
|
861
887
|
The assistant's final text response for this turn.
|
|
862
888
|
"""
|
|
863
889
|
self._turn_count += 1
|
|
890
|
+
|
|
891
|
+
# Try subtask decomposition if assembler is available
|
|
892
|
+
if self._assembler is not None:
|
|
893
|
+
try:
|
|
894
|
+
return await self._send_with_subtasks(user_message)
|
|
895
|
+
except Exception as exc:
|
|
896
|
+
console.print(
|
|
897
|
+
f"[yellow]Subtask decomposition failed ({exc}), using direct mode...[/yellow]"
|
|
898
|
+
)
|
|
899
|
+
|
|
900
|
+
return await self._send_direct(user_message)
|
|
901
|
+
|
|
902
|
+
async def _send_with_subtasks(self, user_message: str) -> str:
    """Plan the user turn as subtasks, run each one, and record the outcome.

    The planner splits ``user_message`` into subtasks. Every subtask gets
    its own scoped system prompt and runs in a separate mini loop through
    ``_run_subtask_loop``. When project memory is configured, the Session
    Log is updated, pruned and reloaded after each subtask, so later
    subtasks are prompted with the refreshed memory text. Finally the user
    message and the joined subtask texts are appended to the main chat
    history as one user/assistant pair.

    Args:
        user_message: The user's message text.

    Returns:
        The subtask response texts joined by blank lines, or a placeholder
        sentence when the planner produced no subtasks.
    """
    assert self._assembler is not None

    if self._memory:
        project_memory = self._memory.load()
    else:
        project_memory = ""

    # Ask the planner to break the turn down.
    planner = Planner(self._client, haiku_model=self._haiku_model)
    console.print("\n[bold]Decomposing into subtasks...[/bold]")
    subtasks = await planner.plan(user_message, project_memory, self._rate_limiter)
    total = len(subtasks)

    console.print(f"[dim]Subtasks: {total} | Max iterations: {self._max_iterations}[/dim]\n")

    # Share the iteration allowance evenly, but never go below 5 per subtask.
    per_subtask_iterations = max(5, self._max_iterations // (total or 1))

    history_lines: list[str] = []
    responses: list[str] = []

    for position, subtask in enumerate(subtasks, start=1):
        files_label = ", ".join(subtask.file_paths) or "auto"
        banner = Panel(
            f"[bold]{subtask.description}[/bold]\n[dim]Files: {files_label}[/dim]",
            title=f"[bold cyan]Subtask {position}/{total}[/bold cyan]",
            border_style="cyan",
        )
        console.print(banner)

        # Assemble the context that only this subtask will see.
        scoped_code = self._assembler.select_scoped_code(
            subtask.file_paths,
            subtask.description,
            self._index,
            self._subtask_token_budget,
        )
        scoped_prompt = self._assembler.build_subtask_prompt(
            subtask_description=subtask.description,
            project_memory=project_memory,
            error_patterns=self._error_patterns,
            relevant_code=scoped_code,
            prior_context="".join(history_lines),
        )

        # The mini loop starts from an empty message history.
        outcome = await self._run_subtask_loop(
            system_prompt=scoped_prompt,
            task=subtask.description,
            max_iterations=per_subtask_iterations,
        )
        responses.append(outcome.text)

        # Persist a summary, then reload memory for the next subtask's prompt.
        if self._memory is not None:
            await self._update_memory_for_subtask(subtask.description, outcome)
            self._memory.prune_section("Session Log")
            project_memory = self._memory.load()

        history_lines.append(
            f"- Subtask {position}: {subtask.description} -> {outcome.text[:200]}\n"
        )

    if responses:
        combined = "\n\n".join(responses)
    else:
        combined = "No subtasks completed."

    # The main history only ever sees one clean user/assistant exchange.
    self._messages.extend(
        [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": [{"type": "text", "text": combined}]},
        ]
    )

    return combined
|
|
985
|
+
|
|
986
|
+
async def _run_subtask_loop(
    self,
    system_prompt: str,
    task: str,
    max_iterations: int,
) -> SubtaskResult:
    """Drive one subtask to completion in an isolated conversation.

    The conversation starts with ``task`` as the only user message. Each
    round sends the conversation to the API; if the reply contains tool
    calls they are executed and their results are fed back, otherwise the
    first text block of the reply is returned as the subtask's answer.

    Args:
        system_prompt: Scoped system prompt for this subtask.
        task: The subtask description, used as the opening user message.
        max_iterations: Upper bound on API round trips for this subtask.

    Returns:
        SubtaskResult carrying the answer text, the paths passed to
        ``write_file`` calls, and the number of iterations used. If the
        limit is reached the text is a fixed "did not complete" message.
    """
    conversation: list[dict[str, Any]] = [{"role": "user", "content": task}]
    written_paths: list[str] = []

    iteration = 0
    for iteration in range(1, max_iterations + 1):
        console.print(f"[dim]--- Subtask iteration {iteration} ---[/dim]")

        # Throttle before the call; the estimate covers prompt + task text.
        if self._rate_limiter is not None:
            await self._rate_limiter.wait_if_needed(
                ContextAssembler.estimate_tokens(system_prompt + task)
            )

        response = await self._client.send_message(
            messages=conversation,
            system=system_prompt,
            tools=TOOL_DEFINITIONS,
        )

        if self._rate_limiter is not None:
            self._rate_limiter.record(response.input_tokens)

        console.print(
            f"[dim]Tokens: {response.input_tokens} in / "
            f"{response.output_tokens} out | "
            f"Cost: ${self._client.usage.estimated_cost:.4f}[/dim]"
        )

        # Only the highest crossed cost threshold is reported.
        spent = self._client.usage.estimated_cost
        warning = None
        if spent > 5.0:
            warning = "[bold red]Warning:[/bold red] Session has exceeded $5.00 in API costs"
        elif spent > 1.0:
            warning = "[bold yellow]Warning:[/bold yellow] Session has exceeded $1.00 in API costs"
        if warning is not None:
            console.print(warning)

        reply_blocks = response.content
        tool_calls = [blk for blk in reply_blocks if blk.get("type") == "tool_use"]

        if not tool_calls:
            # No tools requested: the first text block is the final answer.
            answer = next(
                (str(blk.get("text", "")) for blk in reply_blocks if blk.get("type") == "text"),
                "",
            )
            return SubtaskResult(text=answer, files_touched=written_paths, iterations=iteration)

        conversation.append({"role": "assistant", "content": reply_blocks})

        tool_results: list[dict[str, Any]] = []
        for call in tool_calls:
            tool_name = call["name"]
            tool_input = call["input"]
            tool_id = call["id"]

            console.print(f" [cyan]Tool:[/cyan] {tool_name}({_summarize_input(tool_input)})")

            result, modified = await execute_tool(
                tool_name,
                tool_input,
                self._project_dir,
                self._safety,
                self._dry_run,
            )
            if modified:
                self._files_modified = True
                # NOTE(review): nesting of this check under `if modified` is
                # reconstructed from a source view without indentation - confirm.
                if tool_name == "write_file" and "path" in tool_input:
                    written_paths.append(tool_input["path"])

            if result.success:
                console.print(f" [green]OK[/green] ({len(result.output)} chars)")
                payload = result.output
            else:
                console.print(f" [red]Error:[/red] {result.error}")
                payload = f"Error: {result.error}"

            tool_results.append(
                {
                    "type": "tool_result",
                    "tool_use_id": tool_id,
                    "content": payload,
                    "is_error": not result.success,
                }
            )

        # Tool outputs go back to the model as the next user message.
        conversation.append({"role": "user", "content": tool_results})

    return SubtaskResult(
        text="Subtask did not complete within iteration limit.",
        files_touched=written_paths,
        iterations=iteration,
    )
|
|
1098
|
+
|
|
1099
|
+
async def _send_direct(self, user_message: str) -> str:
|
|
1100
|
+
"""Process a user turn directly without subtask decomposition.
|
|
1101
|
+
|
|
1102
|
+
Fallback mode when planner is unavailable or fails.
|
|
1103
|
+
|
|
1104
|
+
Args:
|
|
1105
|
+
user_message: The user's message text.
|
|
1106
|
+
|
|
1107
|
+
Returns:
|
|
1108
|
+
The assistant's final text response.
|
|
1109
|
+
"""
|
|
864
1110
|
self._messages.append({"role": "user", "content": user_message})
|
|
865
1111
|
|
|
866
1112
|
iterations = 0
|
|
867
1113
|
final_text = ""
|
|
1114
|
+
turn_files: list[str] = []
|
|
868
1115
|
|
|
869
1116
|
while iterations < self._max_iterations:
|
|
870
1117
|
iterations += 1
|
|
871
1118
|
|
|
1119
|
+
if self._rate_limiter is not None:
|
|
1120
|
+
estimated = ContextAssembler.estimate_tokens(self._system_prompt + user_message)
|
|
1121
|
+
await self._rate_limiter.wait_if_needed(estimated)
|
|
1122
|
+
|
|
872
1123
|
response = await self._client.send_message(
|
|
873
1124
|
messages=self._messages,
|
|
874
1125
|
system=self._system_prompt,
|
|
875
1126
|
tools=TOOL_DEFINITIONS,
|
|
876
1127
|
)
|
|
877
1128
|
|
|
878
|
-
|
|
1129
|
+
if self._rate_limiter is not None:
|
|
1130
|
+
self._rate_limiter.record(response.input_tokens)
|
|
1131
|
+
|
|
879
1132
|
console.print(
|
|
880
1133
|
f"[dim]Tokens: {response.input_tokens} in / "
|
|
881
1134
|
f"{response.output_tokens} out | "
|
|
882
1135
|
f"Cost: ${self._client.usage.estimated_cost:.4f}[/dim]"
|
|
883
1136
|
)
|
|
884
1137
|
|
|
885
|
-
# Cost warnings
|
|
886
1138
|
cost = self._client.usage.estimated_cost
|
|
887
1139
|
if cost > 5.0:
|
|
888
1140
|
console.print(
|
|
@@ -896,14 +1148,12 @@ class ChatSession:
|
|
|
896
1148
|
has_tool_use = any(block.get("type") == "tool_use" for block in response.content)
|
|
897
1149
|
|
|
898
1150
|
if not has_tool_use:
|
|
899
|
-
# Extract final text
|
|
900
1151
|
for block in response.content:
|
|
901
1152
|
if block.get("type") == "text":
|
|
902
1153
|
final_text = block.get("text", "")
|
|
903
1154
|
self._messages.append({"role": "assistant", "content": response.content})
|
|
904
1155
|
break
|
|
905
1156
|
|
|
906
|
-
# Execute tool calls
|
|
907
1157
|
assistant_content = response.content
|
|
908
1158
|
self._messages.append({"role": "assistant", "content": assistant_content})
|
|
909
1159
|
|
|
@@ -926,6 +1176,8 @@ class ChatSession:
|
|
|
926
1176
|
)
|
|
927
1177
|
if modified:
|
|
928
1178
|
self._files_modified = True
|
|
1179
|
+
if tool_name == "write_file" and "path" in tool_input:
|
|
1180
|
+
turn_files.append(tool_input["path"])
|
|
929
1181
|
|
|
930
1182
|
if result.success:
|
|
931
1183
|
console.print(f" [green]OK[/green] ({len(result.output)} chars)")
|
|
@@ -944,8 +1196,106 @@ class ChatSession:
|
|
|
944
1196
|
|
|
945
1197
|
self._messages.append({"role": "user", "content": tool_results})
|
|
946
1198
|
|
|
1199
|
+
# Update memory after turns that modified files
|
|
1200
|
+
if turn_files and self._memory is not None:
|
|
1201
|
+
await self._update_memory_for_direct(user_message, final_text, turn_files)
|
|
1202
|
+
|
|
947
1203
|
return final_text
|
|
948
1204
|
|
|
1205
|
+
async def _update_memory_for_subtask(
    self, subtask_description: str, result: SubtaskResult
) -> None:
    """Ask the summary model to condense a subtask and log the summary.

    Does nothing when no project memory is configured. Any exception raised
    while rate limiting, calling the API, or appending to memory is caught
    and reported on the console as a skipped memory update.

    Args:
        subtask_description: What the subtask was supposed to do.
        result: The subtask result; its text is cut to 500 characters and
            its file list to 10 entries before being sent.
    """
    if self._memory is None:
        return

    excerpt = result.text[:500]
    touched = ", ".join(result.files_touched[:10]) or "none"
    request_text = "\n".join(
        (
            f"Subtask: {subtask_description}",
            f"Result: {excerpt}",
            f"Files modified: {touched}",
        )
    )

    try:
        if self._rate_limiter is not None:
            await self._rate_limiter.wait_if_needed(
                ContextAssembler.estimate_tokens(request_text)
            )

        reply = await self._client.send_message(
            messages=[{"role": "user", "content": request_text}],
            system=_MEMORY_SUMMARY_PROMPT,
            model=self._haiku_model,
            max_tokens=256,
        )

        if self._rate_limiter is not None:
            self._rate_limiter.record(reply.input_tokens)

        # Only the first text block of the reply is used as the summary.
        summary = next(
            (blk.get("text", "").strip() for blk in reply.content if blk.get("type") == "text"),
            "",
        )

        if summary:
            stamp = datetime.now(tz=UTC).strftime("%Y-%m-%d")
            self._memory.append("Session Log", f"- [{stamp}] {summary}")
    except Exception as exc:
        # Best effort: a failed memory update must not abort the subtask run.
        console.print(f"[yellow]Memory update skipped:[/yellow] {exc}")
|
|
1249
|
+
|
|
1250
|
+
async def _update_memory_for_direct(
    self, user_message: str, response_text: str, files: list[str]
) -> None:
    """Ask the summary model to condense a direct-mode turn and log it.

    Does nothing when no project memory is configured. Any exception raised
    while rate limiting, calling the API, or writing to memory is caught
    and reported on the console as a skipped memory update.

    Args:
        user_message: The user's input for this turn (first 200 characters
            are sent).
        response_text: The assistant's final response (first 300 characters
            are sent).
        files: Files written during this turn (first 10 are sent).
    """
    if self._memory is None:
        return

    reply_excerpt = response_text[:300]
    touched = ", ".join(files[:10]) or "none"
    request_text = "\n".join(
        (
            f"User asked: {user_message[:200]}",
            f"Result: {reply_excerpt}",
            f"Files modified: {touched}",
        )
    )

    try:
        if self._rate_limiter is not None:
            await self._rate_limiter.wait_if_needed(
                ContextAssembler.estimate_tokens(request_text)
            )

        reply = await self._client.send_message(
            messages=[{"role": "user", "content": request_text}],
            system=_MEMORY_SUMMARY_PROMPT,
            model=self._haiku_model,
            max_tokens=256,
        )

        if self._rate_limiter is not None:
            self._rate_limiter.record(reply.input_tokens)

        # Only the first text block of the reply is used as the summary.
        summary = next(
            (blk.get("text", "").strip() for blk in reply.content if blk.get("type") == "text"),
            "",
        )

        if summary:
            stamp = datetime.now(tz=UTC).strftime("%Y-%m-%d")
            self._memory.append("Session Log", f"- [{stamp}] {summary}")
            # NOTE(review): prune placement inside `if summary` is reconstructed
            # from a source view without indentation - confirm.
            self._memory.prune_section("Session Log")
    except Exception as exc:
        # Best effort: a failed memory update must not abort the chat turn.
        console.print(f"[yellow]Memory update skipped:[/yellow] {exc}")
|
|
1298
|
+
|
|
949
1299
|
|
|
950
1300
|
async def run_task(task: str, config: NexConfig) -> None:
|
|
951
1301
|
"""Entry point called by the CLI to run a task.
|
|
@@ -122,6 +122,30 @@ class RateLimiter:
|
|
|
122
122
|
return
|
|
123
123
|
|
|
124
124
|
|
|
125
|
+
def _extract_retry_after(exc: Exception, default: float = 60.0) -> float:
|
|
126
|
+
"""Extract retry-after seconds from an Anthropic API error.
|
|
127
|
+
|
|
128
|
+
Inspects the exception's response headers for a ``retry-after`` value.
|
|
129
|
+
Falls back to *default* if the header is missing or unparseable.
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
exc: The exception raised by the Anthropic SDK.
|
|
133
|
+
default: Fallback wait time in seconds.
|
|
134
|
+
|
|
135
|
+
Returns:
|
|
136
|
+
Number of seconds to wait before retrying.
|
|
137
|
+
"""
|
|
138
|
+
resp = getattr(exc, "response", None)
|
|
139
|
+
if resp is not None:
|
|
140
|
+
header = getattr(resp, "headers", {}).get("retry-after")
|
|
141
|
+
if header:
|
|
142
|
+
try:
|
|
143
|
+
return max(float(header), 1.0)
|
|
144
|
+
except (ValueError, TypeError):
|
|
145
|
+
pass
|
|
146
|
+
return default
|
|
147
|
+
|
|
148
|
+
|
|
125
149
|
class AnthropicClient:
|
|
126
150
|
"""Async wrapper around the Anthropic API.
|
|
127
151
|
|
|
@@ -140,7 +164,7 @@ class AnthropicClient:
|
|
|
140
164
|
self,
|
|
141
165
|
api_key: str,
|
|
142
166
|
default_model: str = "claude-sonnet-4-20250514",
|
|
143
|
-
max_retries: int = 3,
|
|
167
|
+
max_retries: int = 5,
|
|
144
168
|
) -> None:
|
|
145
169
|
"""Initialize the Anthropic client.
|
|
146
170
|
|
|
@@ -196,7 +220,9 @@ class AnthropicClient:
|
|
|
196
220
|
kwargs["tools"] = tools
|
|
197
221
|
|
|
198
222
|
last_error: Exception | None = None
|
|
199
|
-
|
|
223
|
+
# 429 rate limits need more retries with longer waits than server errors
|
|
224
|
+
max_attempts = self._max_retries + 1
|
|
225
|
+
for attempt in range(max_attempts):
|
|
200
226
|
try:
|
|
201
227
|
response = await self._client.messages.create(**kwargs)
|
|
202
228
|
|
|
@@ -230,16 +256,21 @@ class AnthropicClient:
|
|
|
230
256
|
except Exception as exc:
|
|
231
257
|
last_error = exc
|
|
232
258
|
status_code = getattr(exc, "status_code", None)
|
|
259
|
+
is_rate_limit = status_code == 429
|
|
260
|
+
is_retryable = status_code in (500, 502, 503, 529)
|
|
261
|
+
|
|
262
|
+
if (is_rate_limit or is_retryable) and attempt < max_attempts - 1:
|
|
263
|
+
if is_rate_limit:
|
|
264
|
+
# Rate limits: extract retry-after from response headers,
|
|
265
|
+
# or default to 60s (the full rate-limit window).
|
|
266
|
+
wait = _extract_retry_after(exc, default=60.0)
|
|
267
|
+
else:
|
|
268
|
+
# Server errors: short exponential backoff
|
|
269
|
+
wait = float(2**attempt)
|
|
233
270
|
|
|
234
|
-
# Retry on rate limit or server errors
|
|
235
|
-
if status_code in (429, 500, 502, 503, 529) and attempt < self._max_retries:
|
|
236
|
-
wait = 2**attempt
|
|
237
|
-
retry_after = getattr(exc, "retry_after", None)
|
|
238
|
-
if retry_after:
|
|
239
|
-
wait = max(wait, float(retry_after))
|
|
240
271
|
console.print(
|
|
241
|
-
f"[yellow]API error {status_code}, retrying in {wait}s "
|
|
242
|
-
f"(attempt {attempt + 1}/{
|
|
272
|
+
f"[yellow]API error {status_code}, retrying in {wait:.0f}s "
|
|
273
|
+
f"(attempt {attempt + 1}/{max_attempts - 1})...[/yellow]"
|
|
243
274
|
)
|
|
244
275
|
await asyncio.sleep(wait)
|
|
245
276
|
continue
|
|
@@ -250,7 +281,7 @@ class AnthropicClient:
|
|
|
250
281
|
) from exc
|
|
251
282
|
|
|
252
283
|
raise APIError(
|
|
253
|
-
f"Failed after {
|
|
284
|
+
f"Failed after {max_attempts - 1} retries: {last_error}",
|
|
254
285
|
)
|
|
255
286
|
|
|
256
287
|
async def close(self) -> None:
|
|
@@ -438,7 +438,7 @@ async def _run_chat(config: NexConfig) -> None:
|
|
|
438
438
|
config: Nex configuration.
|
|
439
439
|
"""
|
|
440
440
|
from nex.agent import ChatSession
|
|
441
|
-
from nex.api_client import AnthropicClient
|
|
441
|
+
from nex.api_client import AnthropicClient, RateLimiter
|
|
442
442
|
from nex.context import ContextAssembler
|
|
443
443
|
from nex.indexer.index import IndexBuilder
|
|
444
444
|
from nex.memory.errors import ErrorPatternDB
|
|
@@ -464,6 +464,7 @@ async def _run_chat(config: NexConfig) -> None:
|
|
|
464
464
|
|
|
465
465
|
client = AnthropicClient(api_key=config.api_key, default_model=config.model)
|
|
466
466
|
safety = SafetyLayer(dry_run=config.dry_run)
|
|
467
|
+
rate_limiter = RateLimiter(tokens_per_minute=config.token_rate_limit)
|
|
467
468
|
|
|
468
469
|
session = ChatSession(
|
|
469
470
|
api_client=client,
|
|
@@ -472,6 +473,13 @@ async def _run_chat(config: NexConfig) -> None:
|
|
|
472
473
|
safety=safety,
|
|
473
474
|
dry_run=config.dry_run,
|
|
474
475
|
max_iterations=config.max_iterations,
|
|
476
|
+
rate_limiter=rate_limiter,
|
|
477
|
+
memory=memory,
|
|
478
|
+
haiku_model=config.haiku_model,
|
|
479
|
+
assembler=assembler,
|
|
480
|
+
error_patterns=error_patterns,
|
|
481
|
+
index=idx,
|
|
482
|
+
subtask_token_budget=config.subtask_token_budget,
|
|
475
483
|
)
|
|
476
484
|
|
|
477
485
|
try:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|