nexcoder 0.1.4__tar.gz → 0.1.6__tar.gz

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (45)
  1. {nexcoder-0.1.4 → nexcoder-0.1.6}/CHANGELOG.md +23 -0
  2. {nexcoder-0.1.4 → nexcoder-0.1.6}/PKG-INFO +1 -1
  3. {nexcoder-0.1.4 → nexcoder-0.1.6}/pyproject.toml +1 -1
  4. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/__init__.py +1 -3
  5. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/agent.py +359 -9
  6. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/api_client.py +42 -11
  7. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/cli.py +9 -1
  8. {nexcoder-0.1.4 → nexcoder-0.1.6}/.github/workflows/ci.yml +0 -0
  9. {nexcoder-0.1.4 → nexcoder-0.1.6}/.github/workflows/publish.yml +0 -0
  10. {nexcoder-0.1.4 → nexcoder-0.1.6}/.gitignore +0 -0
  11. {nexcoder-0.1.4 → nexcoder-0.1.6}/LICENSE +0 -0
  12. {nexcoder-0.1.4 → nexcoder-0.1.6}/README.md +0 -0
  13. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/config.py +0 -0
  14. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/context.py +0 -0
  15. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/exceptions.py +0 -0
  16. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/indexer/__init__.py +0 -0
  17. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/indexer/index.py +0 -0
  18. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/indexer/parser.py +0 -0
  19. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/indexer/scanner.py +0 -0
  20. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/memory/__init__.py +0 -0
  21. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/memory/decisions.py +0 -0
  22. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/memory/errors.py +0 -0
  23. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/memory/project.py +0 -0
  24. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/planner.py +0 -0
  25. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/py.typed +0 -0
  26. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/reviewer.py +0 -0
  27. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/safety.py +0 -0
  28. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/test_runner.py +0 -0
  29. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/tools/__init__.py +0 -0
  30. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/tools/file_ops.py +0 -0
  31. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/tools/git_ops.py +0 -0
  32. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/tools/search.py +0 -0
  33. {nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/tools/shell.py +0 -0
  34. {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/conftest.py +0 -0
  35. {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_agent.py +0 -0
  36. {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_chat.py +0 -0
  37. {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_cli.py +0 -0
  38. {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_context.py +0 -0
  39. {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_errors.py +0 -0
  40. {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_indexer.py +0 -0
  41. {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_memory.py +0 -0
  42. {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_rate_limiter.py +0 -0
  43. {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_safety.py +0 -0
  44. {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_test_runner.py +0 -0
  45. {nexcoder-0.1.4 → nexcoder-0.1.6}/tests/test_tools.py +0 -0
@@ -5,6 +5,29 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.1.6] — 2026-02-25
9
+
10
+ ### Added
11
+
12
+ - Subtask decomposition in `nex chat` — each user turn is decomposed via the planner with scoped context per subtask
13
+ - Rate limiting for `nex chat` — all API calls (planner, subtask loops, memory updates) go through the rate limiter
14
+ - Memory updates in `nex chat` — Session Log written after each subtask, with pruning
15
+ - Fallback to direct mode in chat if the planner fails
16
+
17
+ ### Changed
18
+
19
+ - Default `token_rate_limit` set to 20,000 (was 0) — rate limiting enabled out of the box
20
+ - `token_rate_limit = 20000` included in `nex init` config template
21
+ - 429 rate limit retries now wait for `retry-after` header (default 60s) instead of short 2/4/8s backoff
22
+ - `max_retries` increased from 3 to 5 for better rate limit recovery
23
+ - Planner API call now goes through the rate limiter (was ungated)
24
+ - Memory update API call now goes through the rate limiter (was ungated)
25
+
26
+ ### Fixed
27
+
28
+ - 429 rate limit errors crashing the agent — retries were too short (14s total) for a 60s rate limit window
29
+ - Memory updates silently failing due to rate limits — now warns visibly instead of `except: pass`
30
+
8
31
  ## [0.1.1] — 2026-02-25
9
32
 
10
33
  ### Changed
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nexcoder
3
- Version: 0.1.4
3
+ Version: 0.1.6
4
4
  Summary: The coding agent that remembers — AI coding assistant with persistent memory and error learning.
5
5
  Project-URL: Homepage, https://github.com/nex-ai/nex-ai
6
6
  Project-URL: Repository, https://github.com/nex-ai/nex-ai
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "nexcoder"
7
- version = "0.1.4"
7
+ version = "0.1.6"
8
8
  description = "The coding agent that remembers — AI coding assistant with persistent memory and error learning."
9
9
  readme = {file = "README.md", content-type = "text/markdown"}
10
10
  license = "MIT"
@@ -1,6 +1,4 @@
1
1
  """Nex AI — The Coding Agent That Remembers."""
2
2
 
3
- from __future__ import annotations
4
-
5
- __version__ = "0.1.0"
3
+ __version__ = "v0.1.6"
6
4
  __app_name__ = "nex"
@@ -800,11 +800,12 @@ class Agent:
800
800
 
801
801
 
802
802
  class ChatSession:
803
- """Interactive chat session with persistent message history.
803
+ """Interactive chat session with subtask decomposition.
804
804
 
805
- Unlike Agent (which runs a single task to completion), ChatSession
806
- maintains a conversation across multiple user turns, accumulating
807
- context in its message history.
805
+ Each user turn is decomposed into subtasks via the planner.
806
+ Each subtask runs with scoped context in its own mini loop,
807
+ keeping the main chat history clean. Falls back to direct
808
+ execution if the planner fails.
808
809
  """
809
810
 
810
811
  def __init__(
@@ -815,6 +816,13 @@ class ChatSession:
815
816
  safety: SafetyLayer,
816
817
  dry_run: bool = False,
817
818
  max_iterations: int = 25,
819
+ rate_limiter: RateLimiter | None = None,
820
+ memory: ProjectMemory | None = None,
821
+ haiku_model: str = "claude-haiku-4-5-20251001",
822
+ assembler: ContextAssembler | None = None,
823
+ error_patterns: list[Any] | None = None,
824
+ index: Any | None = None,
825
+ subtask_token_budget: int = 20_000,
818
826
  ) -> None:
819
827
  """Initialize a chat session.
820
828
 
@@ -825,6 +833,13 @@ class ChatSession:
825
833
  safety: Safety layer for command approval.
826
834
  dry_run: If True, skip destructive operations.
827
835
  max_iterations: Max tool calls per user turn.
836
+ rate_limiter: Optional rate limiter for API calls.
837
+ memory: Optional project memory for session log updates.
838
+ haiku_model: Model for memory summary and planning calls.
839
+ assembler: Context assembler for building scoped subtask prompts.
840
+ error_patterns: Past error patterns for subtask context.
841
+ index: Code index for relevant code selection.
842
+ subtask_token_budget: Token budget per subtask context.
828
843
  """
829
844
  self._client = api_client
830
845
  self._system_prompt = system_prompt
@@ -832,6 +847,13 @@ class ChatSession:
832
847
  self._safety = safety
833
848
  self._dry_run = dry_run
834
849
  self._max_iterations = max_iterations
850
+ self._rate_limiter = rate_limiter
851
+ self._memory = memory
852
+ self._haiku_model = haiku_model
853
+ self._assembler = assembler
854
+ self._error_patterns = error_patterns or []
855
+ self._index = index
856
+ self._subtask_token_budget = subtask_token_budget
835
857
  self._messages: list[dict[str, Any]] = []
836
858
  self._turn_count = 0
837
859
  self._files_modified = False
@@ -852,7 +874,11 @@ class ChatSession:
852
874
  return self._files_modified
853
875
 
854
876
  async def send(self, user_message: str) -> str:
855
- """Process a single user turn, executing tools as needed.
877
+ """Process a user turn via subtask decomposition.
878
+
879
+ Decomposes the message into subtasks, executes each with
880
+ scoped context, and appends a summary to chat history. Falls
881
+ back to direct execution if the planner fails.
856
882
 
857
883
  Args:
858
884
  user_message: The user's message text.
@@ -861,28 +887,254 @@ class ChatSession:
861
887
  The assistant's final text response for this turn.
862
888
  """
863
889
  self._turn_count += 1
890
+
891
+ # Try subtask decomposition if assembler is available
892
+ if self._assembler is not None:
893
+ try:
894
+ return await self._send_with_subtasks(user_message)
895
+ except Exception as exc:
896
+ console.print(
897
+ f"[yellow]Subtask decomposition failed ({exc}), using direct mode...[/yellow]"
898
+ )
899
+
900
+ return await self._send_direct(user_message)
901
+
902
+ async def _send_with_subtasks(self, user_message: str) -> str:
903
+ """Decompose user message into subtasks and execute each.
904
+
905
+ Each subtask runs in its own mini loop with scoped context.
906
+ Results are collected and a combined response is appended to
907
+ the main chat history.
908
+
909
+ Args:
910
+ user_message: The user's message text.
911
+
912
+ Returns:
913
+ Combined response from all subtasks.
914
+ """
915
+ assert self._assembler is not None
916
+
917
+ project_memory = self._memory.load() if self._memory else ""
918
+
919
+ # Decompose via planner
920
+ planner = Planner(self._client, haiku_model=self._haiku_model)
921
+ console.print("\n[bold]Decomposing into subtasks...[/bold]")
922
+ subtasks = await planner.plan(user_message, project_memory, self._rate_limiter)
923
+
924
+ console.print(
925
+ f"[dim]Subtasks: {len(subtasks)} | Max iterations: {self._max_iterations}[/dim]\n"
926
+ )
927
+
928
+ iters_per_subtask = max(5, self._max_iterations // max(len(subtasks), 1))
929
+
930
+ prior_context = ""
931
+ subtask_results: list[str] = []
932
+
933
+ for i, subtask in enumerate(subtasks, 1):
934
+ console.print(
935
+ Panel(
936
+ f"[bold]{subtask.description}[/bold]\n"
937
+ f"[dim]Files: {', '.join(subtask.file_paths) or 'auto'}[/dim]",
938
+ title=f"[bold cyan]Subtask {i}/{len(subtasks)}[/bold cyan]",
939
+ border_style="cyan",
940
+ )
941
+ )
942
+
943
+ # Build scoped context for this subtask
944
+ scoped_code = self._assembler.select_scoped_code(
945
+ subtask.file_paths,
946
+ subtask.description,
947
+ self._index,
948
+ self._subtask_token_budget,
949
+ )
950
+
951
+ system_prompt = self._assembler.build_subtask_prompt(
952
+ subtask_description=subtask.description,
953
+ project_memory=project_memory,
954
+ error_patterns=self._error_patterns,
955
+ relevant_code=scoped_code,
956
+ prior_context=prior_context,
957
+ )
958
+
959
+ # Run subtask in a mini loop with fresh messages
960
+ sub_result = await self._run_subtask_loop(
961
+ system_prompt=system_prompt,
962
+ task=subtask.description,
963
+ max_iterations=iters_per_subtask,
964
+ )
965
+
966
+ subtask_results.append(sub_result.text)
967
+
968
+ # Update memory after each subtask
969
+ if self._memory is not None:
970
+ await self._update_memory_for_subtask(subtask.description, sub_result)
971
+ self._memory.prune_section("Session Log")
972
+ project_memory = self._memory.load()
973
+
974
+ prior_context += f"- Subtask {i}: {subtask.description} -> {sub_result.text[:200]}\n"
975
+
976
+ combined = "\n\n".join(subtask_results) if subtask_results else "No subtasks completed."
977
+
978
+ # Append to main chat history as a clean user/assistant pair
979
+ self._messages.append({"role": "user", "content": user_message})
980
+ self._messages.append(
981
+ {"role": "assistant", "content": [{"type": "text", "text": combined}]}
982
+ )
983
+
984
+ return combined
985
+
986
+ async def _run_subtask_loop(
987
+ self,
988
+ system_prompt: str,
989
+ task: str,
990
+ max_iterations: int,
991
+ ) -> SubtaskResult:
992
+ """Run a focused mini loop for a single subtask.
993
+
994
+ Uses fresh message history to keep each subtask isolated.
995
+
996
+ Args:
997
+ system_prompt: Scoped system prompt for this subtask.
998
+ task: The subtask description.
999
+ max_iterations: Max iterations for this subtask.
1000
+
1001
+ Returns:
1002
+ SubtaskResult with text, files, and iteration count.
1003
+ """
1004
+ messages: list[dict[str, Any]] = [{"role": "user", "content": task}]
1005
+ iteration = 0
1006
+ files_touched: list[str] = []
1007
+
1008
+ while iteration < max_iterations:
1009
+ iteration += 1
1010
+ console.print(f"[dim]--- Subtask iteration {iteration} ---[/dim]")
1011
+
1012
+ if self._rate_limiter is not None:
1013
+ estimated = ContextAssembler.estimate_tokens(system_prompt + task)
1014
+ await self._rate_limiter.wait_if_needed(estimated)
1015
+
1016
+ response = await self._client.send_message(
1017
+ messages=messages,
1018
+ system=system_prompt,
1019
+ tools=TOOL_DEFINITIONS,
1020
+ )
1021
+
1022
+ if self._rate_limiter is not None:
1023
+ self._rate_limiter.record(response.input_tokens)
1024
+
1025
+ console.print(
1026
+ f"[dim]Tokens: {response.input_tokens} in / "
1027
+ f"{response.output_tokens} out | "
1028
+ f"Cost: ${self._client.usage.estimated_cost:.4f}[/dim]"
1029
+ )
1030
+
1031
+ cost = self._client.usage.estimated_cost
1032
+ if cost > 5.0:
1033
+ console.print(
1034
+ "[bold red]Warning:[/bold red] Session has exceeded $5.00 in API costs"
1035
+ )
1036
+ elif cost > 1.0:
1037
+ console.print(
1038
+ "[bold yellow]Warning:[/bold yellow] Session has exceeded $1.00 in API costs"
1039
+ )
1040
+
1041
+ has_tool_use = any(block.get("type") == "tool_use" for block in response.content)
1042
+
1043
+ if not has_tool_use:
1044
+ text = ""
1045
+ for block in response.content:
1046
+ if block.get("type") == "text":
1047
+ text = str(block.get("text", ""))
1048
+ break
1049
+ return SubtaskResult(text=text, files_touched=files_touched, iterations=iteration)
1050
+
1051
+ assistant_content = response.content
1052
+ messages.append({"role": "assistant", "content": assistant_content})
1053
+
1054
+ tool_results: list[dict[str, Any]] = []
1055
+ for block in assistant_content:
1056
+ if block.get("type") == "tool_use":
1057
+ tool_name = block["name"]
1058
+ tool_input = block["input"]
1059
+ tool_id = block["id"]
1060
+
1061
+ summary = _summarize_input(tool_input)
1062
+ console.print(f" [cyan]Tool:[/cyan] {tool_name}({summary})")
1063
+
1064
+ result, modified = await execute_tool(
1065
+ tool_name,
1066
+ tool_input,
1067
+ self._project_dir,
1068
+ self._safety,
1069
+ self._dry_run,
1070
+ )
1071
+ if modified:
1072
+ self._files_modified = True
1073
+ if tool_name == "write_file" and "path" in tool_input:
1074
+ files_touched.append(tool_input["path"])
1075
+
1076
+ if result.success:
1077
+ console.print(f" [green]OK[/green] ({len(result.output)} chars)")
1078
+ else:
1079
+ console.print(f" [red]Error:[/red] {result.error}")
1080
+
1081
+ content = result.output if result.success else f"Error: {result.error}"
1082
+ tool_results.append(
1083
+ {
1084
+ "type": "tool_result",
1085
+ "tool_use_id": tool_id,
1086
+ "content": content,
1087
+ "is_error": not result.success,
1088
+ }
1089
+ )
1090
+
1091
+ messages.append({"role": "user", "content": tool_results})
1092
+
1093
+ return SubtaskResult(
1094
+ text="Subtask did not complete within iteration limit.",
1095
+ files_touched=files_touched,
1096
+ iterations=iteration,
1097
+ )
1098
+
1099
+ async def _send_direct(self, user_message: str) -> str:
1100
+ """Process a user turn directly without subtask decomposition.
1101
+
1102
+ Fallback mode when planner is unavailable or fails.
1103
+
1104
+ Args:
1105
+ user_message: The user's message text.
1106
+
1107
+ Returns:
1108
+ The assistant's final text response.
1109
+ """
864
1110
  self._messages.append({"role": "user", "content": user_message})
865
1111
 
866
1112
  iterations = 0
867
1113
  final_text = ""
1114
+ turn_files: list[str] = []
868
1115
 
869
1116
  while iterations < self._max_iterations:
870
1117
  iterations += 1
871
1118
 
1119
+ if self._rate_limiter is not None:
1120
+ estimated = ContextAssembler.estimate_tokens(self._system_prompt + user_message)
1121
+ await self._rate_limiter.wait_if_needed(estimated)
1122
+
872
1123
  response = await self._client.send_message(
873
1124
  messages=self._messages,
874
1125
  system=self._system_prompt,
875
1126
  tools=TOOL_DEFINITIONS,
876
1127
  )
877
1128
 
878
- # Show token usage
1129
+ if self._rate_limiter is not None:
1130
+ self._rate_limiter.record(response.input_tokens)
1131
+
879
1132
  console.print(
880
1133
  f"[dim]Tokens: {response.input_tokens} in / "
881
1134
  f"{response.output_tokens} out | "
882
1135
  f"Cost: ${self._client.usage.estimated_cost:.4f}[/dim]"
883
1136
  )
884
1137
 
885
- # Cost warnings
886
1138
  cost = self._client.usage.estimated_cost
887
1139
  if cost > 5.0:
888
1140
  console.print(
@@ -896,14 +1148,12 @@ class ChatSession:
896
1148
  has_tool_use = any(block.get("type") == "tool_use" for block in response.content)
897
1149
 
898
1150
  if not has_tool_use:
899
- # Extract final text
900
1151
  for block in response.content:
901
1152
  if block.get("type") == "text":
902
1153
  final_text = block.get("text", "")
903
1154
  self._messages.append({"role": "assistant", "content": response.content})
904
1155
  break
905
1156
 
906
- # Execute tool calls
907
1157
  assistant_content = response.content
908
1158
  self._messages.append({"role": "assistant", "content": assistant_content})
909
1159
 
@@ -926,6 +1176,8 @@ class ChatSession:
926
1176
  )
927
1177
  if modified:
928
1178
  self._files_modified = True
1179
+ if tool_name == "write_file" and "path" in tool_input:
1180
+ turn_files.append(tool_input["path"])
929
1181
 
930
1182
  if result.success:
931
1183
  console.print(f" [green]OK[/green] ({len(result.output)} chars)")
@@ -944,8 +1196,106 @@ class ChatSession:
944
1196
 
945
1197
  self._messages.append({"role": "user", "content": tool_results})
946
1198
 
1199
+ # Update memory after turns that modified files
1200
+ if turn_files and self._memory is not None:
1201
+ await self._update_memory_for_direct(user_message, final_text, turn_files)
1202
+
947
1203
  return final_text
948
1204
 
1205
+ async def _update_memory_for_subtask(
1206
+ self, subtask_description: str, result: SubtaskResult
1207
+ ) -> None:
1208
+ """Summarize a completed subtask and append to Session Log.
1209
+
1210
+ Args:
1211
+ subtask_description: What the subtask was supposed to do.
1212
+ result: The subtask result.
1213
+ """
1214
+ if self._memory is None:
1215
+ return
1216
+
1217
+ truncated = result.text[:500]
1218
+ files_str = ", ".join(result.files_touched[:10]) or "none"
1219
+ user_msg = (
1220
+ f"Subtask: {subtask_description}\nResult: {truncated}\nFiles modified: {files_str}"
1221
+ )
1222
+
1223
+ try:
1224
+ if self._rate_limiter is not None:
1225
+ estimated = ContextAssembler.estimate_tokens(user_msg)
1226
+ await self._rate_limiter.wait_if_needed(estimated)
1227
+
1228
+ resp = await self._client.send_message(
1229
+ messages=[{"role": "user", "content": user_msg}],
1230
+ system=_MEMORY_SUMMARY_PROMPT,
1231
+ model=self._haiku_model,
1232
+ max_tokens=256,
1233
+ )
1234
+
1235
+ if self._rate_limiter is not None:
1236
+ self._rate_limiter.record(resp.input_tokens)
1237
+
1238
+ summary = ""
1239
+ for block in resp.content:
1240
+ if block.get("type") == "text":
1241
+ summary = block.get("text", "").strip()
1242
+ break
1243
+
1244
+ if summary:
1245
+ today = datetime.now(tz=UTC).strftime("%Y-%m-%d")
1246
+ self._memory.append("Session Log", f"- [{today}] {summary}")
1247
+ except Exception as exc:
1248
+ console.print(f"[yellow]Memory update skipped:[/yellow] {exc}")
1249
+
1250
+ async def _update_memory_for_direct(
1251
+ self, user_message: str, response_text: str, files: list[str]
1252
+ ) -> None:
1253
+ """Summarize a direct-mode turn and append to Session Log.
1254
+
1255
+ Args:
1256
+ user_message: The user's input for this turn.
1257
+ response_text: The assistant's final response.
1258
+ files: Files written during this turn.
1259
+ """
1260
+ if self._memory is None:
1261
+ return
1262
+
1263
+ truncated_response = response_text[:300]
1264
+ files_str = ", ".join(files[:10]) or "none"
1265
+ user_msg = (
1266
+ f"User asked: {user_message[:200]}\n"
1267
+ f"Result: {truncated_response}\n"
1268
+ f"Files modified: {files_str}"
1269
+ )
1270
+
1271
+ try:
1272
+ if self._rate_limiter is not None:
1273
+ estimated = ContextAssembler.estimate_tokens(user_msg)
1274
+ await self._rate_limiter.wait_if_needed(estimated)
1275
+
1276
+ resp = await self._client.send_message(
1277
+ messages=[{"role": "user", "content": user_msg}],
1278
+ system=_MEMORY_SUMMARY_PROMPT,
1279
+ model=self._haiku_model,
1280
+ max_tokens=256,
1281
+ )
1282
+
1283
+ if self._rate_limiter is not None:
1284
+ self._rate_limiter.record(resp.input_tokens)
1285
+
1286
+ summary = ""
1287
+ for block in resp.content:
1288
+ if block.get("type") == "text":
1289
+ summary = block.get("text", "").strip()
1290
+ break
1291
+
1292
+ if summary:
1293
+ today = datetime.now(tz=UTC).strftime("%Y-%m-%d")
1294
+ self._memory.append("Session Log", f"- [{today}] {summary}")
1295
+ self._memory.prune_section("Session Log")
1296
+ except Exception as exc:
1297
+ console.print(f"[yellow]Memory update skipped:[/yellow] {exc}")
1298
+
949
1299
 
950
1300
  async def run_task(task: str, config: NexConfig) -> None:
951
1301
  """Entry point called by the CLI to run a task.
@@ -122,6 +122,30 @@ class RateLimiter:
122
122
  return
123
123
 
124
124
 
125
+ def _extract_retry_after(exc: Exception, default: float = 60.0) -> float:
126
+ """Extract retry-after seconds from an Anthropic API error.
127
+
128
+ Inspects the exception's response headers for a ``retry-after`` value.
129
+ Falls back to *default* if the header is missing or unparseable.
130
+
131
+ Args:
132
+ exc: The exception raised by the Anthropic SDK.
133
+ default: Fallback wait time in seconds.
134
+
135
+ Returns:
136
+ Number of seconds to wait before retrying.
137
+ """
138
+ resp = getattr(exc, "response", None)
139
+ if resp is not None:
140
+ header = getattr(resp, "headers", {}).get("retry-after")
141
+ if header:
142
+ try:
143
+ return max(float(header), 1.0)
144
+ except (ValueError, TypeError):
145
+ pass
146
+ return default
147
+
148
+
125
149
  class AnthropicClient:
126
150
  """Async wrapper around the Anthropic API.
127
151
 
@@ -140,7 +164,7 @@ class AnthropicClient:
140
164
  self,
141
165
  api_key: str,
142
166
  default_model: str = "claude-sonnet-4-20250514",
143
- max_retries: int = 3,
167
+ max_retries: int = 5,
144
168
  ) -> None:
145
169
  """Initialize the Anthropic client.
146
170
 
@@ -196,7 +220,9 @@ class AnthropicClient:
196
220
  kwargs["tools"] = tools
197
221
 
198
222
  last_error: Exception | None = None
199
- for attempt in range(self._max_retries + 1):
223
+ # 429 rate limits need more retries with longer waits than server errors
224
+ max_attempts = self._max_retries + 1
225
+ for attempt in range(max_attempts):
200
226
  try:
201
227
  response = await self._client.messages.create(**kwargs)
202
228
 
@@ -230,16 +256,21 @@ class AnthropicClient:
230
256
  except Exception as exc:
231
257
  last_error = exc
232
258
  status_code = getattr(exc, "status_code", None)
259
+ is_rate_limit = status_code == 429
260
+ is_retryable = status_code in (500, 502, 503, 529)
261
+
262
+ if (is_rate_limit or is_retryable) and attempt < max_attempts - 1:
263
+ if is_rate_limit:
264
+ # Rate limits: extract retry-after from response headers,
265
+ # or default to 60s (the full rate-limit window).
266
+ wait = _extract_retry_after(exc, default=60.0)
267
+ else:
268
+ # Server errors: short exponential backoff
269
+ wait = float(2**attempt)
233
270
 
234
- # Retry on rate limit or server errors
235
- if status_code in (429, 500, 502, 503, 529) and attempt < self._max_retries:
236
- wait = 2**attempt
237
- retry_after = getattr(exc, "retry_after", None)
238
- if retry_after:
239
- wait = max(wait, float(retry_after))
240
271
  console.print(
241
- f"[yellow]API error {status_code}, retrying in {wait}s "
242
- f"(attempt {attempt + 1}/{self._max_retries})...[/yellow]"
272
+ f"[yellow]API error {status_code}, retrying in {wait:.0f}s "
273
+ f"(attempt {attempt + 1}/{max_attempts - 1})...[/yellow]"
243
274
  )
244
275
  await asyncio.sleep(wait)
245
276
  continue
@@ -250,7 +281,7 @@ class AnthropicClient:
250
281
  ) from exc
251
282
 
252
283
  raise APIError(
253
- f"Failed after {self._max_retries} retries: {last_error}",
284
+ f"Failed after {max_attempts - 1} retries: {last_error}",
254
285
  )
255
286
 
256
287
  async def close(self) -> None:
@@ -438,7 +438,7 @@ async def _run_chat(config: NexConfig) -> None:
438
438
  config: Nex configuration.
439
439
  """
440
440
  from nex.agent import ChatSession
441
- from nex.api_client import AnthropicClient
441
+ from nex.api_client import AnthropicClient, RateLimiter
442
442
  from nex.context import ContextAssembler
443
443
  from nex.indexer.index import IndexBuilder
444
444
  from nex.memory.errors import ErrorPatternDB
@@ -464,6 +464,7 @@ async def _run_chat(config: NexConfig) -> None:
464
464
 
465
465
  client = AnthropicClient(api_key=config.api_key, default_model=config.model)
466
466
  safety = SafetyLayer(dry_run=config.dry_run)
467
+ rate_limiter = RateLimiter(tokens_per_minute=config.token_rate_limit)
467
468
 
468
469
  session = ChatSession(
469
470
  api_client=client,
@@ -472,6 +473,13 @@ async def _run_chat(config: NexConfig) -> None:
472
473
  safety=safety,
473
474
  dry_run=config.dry_run,
474
475
  max_iterations=config.max_iterations,
476
+ rate_limiter=rate_limiter,
477
+ memory=memory,
478
+ haiku_model=config.haiku_model,
479
+ assembler=assembler,
480
+ error_patterns=error_patterns,
481
+ index=idx,
482
+ subtask_token_budget=config.subtask_token_budget,
475
483
  )
476
484
 
477
485
  try:
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes