PyPI - nexcoder - Versions diffs - 0.1.4__tar.gz → 0.1.6__tar.gz - Mend

nexcoder 0.1.4.tar.gz → 0.1.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

{nexcoder-0.1.4 → nexcoder-0.1.6}/CHANGELOG.md RENAMED Viewed

@@ -5,6 +5,29 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.1.6] — 2026-02-25
+### Added
+- Subtask decomposition in `nex chat` — each user turn is decomposed via the planner with scoped context per subtask
+- Rate limiting for `nex chat` — all API calls (planner, subtask loops, memory updates) go through the rate limiter
+- Memory updates in `nex chat` — Session Log written after each subtask, with pruning
+- Fallback to direct mode in chat if the planner fails
+### Changed
+- Default `token_rate_limit` set to 20,000 (was 0) — rate limiting enabled out of the box
+- `token_rate_limit = 20000` included in `nex init` config template
+- 429 rate limit retries now wait for `retry-after` header (default 60s) instead of short 2/4/8s backoff
+- `max_retries` increased from 3 to 5 for better rate limit recovery
+- Planner API call now goes through the rate limiter (was ungated)
+- Memory update API call now goes through the rate limiter (was ungated)
+### Fixed
+- 429 rate limit errors crashing the agent — retries were too short (14s total) for a 60s rate limit window
+- Memory updates silently failing due to rate limits — now warns visibly instead of `except: pass`
 ## [0.1.1] — 2026-02-25
 ### Changed

{nexcoder-0.1.4 → nexcoder-0.1.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nexcoder
-Version: 0.1.4
+Version: 0.1.6
 Summary: The coding agent that remembers — AI coding assistant with persistent memory and error learning.
 Project-URL: Homepage, https://github.com/nex-ai/nex-ai
 Project-URL: Repository, https://github.com/nex-ai/nex-ai

{nexcoder-0.1.4 → nexcoder-0.1.6}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "nexcoder"
-version = "0.1.4"
+version = "0.1.6"
 description = "The coding agent that remembers — AI coding assistant with persistent memory and error learning."
 readme = {file = "README.md", content-type = "text/markdown"}
 license = "MIT"

{nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/__init__.py RENAMED Viewed

@@ -1,6 +1,4 @@
 """Nex AI — The Coding Agent That Remembers."""
-from __future__ import annotations
-__version__ = "0.1.0"
+__version__ = "v0.1.6"
 __app_name__ = "nex"

{nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/agent.py RENAMED Viewed

@@ -800,11 +800,12 @@ class Agent:
 class ChatSession:
-    """Interactive chat session with persistent message history.
+    """Interactive chat session with subtask decomposition.
-    Unlike Agent (which runs a single task to completion), ChatSession
-    maintains a conversation across multiple user turns, accumulating
-    context in its message history.
+    Each user turn is decomposed into subtasks via the planner.
+    Each subtask runs with scoped context in its own mini loop,
+    keeping the main chat history clean. Falls back to direct
+    execution if the planner fails.
     """
     def __init__(
@@ -815,6 +816,13 @@ class ChatSession:
         safety: SafetyLayer,
         dry_run: bool = False,
         max_iterations: int = 25,
+        rate_limiter: RateLimiter | None = None,
+        memory: ProjectMemory | None = None,
+        haiku_model: str = "claude-haiku-4-5-20251001",
+        assembler: ContextAssembler | None = None,
+        error_patterns: list[Any] | None = None,
+        index: Any | None = None,
+        subtask_token_budget: int = 20_000,
     ) -> None:
         """Initialize a chat session.
@@ -825,6 +833,13 @@ class ChatSession:
             safety: Safety layer for command approval.
             dry_run: If True, skip destructive operations.
             max_iterations: Max tool calls per user turn.
+            rate_limiter: Optional rate limiter for API calls.
+            memory: Optional project memory for session log updates.
+            haiku_model: Model for memory summary and planning calls.
+            assembler: Context assembler for building scoped subtask prompts.
+            error_patterns: Past error patterns for subtask context.
+            index: Code index for relevant code selection.
+            subtask_token_budget: Token budget per subtask context.
         """
         self._client = api_client
         self._system_prompt = system_prompt
@@ -832,6 +847,13 @@ class ChatSession:
         self._safety = safety
         self._dry_run = dry_run
         self._max_iterations = max_iterations
+        self._rate_limiter = rate_limiter
+        self._memory = memory
+        self._haiku_model = haiku_model
+        self._assembler = assembler
+        self._error_patterns = error_patterns or []
+        self._index = index
+        self._subtask_token_budget = subtask_token_budget
         self._messages: list[dict[str, Any]] = []
         self._turn_count = 0
         self._files_modified = False
@@ -852,7 +874,11 @@ class ChatSession:
         return self._files_modified
     async def send(self, user_message: str) -> str:
-        """Process a single user turn, executing tools as needed.
+        """Process a user turn via subtask decomposition.
+        Decomposes the message into subtasks, executes each with
+        scoped context, and appends a summary to chat history. Falls
+        back to direct execution if the planner fails.
         Args:
             user_message: The user's message text.
@@ -861,28 +887,254 @@ class ChatSession:
             The assistant's final text response for this turn.
         """
         self._turn_count += 1
+        # Try subtask decomposition if assembler is available
+        if self._assembler is not None:
+            try:
+                return await self._send_with_subtasks(user_message)
+            except Exception as exc:
+                console.print(
+                    f"[yellow]Subtask decomposition failed ({exc}), using direct mode...[/yellow]"
+                )
+        return await self._send_direct(user_message)
+    async def _send_with_subtasks(self, user_message: str) -> str:
+        """Decompose user message into subtasks and execute each.
+        Each subtask runs in its own mini loop with scoped context.
+        Results are collected and a combined response is appended to
+        the main chat history.
+        Args:
+            user_message: The user's message text.
+        Returns:
+            Combined response from all subtasks.
+        """
+        assert self._assembler is not None
+        project_memory = self._memory.load() if self._memory else ""
+        # Decompose via planner
+        planner = Planner(self._client, haiku_model=self._haiku_model)
+        console.print("\n[bold]Decomposing into subtasks...[/bold]")
+        subtasks = await planner.plan(user_message, project_memory, self._rate_limiter)
+        console.print(
+            f"[dim]Subtasks: {len(subtasks)} | Max iterations: {self._max_iterations}[/dim]\n"
+        )
+        iters_per_subtask = max(5, self._max_iterations // max(len(subtasks), 1))
+        prior_context = ""
+        subtask_results: list[str] = []
+        for i, subtask in enumerate(subtasks, 1):
+            console.print(
+                Panel(
+                    f"[bold]{subtask.description}[/bold]\n"
+                    f"[dim]Files: {', '.join(subtask.file_paths) or 'auto'}[/dim]",
+                    title=f"[bold cyan]Subtask {i}/{len(subtasks)}[/bold cyan]",
+                    border_style="cyan",
+                )
+            )
+            # Build scoped context for this subtask
+            scoped_code = self._assembler.select_scoped_code(
+                subtask.file_paths,
+                subtask.description,
+                self._index,
+                self._subtask_token_budget,
+            )
+            system_prompt = self._assembler.build_subtask_prompt(
+                subtask_description=subtask.description,
+                project_memory=project_memory,
+                error_patterns=self._error_patterns,
+                relevant_code=scoped_code,
+                prior_context=prior_context,
+            )
+            # Run subtask in a mini loop with fresh messages
+            sub_result = await self._run_subtask_loop(
+                system_prompt=system_prompt,
+                task=subtask.description,
+                max_iterations=iters_per_subtask,
+            )
+            subtask_results.append(sub_result.text)
+            # Update memory after each subtask
+            if self._memory is not None:
+                await self._update_memory_for_subtask(subtask.description, sub_result)
+                self._memory.prune_section("Session Log")
+                project_memory = self._memory.load()
+            prior_context += f"- Subtask {i}: {subtask.description} -> {sub_result.text[:200]}\n"
+        combined = "\n\n".join(subtask_results) if subtask_results else "No subtasks completed."
+        # Append to main chat history as a clean user/assistant pair
+        self._messages.append({"role": "user", "content": user_message})
+        self._messages.append(
+            {"role": "assistant", "content": [{"type": "text", "text": combined}]}
+        )
+        return combined
+    async def _run_subtask_loop(
+        self,
+        system_prompt: str,
+        task: str,
+        max_iterations: int,
+    ) -> SubtaskResult:
+        """Run a focused mini loop for a single subtask.
+        Uses fresh message history to keep each subtask isolated.
+        Args:
+            system_prompt: Scoped system prompt for this subtask.
+            task: The subtask description.
+            max_iterations: Max iterations for this subtask.
+        Returns:
+            SubtaskResult with text, files, and iteration count.
+        """
+        messages: list[dict[str, Any]] = [{"role": "user", "content": task}]
+        iteration = 0
+        files_touched: list[str] = []
+        while iteration < max_iterations:
+            iteration += 1
+            console.print(f"[dim]--- Subtask iteration {iteration} ---[/dim]")
+            if self._rate_limiter is not None:
+                estimated = ContextAssembler.estimate_tokens(system_prompt + task)
+                await self._rate_limiter.wait_if_needed(estimated)
+            response = await self._client.send_message(
+                messages=messages,
+                system=system_prompt,
+                tools=TOOL_DEFINITIONS,
+            )
+            if self._rate_limiter is not None:
+                self._rate_limiter.record(response.input_tokens)
+            console.print(
+                f"[dim]Tokens: {response.input_tokens} in / "
+                f"{response.output_tokens} out | "
+                f"Cost: ${self._client.usage.estimated_cost:.4f}[/dim]"
+            )
+            cost = self._client.usage.estimated_cost
+            if cost > 5.0:
+                console.print(
+                    "[bold red]Warning:[/bold red] Session has exceeded $5.00 in API costs"
+                )
+            elif cost > 1.0:
+                console.print(
+                    "[bold yellow]Warning:[/bold yellow] Session has exceeded $1.00 in API costs"
+                )
+            has_tool_use = any(block.get("type") == "tool_use" for block in response.content)
+            if not has_tool_use:
+                text = ""
+                for block in response.content:
+                    if block.get("type") == "text":
+                        text = str(block.get("text", ""))
+                        break
+                return SubtaskResult(text=text, files_touched=files_touched, iterations=iteration)
+            assistant_content = response.content
+            messages.append({"role": "assistant", "content": assistant_content})
+            tool_results: list[dict[str, Any]] = []
+            for block in assistant_content:
+                if block.get("type") == "tool_use":
+                    tool_name = block["name"]
+                    tool_input = block["input"]
+                    tool_id = block["id"]
+                    summary = _summarize_input(tool_input)
+                    console.print(f"  [cyan]Tool:[/cyan] {tool_name}({summary})")
+                    result, modified = await execute_tool(
+                        tool_name,
+                        tool_input,
+                        self._project_dir,
+                        self._safety,
+                        self._dry_run,
+                    )
+                    if modified:
+                        self._files_modified = True
+                        if tool_name == "write_file" and "path" in tool_input:
+                            files_touched.append(tool_input["path"])
+                    if result.success:
+                        console.print(f"  [green]OK[/green] ({len(result.output)} chars)")
+                    else:
+                        console.print(f"  [red]Error:[/red] {result.error}")
+                    content = result.output if result.success else f"Error: {result.error}"
+                    tool_results.append(
+                        {
+                            "type": "tool_result",
+                            "tool_use_id": tool_id,
+                            "content": content,
+                            "is_error": not result.success,
+                        }
+                    )
+            messages.append({"role": "user", "content": tool_results})
+        return SubtaskResult(
+            text="Subtask did not complete within iteration limit.",
+            files_touched=files_touched,
+            iterations=iteration,
+        )
+    async def _send_direct(self, user_message: str) -> str:
+        """Process a user turn directly without subtask decomposition.
+        Fallback mode when planner is unavailable or fails.
+        Args:
+            user_message: The user's message text.
+        Returns:
+            The assistant's final text response.
+        """
         self._messages.append({"role": "user", "content": user_message})
         iterations = 0
         final_text = ""
+        turn_files: list[str] = []
         while iterations < self._max_iterations:
             iterations += 1
+            if self._rate_limiter is not None:
+                estimated = ContextAssembler.estimate_tokens(self._system_prompt + user_message)
+                await self._rate_limiter.wait_if_needed(estimated)
             response = await self._client.send_message(
                 messages=self._messages,
                 system=self._system_prompt,
                 tools=TOOL_DEFINITIONS,
             )
-            # Show token usage
+            if self._rate_limiter is not None:
+                self._rate_limiter.record(response.input_tokens)
             console.print(
                 f"[dim]Tokens: {response.input_tokens} in / "
                 f"{response.output_tokens} out | "
                 f"Cost: ${self._client.usage.estimated_cost:.4f}[/dim]"
             )
-            # Cost warnings
             cost = self._client.usage.estimated_cost
             if cost > 5.0:
                 console.print(
@@ -896,14 +1148,12 @@ class ChatSession:
             has_tool_use = any(block.get("type") == "tool_use" for block in response.content)
             if not has_tool_use:
-                # Extract final text
                 for block in response.content:
                     if block.get("type") == "text":
                         final_text = block.get("text", "")
                 self._messages.append({"role": "assistant", "content": response.content})
                 break
-            # Execute tool calls
             assistant_content = response.content
             self._messages.append({"role": "assistant", "content": assistant_content})
@@ -926,6 +1176,8 @@ class ChatSession:
                     )
                     if modified:
                         self._files_modified = True
+                        if tool_name == "write_file" and "path" in tool_input:
+                            turn_files.append(tool_input["path"])
                     if result.success:
                         console.print(f"  [green]OK[/green] ({len(result.output)} chars)")
@@ -944,8 +1196,106 @@ class ChatSession:
             self._messages.append({"role": "user", "content": tool_results})
+        # Update memory after turns that modified files
+        if turn_files and self._memory is not None:
+            await self._update_memory_for_direct(user_message, final_text, turn_files)
         return final_text
+    async def _update_memory_for_subtask(
+        self, subtask_description: str, result: SubtaskResult
+    ) -> None:
+        """Summarize a completed subtask and append to Session Log.
+        Args:
+            subtask_description: What the subtask was supposed to do.
+            result: The subtask result.
+        """
+        if self._memory is None:
+            return
+        truncated = result.text[:500]
+        files_str = ", ".join(result.files_touched[:10]) or "none"
+        user_msg = (
+            f"Subtask: {subtask_description}\nResult: {truncated}\nFiles modified: {files_str}"
+        )
+        try:
+            if self._rate_limiter is not None:
+                estimated = ContextAssembler.estimate_tokens(user_msg)
+                await self._rate_limiter.wait_if_needed(estimated)
+            resp = await self._client.send_message(
+                messages=[{"role": "user", "content": user_msg}],
+                system=_MEMORY_SUMMARY_PROMPT,
+                model=self._haiku_model,
+                max_tokens=256,
+            )
+            if self._rate_limiter is not None:
+                self._rate_limiter.record(resp.input_tokens)
+            summary = ""
+            for block in resp.content:
+                if block.get("type") == "text":
+                    summary = block.get("text", "").strip()
+                    break
+            if summary:
+                today = datetime.now(tz=UTC).strftime("%Y-%m-%d")
+                self._memory.append("Session Log", f"- [{today}] {summary}")
+        except Exception as exc:
+            console.print(f"[yellow]Memory update skipped:[/yellow] {exc}")
+    async def _update_memory_for_direct(
+        self, user_message: str, response_text: str, files: list[str]
+    ) -> None:
+        """Summarize a direct-mode turn and append to Session Log.
+        Args:
+            user_message: The user's input for this turn.
+            response_text: The assistant's final response.
+            files: Files written during this turn.
+        """
+        if self._memory is None:
+            return
+        truncated_response = response_text[:300]
+        files_str = ", ".join(files[:10]) or "none"
+        user_msg = (
+            f"User asked: {user_message[:200]}\n"
+            f"Result: {truncated_response}\n"
+            f"Files modified: {files_str}"
+        )
+        try:
+            if self._rate_limiter is not None:
+                estimated = ContextAssembler.estimate_tokens(user_msg)
+                await self._rate_limiter.wait_if_needed(estimated)
+            resp = await self._client.send_message(
+                messages=[{"role": "user", "content": user_msg}],
+                system=_MEMORY_SUMMARY_PROMPT,
+                model=self._haiku_model,
+                max_tokens=256,
+            )
+            if self._rate_limiter is not None:
+                self._rate_limiter.record(resp.input_tokens)
+            summary = ""
+            for block in resp.content:
+                if block.get("type") == "text":
+                    summary = block.get("text", "").strip()
+                    break
+            if summary:
+                today = datetime.now(tz=UTC).strftime("%Y-%m-%d")
+                self._memory.append("Session Log", f"- [{today}] {summary}")
+                self._memory.prune_section("Session Log")
+        except Exception as exc:
+            console.print(f"[yellow]Memory update skipped:[/yellow] {exc}")
 async def run_task(task: str, config: NexConfig) -> None:
     """Entry point called by the CLI to run a task.

{nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/api_client.py RENAMED Viewed

@@ -122,6 +122,30 @@ class RateLimiter:
                 return
+def _extract_retry_after(exc: Exception, default: float = 60.0) -> float:
+    """Extract retry-after seconds from an Anthropic API error.
+    Inspects the exception's response headers for a ``retry-after`` value.
+    Falls back to *default* if the header is missing or unparseable.
+    Args:
+        exc: The exception raised by the Anthropic SDK.
+        default: Fallback wait time in seconds.
+    Returns:
+        Number of seconds to wait before retrying.
+    """
+    resp = getattr(exc, "response", None)
+    if resp is not None:
+        header = getattr(resp, "headers", {}).get("retry-after")
+        if header:
+            try:
+                return max(float(header), 1.0)
+            except (ValueError, TypeError):
+                pass
+    return default
 class AnthropicClient:
     """Async wrapper around the Anthropic API.
@@ -140,7 +164,7 @@ class AnthropicClient:
         self,
         api_key: str,
         default_model: str = "claude-sonnet-4-20250514",
-        max_retries: int = 3,
+        max_retries: int = 5,
     ) -> None:
         """Initialize the Anthropic client.
@@ -196,7 +220,9 @@ class AnthropicClient:
             kwargs["tools"] = tools
         last_error: Exception | None = None
-        for attempt in range(self._max_retries + 1):
+        # 429 rate limits need more retries with longer waits than server errors
+        max_attempts = self._max_retries + 1
+        for attempt in range(max_attempts):
             try:
                 response = await self._client.messages.create(**kwargs)
@@ -230,16 +256,21 @@ class AnthropicClient:
             except Exception as exc:
                 last_error = exc
                 status_code = getattr(exc, "status_code", None)
+                is_rate_limit = status_code == 429
+                is_retryable = status_code in (500, 502, 503, 529)
+                if (is_rate_limit or is_retryable) and attempt < max_attempts - 1:
+                    if is_rate_limit:
+                        # Rate limits: extract retry-after from response headers,
+                        # or default to 60s (the full rate-limit window).
+                        wait = _extract_retry_after(exc, default=60.0)
+                    else:
+                        # Server errors: short exponential backoff
+                        wait = float(2**attempt)
-                # Retry on rate limit or server errors
-                if status_code in (429, 500, 502, 503, 529) and attempt < self._max_retries:
-                    wait = 2**attempt
-                    retry_after = getattr(exc, "retry_after", None)
-                    if retry_after:
-                        wait = max(wait, float(retry_after))
                     console.print(
-                        f"[yellow]API error {status_code}, retrying in {wait}s "
-                        f"(attempt {attempt + 1}/{self._max_retries})...[/yellow]"
+                        f"[yellow]API error {status_code}, retrying in {wait:.0f}s "
+                        f"(attempt {attempt + 1}/{max_attempts - 1})...[/yellow]"
                     )
                     await asyncio.sleep(wait)
                     continue
@@ -250,7 +281,7 @@ class AnthropicClient:
                 ) from exc
         raise APIError(
-            f"Failed after {self._max_retries} retries: {last_error}",
+            f"Failed after {max_attempts - 1} retries: {last_error}",
         )
     async def close(self) -> None:

{nexcoder-0.1.4 → nexcoder-0.1.6}/src/nex/cli.py RENAMED Viewed

@@ -438,7 +438,7 @@ async def _run_chat(config: NexConfig) -> None:
         config: Nex configuration.
     """
     from nex.agent import ChatSession
-    from nex.api_client import AnthropicClient
+    from nex.api_client import AnthropicClient, RateLimiter
     from nex.context import ContextAssembler
     from nex.indexer.index import IndexBuilder
     from nex.memory.errors import ErrorPatternDB
@@ -464,6 +464,7 @@ async def _run_chat(config: NexConfig) -> None:
     client = AnthropicClient(api_key=config.api_key, default_model=config.model)
     safety = SafetyLayer(dry_run=config.dry_run)
+    rate_limiter = RateLimiter(tokens_per_minute=config.token_rate_limit)
     session = ChatSession(
         api_client=client,
@@ -472,6 +473,13 @@ async def _run_chat(config: NexConfig) -> None:
         safety=safety,
         dry_run=config.dry_run,
         max_iterations=config.max_iterations,
+        rate_limiter=rate_limiter,
+        memory=memory,
+        haiku_model=config.haiku_model,
+        assembler=assembler,
+        error_patterns=error_patterns,
+        index=idx,
+        subtask_token_budget=config.subtask_token_budget,
     )
     try: