nexcoder 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {nexcoder-0.1.0 → nexcoder-0.1.2}/.gitignore +3 -0
  2. {nexcoder-0.1.0 → nexcoder-0.1.2}/CHANGELOG.md +15 -0
  3. {nexcoder-0.1.0 → nexcoder-0.1.2}/PKG-INFO +1 -1
  4. {nexcoder-0.1.0 → nexcoder-0.1.2}/pyproject.toml +1 -1
  5. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/agent.py +363 -2
  6. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/api_client.py +65 -1
  7. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/cli.py +1 -0
  8. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/config.py +12 -0
  9. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/context.py +136 -0
  10. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/memory/project.py +49 -0
  11. nexcoder-0.1.2/tests/test_agent.py +350 -0
  12. {nexcoder-0.1.0 → nexcoder-0.1.2}/tests/test_cli.py +2 -1
  13. nexcoder-0.1.2/tests/test_memory.py +91 -0
  14. nexcoder-0.1.2/tests/test_rate_limiter.py +92 -0
  15. nexcoder-0.1.0/CLAUDE.md +0 -268
  16. nexcoder-0.1.0/tests/test_agent.py +0 -133
  17. nexcoder-0.1.0/tests/test_memory.py +0 -52
  18. {nexcoder-0.1.0 → nexcoder-0.1.2}/.github/workflows/ci.yml +0 -0
  19. {nexcoder-0.1.0 → nexcoder-0.1.2}/.github/workflows/publish.yml +0 -0
  20. {nexcoder-0.1.0 → nexcoder-0.1.2}/LICENSE +0 -0
  21. {nexcoder-0.1.0 → nexcoder-0.1.2}/README.md +0 -0
  22. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/__init__.py +0 -0
  23. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/exceptions.py +0 -0
  24. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/indexer/__init__.py +0 -0
  25. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/indexer/index.py +0 -0
  26. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/indexer/parser.py +0 -0
  27. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/indexer/scanner.py +0 -0
  28. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/memory/__init__.py +0 -0
  29. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/memory/decisions.py +0 -0
  30. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/memory/errors.py +0 -0
  31. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/planner.py +0 -0
  32. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/py.typed +0 -0
  33. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/reviewer.py +0 -0
  34. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/safety.py +0 -0
  35. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/test_runner.py +0 -0
  36. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/tools/__init__.py +0 -0
  37. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/tools/file_ops.py +0 -0
  38. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/tools/git_ops.py +0 -0
  39. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/tools/search.py +0 -0
  40. {nexcoder-0.1.0 → nexcoder-0.1.2}/src/nex/tools/shell.py +0 -0
  41. {nexcoder-0.1.0 → nexcoder-0.1.2}/tests/conftest.py +0 -0
  42. {nexcoder-0.1.0 → nexcoder-0.1.2}/tests/test_chat.py +0 -0
  43. {nexcoder-0.1.0 → nexcoder-0.1.2}/tests/test_context.py +0 -0
  44. {nexcoder-0.1.0 → nexcoder-0.1.2}/tests/test_errors.py +0 -0
  45. {nexcoder-0.1.0 → nexcoder-0.1.2}/tests/test_indexer.py +0 -0
  46. {nexcoder-0.1.0 → nexcoder-0.1.2}/tests/test_safety.py +0 -0
  47. {nexcoder-0.1.0 → nexcoder-0.1.2}/tests/test_test_runner.py +0 -0
  48. {nexcoder-0.1.0 → nexcoder-0.1.2}/tests/test_tools.py +0 -0
@@ -41,3 +41,6 @@ htmlcov/
41
41
  # OS
42
42
  .DS_Store
43
43
  Thumbs.db
44
+
45
+ # Claude
46
+ CLAUDE.md
@@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.1.1] — 2026-02-25
9
+
10
+ ### Changed
11
+
12
+ - Agent now always decomposes tasks into subtasks via the planner, regardless of rate limit settings
13
+ - `_run_single()` is now a fallback path, used only when the planner fails
14
+ - `_run_subtask_loop()` returns `SubtaskResult` (with text, files_touched, iterations) instead of a bare string
15
+
16
+ ### Added
17
+
18
+ - Memory updates after every subtask — Haiku summarizes what was done and appends to `## Session Log` in `.nex/memory.md`
19
+ - Mid-subtask memory checkpoint at iteration 8 for long-running subtasks (no API call, lightweight progress note)
20
+ - `ProjectMemory.prune_section()` to prevent Session Log from growing unbounded (trims oldest entries beyond 30 lines)
21
+ - Tests for planner failure fallback, memory updates after subtasks, always-subtask behavior, and section pruning
22
+
8
23
  ## [0.1.0] — 2026-02-24
9
24
 
10
25
  ### Added
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nexcoder
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: The coding agent that remembers — AI coding assistant with persistent memory and error learning.
5
5
  Project-URL: Homepage, https://github.com/nex-ai/nex-ai
6
6
  Project-URL: Repository, https://github.com/nex-ai/nex-ai
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "nexcoder"
7
- version = "0.1.0"
7
+ version = "0.1.2"
8
8
  description = "The coding agent that remembers — AI coding assistant with persistent memory and error learning."
9
9
  readme = {file = "README.md", content-type = "text/markdown"}
10
10
  license = "MIT"
@@ -12,7 +12,8 @@ Follows the agentic REPL pattern:
12
12
 
13
13
  from __future__ import annotations
14
14
 
15
- from dataclasses import dataclass
15
+ from dataclasses import dataclass, field
16
+ from datetime import UTC, datetime
16
17
  from pathlib import Path
17
18
  from typing import Any
18
19
 
@@ -22,12 +23,13 @@ from rich.panel import Panel
22
23
  from rich.prompt import Prompt
23
24
  from rich.syntax import Syntax
24
25
 
25
- from nex.api_client import AnthropicClient
26
+ from nex.api_client import AnthropicClient, RateLimiter
26
27
  from nex.config import NexConfig
27
28
  from nex.context import ContextAssembler
28
29
  from nex.exceptions import NexError, SafetyError, ToolError
29
30
  from nex.memory.errors import ErrorPatternDB
30
31
  from nex.memory.project import ProjectMemory
32
+ from nex.planner import Planner
31
33
  from nex.safety import SafetyLayer
32
34
  from nex.test_runner import TestRunner
33
35
  from nex.tools import TOOL_DEFINITIONS, ToolResult
@@ -56,6 +58,30 @@ class AgentConfig:
56
58
  max_iterations: int = 25
57
59
 
58
60
 
61
+ @dataclass
62
+ class SubtaskResult:
63
+ """Result of a single subtask execution.
64
+
65
+ Attributes:
66
+ text: The final text response from the subtask.
67
+ files_touched: Paths of files written during the subtask.
68
+ iterations: Number of agent iterations used.
69
+ """
70
+
71
+ text: str
72
+ files_touched: list[str] = field(default_factory=list)
73
+ iterations: int = 0
74
+
75
+
76
+ _MID_SUBTASK_MEMORY_THRESHOLD: int = 8
77
+
78
+ _MEMORY_SUMMARY_PROMPT: str = """\
79
+ You are summarizing what a coding agent just did. Given the subtask and result, \
80
+ write a 1-2 sentence summary. Include what was accomplished and key files modified. \
81
+ Be extremely concise. No markdown. Just 1-2 plain sentences.\
82
+ """
83
+
84
+
59
85
  async def execute_tool(
60
86
  name: str,
61
87
  tool_input: dict[str, Any],
@@ -224,6 +250,25 @@ class Agent:
224
250
  async def run(self) -> str:
225
251
  """Execute the agent loop and return the final response.
226
252
 
253
+ Always decomposes the task into subtasks. Falls back to the
254
+ standard single-loop execution only if planning fails.
255
+
256
+ Returns:
257
+ The agent's final text response.
258
+ """
259
+ rate_limit = 0
260
+ if self._nex_config:
261
+ rate_limit = self._nex_config.token_rate_limit
262
+
263
+ try:
264
+ return await self._run_with_subtasks(rate_limit)
265
+ except Exception as exc:
266
+ console.print(f"[yellow]Subtask decomposition failed ({exc}), falling back...[/yellow]")
267
+ return await self._run_single()
268
+
269
+ async def _run_single(self) -> str:
270
+ """Execute the standard agent loop (no rate limiting).
271
+
227
272
  Returns:
228
273
  The agent's final text response.
229
274
  """
@@ -361,6 +406,322 @@ class Agent:
361
406
 
362
407
  return final_response
363
408
 
409
+ async def _run_with_subtasks(self, token_rate_limit: int) -> str:
410
+ """Execute the task via planner decomposition with memory updates.
411
+
412
+ Always decomposes the task into subtasks, builds scoped context for
413
+ each, updates project memory after every subtask, and optionally
414
+ paces API calls when a token rate limit is set.
415
+
416
+ Args:
417
+ token_rate_limit: Max input tokens per minute (0 = no limit).
418
+
419
+ Returns:
420
+ The combined final response.
421
+ """
422
+ memory = ProjectMemory(self._project_dir)
423
+ error_db = ErrorPatternDB(self._project_dir)
424
+ assembler = ContextAssembler(self._project_dir)
425
+
426
+ project_memory = memory.load()
427
+ error_patterns = error_db.find_similar(task_summary=self._config.task)
428
+
429
+ # Load index
430
+ from nex.indexer.index import IndexBuilder
431
+
432
+ builder = IndexBuilder(self._project_dir)
433
+ index = builder.load()
434
+
435
+ # Decompose task via planner
436
+ haiku_model = "claude-haiku-4-5-20251001"
437
+ if self._nex_config:
438
+ haiku_model = self._nex_config.haiku_model
439
+
440
+ planner = Planner(self._client, haiku_model=haiku_model)
441
+
442
+ console.print("\n[bold]Decomposing task into subtasks...[/bold]")
443
+ subtasks = await planner.plan(self._config.task, project_memory)
444
+
445
+ console.print(f"\n[bold]Running task:[/bold] {self._config.task}")
446
+ rate_info = f"Rate limit: {token_rate_limit} tokens/min | " if token_rate_limit else ""
447
+ console.print(
448
+ f"[dim]Subtasks: {len(subtasks)} | "
449
+ f"{rate_info}"
450
+ f"Max iterations: {self._config.max_iterations}[/dim]\n"
451
+ )
452
+
453
+ rate_limiter = RateLimiter(tokens_per_minute=token_rate_limit)
454
+ budget = 20_000
455
+ if self._nex_config:
456
+ budget = self._nex_config.subtask_token_budget
457
+
458
+ # Split iteration budget across subtasks (min 5 each)
459
+ iters_per_subtask = max(5, self._config.max_iterations // max(len(subtasks), 1))
460
+
461
+ prior_context = ""
462
+ subtask_results: list[str] = []
463
+
464
+ try:
465
+ for i, subtask in enumerate(subtasks, 1):
466
+ console.print(
467
+ Panel(
468
+ f"[bold]{subtask.description}[/bold]\n"
469
+ f"[dim]Files: {', '.join(subtask.file_paths) or 'auto'}[/dim]",
470
+ title=f"[bold cyan]Subtask {i}/{len(subtasks)}[/bold cyan]",
471
+ border_style="cyan",
472
+ )
473
+ )
474
+
475
+ # Build scoped context
476
+ scoped_code = assembler.select_scoped_code(
477
+ subtask.file_paths, subtask.description, index, budget
478
+ )
479
+
480
+ system_prompt = assembler.build_subtask_prompt(
481
+ subtask_description=subtask.description,
482
+ project_memory=project_memory,
483
+ error_patterns=error_patterns,
484
+ relevant_code=scoped_code,
485
+ prior_context=prior_context,
486
+ )
487
+
488
+ sub_result = await self._run_subtask_loop(
489
+ system_prompt=system_prompt,
490
+ task=subtask.description,
491
+ max_iterations=iters_per_subtask,
492
+ rate_limiter=rate_limiter,
493
+ memory=memory,
494
+ )
495
+
496
+ subtask_results.append(sub_result.text)
497
+
498
+ # Update memory after each subtask
499
+ await self._generate_memory_update(
500
+ memory, subtask.description, sub_result, haiku_model
501
+ )
502
+ memory.prune_section("Session Log")
503
+ project_memory = memory.load()
504
+
505
+ # Build prior context for the next subtask (keep it compact)
506
+ prior_context += (
507
+ f"- Subtask {i}: {subtask.description} -> {sub_result.text[:200]}\n"
508
+ )
509
+
510
+ except KeyboardInterrupt:
511
+ console.print("\n[yellow]Agent interrupted by user.[/yellow]")
512
+ finally:
513
+ error_db.close()
514
+
515
+ final_response = (
516
+ "\n\n".join(subtask_results) if subtask_results else "No subtasks completed."
517
+ )
518
+
519
+ console.print()
520
+ console.print(
521
+ Panel(
522
+ Markdown(final_response),
523
+ title="[bold green]All Subtasks Complete[/bold green]",
524
+ border_style="green",
525
+ )
526
+ )
527
+
528
+ # Post-task: run tests, show diff, and offer to commit
529
+ if self._files_modified:
530
+ tests_passed = await self._run_tests()
531
+ if not tests_passed:
532
+ console.print(
533
+ "[yellow]Tests failed. Review the changes before committing.[/yellow]"
534
+ )
535
+ await self._post_task_git()
536
+
537
+ return final_response
538
+
539
+ async def _run_subtask_loop(
540
+ self,
541
+ system_prompt: str,
542
+ task: str,
543
+ max_iterations: int,
544
+ rate_limiter: RateLimiter,
545
+ memory: ProjectMemory | None = None,
546
+ ) -> SubtaskResult:
547
+ """Run a focused mini agent loop for a single subtask.
548
+
549
+ Uses fresh message history to prevent token accumulation. Paces
550
+ API calls via the rate limiter. Tracks files touched and writes
551
+ a mid-subtask memory checkpoint for long-running subtasks.
552
+
553
+ Args:
554
+ system_prompt: Scoped system prompt for this subtask.
555
+ task: The subtask description.
556
+ max_iterations: Max tool call iterations for this subtask.
557
+ rate_limiter: Rate limiter to pace API calls.
558
+ memory: Project memory for mid-subtask checkpoints.
559
+
560
+ Returns:
561
+ A SubtaskResult with the response text, files touched, and
562
+ iteration count.
563
+ """
564
+ messages: list[dict[str, Any]] = [{"role": "user", "content": task}]
565
+ iteration = 0
566
+ files_touched: list[str] = []
567
+
568
+ while iteration < max_iterations:
569
+ iteration += 1
570
+ console.print(f"[dim]--- Subtask iteration {iteration} ---[/dim]")
571
+
572
+ # Mid-subtask memory checkpoint for long-running subtasks
573
+ if iteration == _MID_SUBTASK_MEMORY_THRESHOLD and memory is not None:
574
+ self._generate_mid_subtask_memory_update(memory, task, iteration, files_touched)
575
+
576
+ # Estimate tokens and wait if needed
577
+ estimated = ContextAssembler.estimate_tokens(system_prompt + task)
578
+ await rate_limiter.wait_if_needed(estimated)
579
+
580
+ response = await self._client.send_message(
581
+ messages=messages,
582
+ system=system_prompt,
583
+ tools=TOOL_DEFINITIONS,
584
+ )
585
+
586
+ # Record actual tokens
587
+ rate_limiter.record(response.input_tokens)
588
+
589
+ # Show token usage
590
+ console.print(
591
+ f"[dim]Tokens: {response.input_tokens} in / "
592
+ f"{response.output_tokens} out | "
593
+ f"Cost: ${self._client.usage.estimated_cost:.4f}[/dim]"
594
+ )
595
+
596
+ has_tool_use = any(block.get("type") == "tool_use" for block in response.content)
597
+
598
+ if not has_tool_use:
599
+ text = ""
600
+ for block in response.content:
601
+ if block.get("type") == "text":
602
+ text = str(block.get("text", ""))
603
+ break
604
+ return SubtaskResult(text=text, files_touched=files_touched, iterations=iteration)
605
+
606
+ # Execute tool calls
607
+ assistant_content = response.content
608
+ messages.append({"role": "assistant", "content": assistant_content})
609
+
610
+ tool_results: list[dict[str, Any]] = []
611
+ for block in assistant_content:
612
+ if block.get("type") == "tool_use":
613
+ tool_name = block["name"]
614
+ tool_input = block["input"]
615
+ tool_id = block["id"]
616
+
617
+ summary = _summarize_input(tool_input)
618
+ console.print(f" [cyan]Tool:[/cyan] {tool_name}({summary})")
619
+
620
+ result, modified = await execute_tool(
621
+ tool_name,
622
+ tool_input,
623
+ self._project_dir,
624
+ self._safety,
625
+ self._config.dry_run,
626
+ )
627
+ if modified:
628
+ self._files_modified = True
629
+ # Track written files
630
+ if tool_name == "write_file" and "path" in tool_input:
631
+ files_touched.append(tool_input["path"])
632
+
633
+ if result.success:
634
+ console.print(f" [green]OK[/green] ({len(result.output)} chars)")
635
+ else:
636
+ console.print(f" [red]Error:[/red] {result.error}")
637
+
638
+ content = result.output if result.success else f"Error: {result.error}"
639
+ tool_results.append(
640
+ {
641
+ "type": "tool_result",
642
+ "tool_use_id": tool_id,
643
+ "content": content,
644
+ "is_error": not result.success,
645
+ }
646
+ )
647
+
648
+ messages.append({"role": "user", "content": tool_results})
649
+
650
+ return SubtaskResult(
651
+ text="Subtask did not complete within iteration limit.",
652
+ files_touched=files_touched,
653
+ iterations=iteration,
654
+ )
655
+
656
+ async def _generate_memory_update(
657
+ self,
658
+ memory: ProjectMemory,
659
+ subtask_description: str,
660
+ result: SubtaskResult,
661
+ haiku_model: str,
662
+ ) -> None:
663
+ """Summarize a completed subtask and append to Session Log.
664
+
665
+ Uses Haiku to produce a concise 1-2 sentence summary of what
666
+ the subtask accomplished and which files were modified.
667
+
668
+ Args:
669
+ memory: Project memory instance.
670
+ subtask_description: What the subtask was supposed to do.
671
+ result: The subtask result with text, files, and iterations.
672
+ haiku_model: Model ID for the summary call.
673
+ """
674
+ truncated = result.text[:500]
675
+ files_str = ", ".join(result.files_touched[:10]) or "none"
676
+ user_msg = (
677
+ f"Subtask: {subtask_description}\nResult: {truncated}\nFiles modified: {files_str}"
678
+ )
679
+
680
+ try:
681
+ response = await self._client.send_message(
682
+ messages=[{"role": "user", "content": user_msg}],
683
+ system=_MEMORY_SUMMARY_PROMPT,
684
+ model=haiku_model,
685
+ max_tokens=256,
686
+ )
687
+ summary = ""
688
+ for block in response.content:
689
+ if block.get("type") == "text":
690
+ summary = block.get("text", "").strip()
691
+ break
692
+
693
+ if summary:
694
+ today = datetime.now(tz=UTC).strftime("%Y-%m-%d")
695
+ memory.append("Session Log", f"- [{today}] {summary}")
696
+ except Exception:
697
+ # Memory updates are best-effort; never fail the main task
698
+ pass
699
+
700
+ def _generate_mid_subtask_memory_update(
701
+ self,
702
+ memory: ProjectMemory,
703
+ task: str,
704
+ iteration: int,
705
+ files_touched: list[str],
706
+ ) -> None:
707
+ """Write a lightweight progress checkpoint to Session Log.
708
+
709
+ Called when a subtask exceeds ``_MID_SUBTASK_MEMORY_THRESHOLD``
710
+ iterations, without making an additional API call.
711
+
712
+ Args:
713
+ memory: Project memory instance.
714
+ task: The subtask description.
715
+ iteration: Current iteration number.
716
+ files_touched: Files written so far.
717
+ """
718
+ files_str = ", ".join(files_touched[:10]) or "none"
719
+ note = f"- [In progress] {task} ({iteration} iterations, files: {files_str})"
720
+ try:
721
+ memory.append("Session Log", note)
722
+ except Exception:
723
+ pass
724
+
364
725
  async def _run_tests(self) -> bool:
365
726
  """Detect and run the project's test suite.
366
727
 
@@ -3,7 +3,8 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import asyncio
6
- from dataclasses import dataclass
6
+ import time
7
+ from dataclasses import dataclass, field
7
8
  from typing import Any
8
9
 
9
10
  from rich.console import Console
@@ -58,6 +59,69 @@ class TokenUsage:
58
59
  return input_cost + output_cost
59
60
 
60
61
 
62
+ @dataclass
63
+ class RateLimiter:
64
+ """Token-bucket rate limiter using a 60-second sliding window.
65
+
66
+ Tracks timestamps and token counts of recent API calls. Before each call,
67
+ checks whether the next request would exceed the configured token limit
68
+ and sleeps if necessary.
69
+
70
+ Attributes:
71
+ tokens_per_minute: Maximum input tokens allowed per 60-second window.
72
+ Set to 0 to disable rate limiting.
73
+ """
74
+
75
+ tokens_per_minute: int = 0
76
+ _entries: list[tuple[float, int]] = field(default_factory=list, repr=False)
77
+
78
+ @property
79
+ def is_enabled(self) -> bool:
80
+ """Return True if rate limiting is active."""
81
+ return self.tokens_per_minute > 0
82
+
83
+ def record(self, input_tokens: int) -> None:
84
+ """Record a completed API call's actual input token count.
85
+
86
+ Args:
87
+ input_tokens: Actual input tokens consumed by the call.
88
+ """
89
+ self._entries.append((time.monotonic(), input_tokens))
90
+
91
+ def tokens_in_window(self) -> int:
92
+ """Return total input tokens consumed in the last 60 seconds."""
93
+ cutoff = time.monotonic() - 60.0
94
+ self._entries = [(t, n) for t, n in self._entries if t > cutoff]
95
+ return sum(n for _, n in self._entries)
96
+
97
+ async def wait_if_needed(self, estimated_tokens: int) -> None:
98
+ """Sleep if the next call would exceed the rate limit.
99
+
100
+ Args:
101
+ estimated_tokens: Estimated input tokens for the upcoming call.
102
+ """
103
+ if not self.is_enabled:
104
+ return
105
+
106
+ while True:
107
+ used = self.tokens_in_window()
108
+ if used + estimated_tokens <= self.tokens_per_minute:
109
+ return
110
+
111
+ # Find the oldest entry and wait until it expires
112
+ if self._entries:
113
+ oldest_time = self._entries[0][0]
114
+ wait = oldest_time + 60.0 - time.monotonic() + 0.1
115
+ if wait > 0:
116
+ console.print(
117
+ f"[yellow]Rate limit: {used} tokens in window, "
118
+ f"waiting {wait:.1f}s...[/yellow]"
119
+ )
120
+ await asyncio.sleep(wait)
121
+ else:
122
+ return
123
+
124
+
61
125
  class AnthropicClient:
62
126
  """Async wrapper around the Anthropic API.
63
127
 
@@ -59,6 +59,7 @@ _CONFIG_TEMPLATE = """\
59
59
  # model = "claude-sonnet-4-20250514"
60
60
  # max_iterations = 25
61
61
  # dry_run = false
62
+ token_rate_limit = 25000
62
63
  """
63
64
 
64
65
 
@@ -38,6 +38,8 @@ class NexConfig:
38
38
  nex_dir: Path to the .nex directory (relative to project root).
39
39
  test_command: Override for auto-detected test command (empty = auto-detect).
40
40
  test_timeout: Maximum seconds to wait for test suite to complete.
41
+ token_rate_limit: Max input tokens per minute. 0 = no rate limiting.
42
+ subtask_token_budget: Max tokens per subtask context window.
41
43
  """
42
44
 
43
45
  project_dir: Path = field(default_factory=Path.cwd)
@@ -50,6 +52,8 @@ class NexConfig:
50
52
  nex_dir: Path = field(default_factory=lambda: Path(".nex"))
51
53
  test_command: str = ""
52
54
  test_timeout: int = 120
55
+ token_rate_limit: int = 25_000
56
+ subtask_token_budget: int = 20_000
53
57
 
54
58
 
55
59
  def load_config(project_dir: Path) -> NexConfig:
@@ -148,6 +152,10 @@ def _apply_toml(config: NexConfig, settings: dict[str, Any]) -> None:
148
152
  config.test_command = str(settings["test_command"])
149
153
  if "test_timeout" in settings:
150
154
  config.test_timeout = int(settings["test_timeout"])
155
+ if "token_rate_limit" in settings:
156
+ config.token_rate_limit = int(settings["token_rate_limit"])
157
+ if "subtask_token_budget" in settings:
158
+ config.subtask_token_budget = int(settings["subtask_token_budget"])
151
159
 
152
160
 
153
161
  def _apply_env(config: NexConfig) -> None:
@@ -166,3 +174,7 @@ def _apply_env(config: NexConfig) -> None:
166
174
  config.test_command = test_cmd
167
175
  if test_timeout := os.environ.get("NEX_TEST_TIMEOUT"):
168
176
  config.test_timeout = int(test_timeout)
177
+ if token_rate := os.environ.get("NEX_TOKEN_RATE_LIMIT"):
178
+ config.token_rate_limit = int(token_rate)
179
+ if subtask_budget := os.environ.get("NEX_SUBTASK_TOKEN_BUDGET"):
180
+ config.subtask_token_budget = int(subtask_budget)