PyPI - aru-code - Versions diffs - 0.1.0__py3-none-any.whl - Mend

aru-code 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

aru/__init__.py +1 -0
aru/agents/__init__.py +0 -0
aru/agents/base.py +188 -0
aru/agents/executor.py +32 -0
aru/agents/planner.py +85 -0
aru/cli.py +1993 -0
aru/config.py +237 -0
aru/context.py +287 -0
aru/providers.py +433 -0
aru/tools/__init__.py +0 -0
aru/tools/ast_tools.py +422 -0
aru/tools/codebase.py +1328 -0
aru/tools/gitignore.py +109 -0
aru/tools/mcp_client.py +156 -0
aru/tools/ranker.py +220 -0
aru/tools/tasklist.py +183 -0
aru_code-0.1.0.dist-info/METADATA +385 -0
aru_code-0.1.0.dist-info/RECORD +22 -0
aru_code-0.1.0.dist-info/WHEEL +5 -0
aru_code-0.1.0.dist-info/entry_points.txt +2 -0
aru_code-0.1.0.dist-info/licenses/LICENSE +21 -0
aru_code-0.1.0.dist-info/top_level.txt +1 -0

aru/tools/gitignore.py ADDED Viewed

@@ -0,0 +1,109 @@
+"""Gitignore-aware file filtering for codebase operations."""
+import os
+from typing import Iterator
+import pathspec
+def normalize_path(path: str) -> str:
+    """Convert backslashes to forward slashes and remove trailing slashes."""
+    return path.replace("\\", "/").rstrip("/")
+# Hardcoded fallback patterns (always excluded even without .gitignore).
+# load_gitignore() places these ahead of any patterns read from .gitignore.
+_FALLBACK_PATTERNS = [
+    ".git",
+    "node_modules",
+    "__pycache__",
+    "venv",
+    ".venv",
+    ".aru",
+    "*.pyc",
+    "*.pyo",
+]
+# Cache of compiled specs: {(absolute root_dir, gitignore_mtime): PathSpec}.
+# The mtime component is 0.0 when root_dir has no .gitignore file.
+_cache: dict[tuple[str, float], pathspec.PathSpec] = {}
+def _find_git_root(start: str) -> str | None:
+    """Walk up from start directory to find the git root (directory containing .git)."""
+    current = os.path.abspath(start)
+    while True:
+        if os.path.isdir(os.path.join(current, ".git")):
+            return current
+        parent = os.path.dirname(current)
+        if parent == current:
+            return None
+        current = parent
+def load_gitignore(root_dir: str) -> pathspec.PathSpec:
+    """Parse .gitignore from root_dir combined with hardcoded fallback patterns.
+    Results are cached by root_dir and .gitignore mtime.
+    """
+    root_dir = os.path.abspath(root_dir)
+    gitignore_path = os.path.join(root_dir, ".gitignore")
+    mtime = 0.0
+    if os.path.isfile(gitignore_path):
+        mtime = os.path.getmtime(gitignore_path)
+    cache_key = (root_dir, mtime)
+    if cache_key in _cache:
+        return _cache[cache_key]
+    # Clear old entries for this root_dir
+    _cache.pop(next((k for k in _cache if k[0] == root_dir), (None, None)), None)
+    patterns = list(_FALLBACK_PATTERNS)
+    if os.path.isfile(gitignore_path):
+        with open(gitignore_path, "r", encoding="utf-8", errors="ignore") as f:
+            for line in f:
+                line = line.strip()
+                if line and not line.startswith("#"):
+                    patterns.append(line)
+    spec = pathspec.PathSpec.from_lines("gitwildmatch", patterns)
+    _cache[cache_key] = spec
+    return spec
+def is_ignored(path: str, root_dir: str) -> bool:
+    """Check if a relative path should be ignored based on .gitignore rules.
+    Args:
+        path: Relative path to check (forward slashes preferred).
+        root_dir: Project root directory containing .gitignore.
+    """
+    spec = load_gitignore(root_dir)
+    # Normalize to forward slashes for pathspec
+    normalized = path.replace("\\", "/")
+    return spec.match_file(normalized)
+def walk_filtered(directory: str) -> Iterator[tuple[str, list[str], list[str]]]:
+    """Walk directory tree, filtering out gitignored files and directories.
+    Drop-in replacement for os.walk() that respects .gitignore rules.
+    Finds the git root (or uses the directory itself) to load ignore patterns.
+    """
+    directory = os.path.abspath(directory)
+    root_dir = _find_git_root(directory) or directory
+    spec = load_gitignore(root_dir)
+    for dirpath, dirs, files in os.walk(directory):
+        # Filter directories in-place to prevent descending into ignored dirs
+        dirs[:] = [
+            d for d in dirs
+            if not spec.match_file(os.path.relpath(os.path.join(dirpath, d), root_dir).replace("\\", "/") + "/")
+        ]
+        # Filter files
+        filtered_files = [
+            f for f in files
+            if not spec.match_file(os.path.relpath(os.path.join(dirpath, f), root_dir).replace("\\", "/"))
+        ]
+        yield dirpath, dirs, filtered_files

aru/tools/mcp_client.py ADDED Viewed

@@ -0,0 +1,156 @@
+"""Model Context Protocol (MCP) client manager and tool generation."""
+import asyncio
+import json
+import os
+from contextlib import AsyncExitStack
+from agno.tools import Function
+from mcp.client.stdio import stdio_client, StdioServerParameters
+from mcp.client.session import ClientSession
+class McpSessionManager:
+    """Manages MCP server subprocesses and active client sessions."""
+    def __init__(self, config_path: str = "arc.mcp.json"):
+        self.config_path = config_path
+        self._exit_stack = AsyncExitStack()
+        self.sessions: dict[str, ClientSession] = {}
+    async def initialize(self):
+        """Read config and spawn all MCP servers concurrently."""
+        if not os.path.exists(self.config_path):
+            return
+        with open(self.config_path, "r", encoding="utf-8") as f:
+            try:
+                config = json.load(f)
+            except json.JSONDecodeError:
+                print(f"[Warning] Failed to parse {self.config_path}")
+                return
+        servers = config.get("mcpServers", {})
+        tasks = []
+        for name, svr_config in servers.items():
+            cmd = svr_config.get("command")
+            if not cmd:
+                continue
+            tasks.append(self._start_server(name, svr_config))
+        if tasks:
+            await asyncio.gather(*tasks)
+    async def _start_server(self, name: str, svr_config: dict):
+        """Start a single MCP server and register its session."""
+        cmd = svr_config.get("command")
+        args = svr_config.get("args", [])
+        env = svr_config.get("env", None)
+        server_params = StdioServerParameters(
+            command=cmd,
+            args=args,
+            env={**os.environ.copy(), **env} if env else None
+        )
+        try:
+            read_stream, write_stream = await self._exit_stack.enter_async_context(
+                stdio_client(server_params)
+            )
+            session = await self._exit_stack.enter_async_context(
+                ClientSession(read_stream, write_stream)
+            )
+            await session.initialize()
+            self.sessions[name] = session
+        except Exception as e:
+            print(f"[Warning] Failed to start MCP server '{name}': {e}")
+    async def get_tools(self) -> list[Function]:
+        """Fetch all tools from connected servers concurrently and convert to Agno Functions."""
+        async def _fetch(server_name: str, session: ClientSession) -> list[Function]:
+            try:
+                result = await session.list_tools()
+                return [self._create_agno_function(server_name, session, tool) for tool in result.tools]
+            except Exception as e:
+                print(f"[Warning] Failed to fetch tools from MCP server '{server_name}': {e}")
+                return []
+        results = await asyncio.gather(
+            *[_fetch(name, sess) for name, sess in self.sessions.items()]
+        )
+        return [tool for tools in results for tool in tools]
+    def _create_agno_function(self, server_name: str, session: ClientSession, tool) -> Function:
+        """Dynamically create an Agno Function that routes to the remote MCP tool."""
+        # We need to capture 'session' and 'tool.name' cleanly.
+        # Python's default arguments trick captures loop variables.
+        async def mcp_caller(**kwargs) -> str:
+            try:
+                result = await session.call_tool(tool.name, arguments=kwargs)
+                # Parse MCP ToolResultContent
+                output = []
+                for content in result.content:
+                    if hasattr(content, "text"):
+                        output.append(content.text)
+                if result.isError:
+                    return f"Error from {tool.name}: " + "\n".join(output)
+                return "\n".join(output)
+            except Exception as e:
+                return f"Error executing {tool.name} on {server_name}: {e}"
+        # Assign __name__ to the callable for Agno's internal representation
+        safe_name = f"{server_name}__{tool.name}".replace("-", "_")
+        mcp_caller.__name__ = safe_name
+        return Function(
+            name=safe_name,
+            description=f"[{server_name}] {tool.description or ''}",
+            parameters=tool.inputSchema,
+            entrypoint=mcp_caller
+        )
+    async def cleanup(self):
+        """Close all active MCP client sessions and terminate server subprocesses."""
+        try:
+            await self._exit_stack.aclose()
+        except (RuntimeError, Exception):
+            pass
+# Global singleton manager to be used entirely inside aru's async loops.
+# None until init_mcp() creates it; cleanup_mcp() sets it back to None.
+_manager: McpSessionManager | None = None
+async def init_mcp() -> list[Function]:
+    """Initialize MCP servers and return the loaded Agno functions."""
+    global _manager
+    if _manager is None:
+        config_path = None
+        for path in [
+            ".aru/mcp_servers.json",
+            "aru.mcp.json",
+            ".mcp.json",
+            "mcp.json"
+        ]:
+            if os.path.exists(path):
+                config_path = path
+                break
+        if config_path:
+            _manager = McpSessionManager(config_path=config_path)
+            await _manager.initialize()
+        else:
+            # Create an empty manager so cleanup doesn't fail, but return no tools
+            _manager = McpSessionManager(config_path="")
+            return []
+    return await _manager.get_tools()
+async def cleanup_mcp():
+    """Cleanup global manager."""
+    global _manager
+    if _manager:
+        await _manager.cleanup()
+        _manager = None

aru/tools/ranker.py ADDED Viewed

@@ -0,0 +1,220 @@
+"""Multi-factor file relevance ranking for task-driven context selection."""
+import fnmatch
+import os
+import re
+from aru.tools.gitignore import walk_filtered
+# Weights for each ranking signal; the three values sum to 1.0.
+WEIGHT_NAME = 0.50  # keyword hits in the file path (_score_name_match)
+WEIGHT_STRUCTURAL = 0.30  # imported by a top-ranked file (_get_structural_scores)
+WEIGHT_RECENCY = 0.20  # recently modified (_score_recency)
+def _get_project_files(root_dir: str) -> list[str]:
+    """Get all project files using gitignore-aware walk."""
+    files = []
+    for dirpath, _, filenames in walk_filtered(root_dir):
+        for filename in filenames:
+            filepath = os.path.join(dirpath, filename)
+            rel_path = os.path.relpath(filepath, root_dir).replace("\\", "/")
+            files.append(rel_path)
+    return files
+def _score_name_match(file_path: str, keywords: list[str]) -> float:
+    """Score based on how many task keywords appear in the file path/name."""
+    if not keywords:
+        return 0.0
+    path_lower = file_path.lower()
+    # Split path into components for matching
+    path_parts = re.split(r"[/\\_.\-]", path_lower)
+    matches = 0
+    for keyword in keywords:
+        kw = keyword.lower()
+        if len(kw) < 3:  # Skip very short words
+            continue
+        # Exact match in path component
+        if kw in path_parts:
+            matches += 2
+        # Partial match in full path
+        elif kw in path_lower:
+            matches += 1
+        # Check if any path component is a substring of the keyword (e.g., "auth" in "authentication")
+        else:
+            for part in path_parts:
+                if len(part) >= 3 and part in kw:
+                    matches += 1.5  # Higher than partial match, lower than exact
+                    break
+    return min(matches / max(len(keywords), 1), 1.0)
+def _extract_keywords(task: str) -> list[str]:
+    """Extract meaningful keywords from a task description."""
+    # Common stop words to filter out
+    stop_words = {
+        "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
+        "have", "has", "had", "do", "does", "did", "will", "would", "could",
+        "should", "may", "might", "can", "shall", "to", "of", "in", "for",
+        "on", "with", "at", "by", "from", "as", "into", "through", "during",
+        "before", "after", "above", "below", "between", "out", "off", "over",
+        "under", "again", "further", "then", "once", "here", "there", "when",
+        "where", "why", "how", "all", "each", "every", "both", "few", "more",
+        "most", "other", "some", "such", "no", "nor", "not", "only", "own",
+        "same", "so", "than", "too", "very", "just", "but", "and", "or",
+        "if", "it", "its", "this", "that", "these", "those", "i", "me", "my",
+        "we", "our", "you", "your", "he", "she", "they", "them", "what",
+        "which", "who", "whom", "add", "create", "make", "build", "implement",
+        "fix", "update", "change", "modify", "remove", "delete", "get", "set",
+        "use", "new", "file", "files", "code", "function", "method",
+    }
+    # Tokenize and filter
+    words = re.findall(r"[a-zA-Z_][a-zA-Z0-9_]*", task)
+    keywords = [w for w in words if w.lower() not in stop_words and len(w) >= 3]
+    return keywords
+def _score_recency(file_path: str, root_dir: str, max_age_days: float = 30.0) -> float:
+    """Score based on how recently the file was modified (0-1, 1 = most recent)."""
+    try:
+        mtime = os.path.getmtime(os.path.join(root_dir, file_path))
+        import time
+        age_seconds = time.time() - mtime
+        age_days = age_seconds / 86400
+        if age_days <= 0:
+            return 1.0
+        if age_days >= max_age_days:
+            return 0.0
+        return 1.0 - (age_days / max_age_days)
+    except OSError:
+        return 0.0
+def _get_structural_scores(top_files: list[str], root_dir: str) -> dict[str, float]:
+    """Boost files that are dependencies of already-relevant files."""
+    try:
+        from aru.tools.ast_tools import find_dependencies, _resolve_import_to_file, _find_project_root
+    except ImportError:
+        return {}
+    dep_counts: dict[str, int] = {}
+    for file_path in top_files[:5]:  # Only trace top 5 to avoid slowness
+        full_path = os.path.join(root_dir, file_path)
+        if not os.path.isfile(full_path):
+            continue
+        try:
+            with open(full_path, "r", encoding="utf-8", errors="ignore") as f:
+                content = f.read()
+        except OSError:
+            continue
+        # Extract imports and resolve to local files
+        for line in content.split("\n"):
+            stripped = line.strip()
+            if stripped.startswith("import ") or stripped.startswith("from "):
+                resolved = _resolve_import_to_file(stripped, root_dir)
+                if resolved:
+                    normalized = resolved.replace("\\", "/")
+                    dep_counts[normalized] = dep_counts.get(normalized, 0) + 1
+    if not dep_counts:
+        return {}
+    max_count = max(dep_counts.values())
+    return {k: v / max_count for k, v in dep_counts.items()}
+def rank_files(task: str, top_k: int = 15) -> str:
+    """Rank project files by relevance to a given task description.
+    Uses multiple signals to determine which files are most relevant:
+    - Filename/path keyword matching
+    - Structural dependencies (files imported by relevant files)
+    - Modification recency
+    Use this as a first step when starting a new task to identify which files to read.
+    Args:
+        task: Natural language description of the task (e.g. "add authentication to the CLI").
+        top_k: Maximum number of files to return. Defaults to 15.
+    """
+    root_dir = os.getcwd()
+    all_files = _get_project_files(root_dir)
+    if not all_files:
+        return "No files found in the project."
+    keywords = _extract_keywords(task)
+    # Signal 1: Name match scores
+    name_scores = {f: _score_name_match(f, keywords) for f in all_files}
+    # Signal 2: Recency scores
+    recency_scores = {f: _score_recency(f, root_dir) for f in all_files}
+    # Preliminary ranking (without structural) to find top files for dependency tracing
+    preliminary_scores = {}
+    for f in all_files:
+        score = (
+            WEIGHT_NAME * name_scores.get(f, 0.0)
+            + WEIGHT_RECENCY * recency_scores.get(f, 0.0)
+        )
+        preliminary_scores[f] = score
+    # Signal 3: Structural scores (based on top preliminary results)
+    top_preliminary = sorted(preliminary_scores, key=preliminary_scores.get, reverse=True)[:10]
+    structural_scores = _get_structural_scores(top_preliminary, root_dir)
+    # Final combined scores
+    final_scores: dict[str, tuple[float, list[str]]] = {}
+    for f in all_files:
+        reasons = []
+        name = name_scores.get(f, 0.0)
+        structural = structural_scores.get(f, 0.0)
+        recency = recency_scores.get(f, 0.0)
+        score = (
+            WEIGHT_NAME * name
+            + WEIGHT_STRUCTURAL * structural
+            + WEIGHT_RECENCY * recency
+        )
+        # Build reason strings
+        if name > 0.3:
+            reasons.append("name match")
+        if structural > 0:
+            reasons.append("dependency of top files")
+        if recency > 0.7:
+            reasons.append("recently modified")
+        if score > 0:
+            final_scores[f] = (score, reasons)
+    # Sort and take top_k
+    ranked = sorted(final_scores.items(), key=lambda x: x[1][0], reverse=True)[:top_k]
+    if not ranked:
+        return f"No files found with relevance to: {task}"
+    # Normalize scores to 0-1 based on top score
+    max_score = ranked[0][1][0] if ranked else 1.0
+    if max_score == 0:
+        max_score = 1.0
+    # Format output
+    lines = [f"Files ranked by relevance to: \"{task}\"\n"]
+    lines.append("Ranking mode: name + structural + recency\n")
+    for i, (file_path, (score, reasons)) in enumerate(ranked, 1):
+        normalized_score = score / max_score
+        reason_str = " + ".join(reasons) if reasons else "low signal"
+        lines.append(f"  {i:2d}. {file_path} ({normalized_score:.2f}) — {reason_str}")
+    return "\n".join(lines)

aru/tools/tasklist.py ADDED Viewed

@@ -0,0 +1,183 @@
+"""Task list tools for structured step execution.
+Provides create_task_list and update_task tools that the executor must call
+to plan and track subtasks within each plan step. Inspired by Claude Code
+and Antigravity's task management approach.
+"""
+import threading
+from rich.console import Console, Group
+from rich.panel import Panel
+from rich.text import Text
+# Fallback console used by _show() when neither a display nor a live object is usable.
+_console = Console()
+# Optional object whose .console _show() prints to; installed via set_live().
+_live = None
+# Optional display object; _show() uses it when it has show_permission(); installed via set_display().
+_display = None
+# Upper bound on the number of subtasks create_task_list() accepts.
+MAX_SUBTASKS = 10
+def set_live(live) -> None:
+    """Install the object whose ``.console`` _show() prints to when no usable display is set."""
+    global _live
+    _live = live
+def set_display(display) -> None:
+    """Install the display object _show() prefers (used only if it has ``show_permission``)."""
+    global _display
+    _display = display
+class _TaskStore:
+    """Thread-safe store for the current step's subtask list."""
+    def __init__(self):
+        self._lock = threading.Lock()
+        self._tasks: list[dict] = []  # {"index": int, "description": str, "status": str}
+        self._created = False
+    def create(self, tasks: list[str]) -> list[dict]:
+        with self._lock:
+            self._tasks = [
+                {"index": i + 1, "description": desc, "status": "pending"}
+                for i, desc in enumerate(tasks)
+            ]
+            self._created = True
+            return list(self._tasks)
+    def update(self, index: int, status: str) -> dict | None:
+        with self._lock:
+            for task in self._tasks:
+                if task["index"] == index:
+                    task["status"] = status
+                    return dict(task)
+            return None
+    def get_all(self) -> list[dict]:
+        with self._lock:
+            return list(self._tasks)
+    @property
+    def is_created(self) -> bool:
+        with self._lock:
+            return self._created
+    def reset(self):
+        with self._lock:
+            self._tasks = []
+            self._created = False
+# Global singleton per executor step (reset between steps via reset_task_store()).
+# create_task_list() and update_task() both operate on this instance.
+_store = _TaskStore()
+def reset_task_store() -> None:
+    """Reset the task store between executor steps (clears the tasks and the created flag)."""
+    _store.reset()
+def get_task_store() -> _TaskStore:
+    """Get the current task store for inspection."""
+    # Returns the module-level singleton itself, not a copy.
+    return _store
+def _render_task_list(tasks: list[dict]) -> Panel:
+    """Render the task list as a Rich panel."""
+    lines = []
+    for t in tasks:
+        if t["status"] == "completed":
+            icon = "[bold green]✓[/bold green]"
+            style = "dim"
+        elif t["status"] == "in_progress":
+            icon = "[bold yellow]~[/bold yellow]"
+            style = "bold"
+        elif t["status"] == "failed":
+            icon = "[bold red]✗[/bold red]"
+            style = "red"
+        else:
+            icon = "[dim]○[/dim]"
+            style = "dim"
+        lines.append(Text.from_markup(f"  {icon} {t['index']}. {t['description']}", style=style))
+    return Panel(
+        Group(*lines),
+        title="[bold cyan]Subtasks[/bold cyan]",
+        border_style="cyan",
+        expand=True,
+    )
+def _show(panel: Panel):
+    """Display panel using the active display or console."""
+    if _display and hasattr(_display, "show_permission"):
+        _display.show_permission(panel)
+    elif _live:
+        _live.console.print(panel)
+    else:
+        _console.print(panel)
+def create_task_list(tasks: list[str]) -> str:
+    """Create a subtask list for the current step. MUST be called before any other tool.
+    Define 1-10 concrete subtasks that you will execute in order.
+    Each subtask should be a single action (Read, Write, Edit, Run).
+    Args:
+        tasks: List of subtask descriptions. Min 1, max 10.
+               Example: ["Read backend/models.py", "Write backend/auth.py", "Edit backend/main.py — add import", "Run pytest"]
+    """
+    if _store.is_created:
+        return "Error: Task list already created for this step. Use update_task to update subtask status."
+    if len(tasks) < 1:
+        return "Error: Minimum 1 subtask required."
+    if len(tasks) > MAX_SUBTASKS:
+        return f"Error: Maximum {MAX_SUBTASKS} subtasks allowed. Got {len(tasks)}. Simplify your plan."
+    created = _store.create(tasks)
+    panel = _render_task_list(created)
+    _show(panel)
+    task_lines = "\n".join(f"  {t['index']}. {t['description']}" for t in created)
+    return f"Task list created ({len(created)} subtasks):\n{task_lines}\n\nNow execute subtask 1."
+def update_task(index: int, status: str) -> str:
+    """Update the status of a subtask. Call this as you complete each subtask.
+    Args:
+        index: Subtask number (1-based).
+        status: New status — one of: "in_progress", "completed", "failed".
+    """
+    if not _store.is_created:
+        return "Error: No task list exists. Call create_task_list first."
+    if status not in ("in_progress", "completed", "failed"):
+        return f"Error: Invalid status '{status}'. Use: in_progress, completed, failed."
+    updated = _store.update(index, status)
+    if not updated:
+        return f"Error: Subtask {index} not found."
+    # Show updated task list
+    all_tasks = _store.get_all()
+    panel = _render_task_list(all_tasks)
+    _show(panel)
+    # Check if all done
+    completed_count = sum(1 for t in all_tasks if t["status"] == "completed")
+    failed_count = sum(1 for t in all_tasks if t["status"] == "failed")
+    total = len(all_tasks)
+    if completed_count + failed_count == total:
+        return f"All subtasks finished ({completed_count} completed, {failed_count} failed). Step done. Output a brief summary of what was created/changed."
+    # Find next pending subtask
+    next_task = next((t for t in all_tasks if t["status"] == "pending"), None)
+    if next_task:
+        return f"Subtask {index} → {status}. Next: subtask {next_task['index']} — {next_task['description']}"
+    return f"Subtask {index} → {status}."