PyPI - makefile-agent - Versions diffs - 0.3.3__tar.gz → 0.3.5__tar.gz - Mend

makefile-agent 0.3.3__tar.gz → 0.3.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

{makefile_agent-0.3.3 → makefile_agent-0.3.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: makefile-agent
-Version: 0.3.3
+Version: 0.3.5
 Summary: AI‑assistant‑as‑Makefile: a tool to create and manage AI agents using a Makefile.
 Author: Dmitriy Sorochenkov
 License-Expression: MIT

{makefile_agent-0.3.3 → makefile_agent-0.3.5}/make_agent/agent.py RENAMED Viewed

@@ -1,10 +1,12 @@
 from __future__ import annotations
+import asyncio
 import json
 import logging
 import time
+from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, NamedTuple
+from typing import Any, AsyncGenerator, NamedTuple
 from uuid import uuid4
 import any_llm
@@ -34,6 +36,40 @@ _MAX_RUN_SECONDS_PER_REQUEST = 900
 logger = logging.getLogger(__name__)
+@dataclass
+class TokenEvent:
+    """A partial text token streamed from the LLM."""
+    text: str
+@dataclass
+class ToolStartEvent:
+    """Emitted just before a tool call is executed."""
+    name: str
+    args: dict
+@dataclass
+class ToolDoneEvent:
+    """Emitted after a tool call completes."""
+    name: str
+    output: str
+    is_error: bool
+@dataclass
+class DoneEvent:
+    """Emitted once the agent has a final text response (no more tool calls)."""
+    content: str
+AgentEvent = TokenEvent | ToolStartEvent | ToolDoneEvent | DoneEvent
 class AgentConfig(NamedTuple):
     makefile_path: Path
     model: str
@@ -66,7 +102,7 @@ def _parse_retry_after(e: any_llm.RateLimitError) -> float | None:
     return None
-def _completion_with_retry(
+async def _acompletion_with_retry(
     model: str,
     messages: list[dict],
     tool_kwargs: dict[str, Any],
@@ -74,25 +110,36 @@ def _completion_with_retry(
     max_tokens: int = _DEFAULT_MAX_TOKENS,
     reasoning_effort: str = _DEFAULT_REASONING_EFFORT,
 ) -> Any:
-    """Call ``any_llm.completion``, retrying on rate limit up to *max_retries* times.
+    """Call ``any_llm.acompletion`` with streaming, retrying on rate limit.
     On each ``RateLimitError`` the wait time is read from the ``Retry-After``
     response header when present, otherwise exponential backoff is used
     (``2^attempt`` seconds, capped at 60 s).  A message is printed before
     each retry so the user can see what is happening.
+    Returns an ``AsyncIterator[ChatCompletionChunk]``.
     """
     for attempt in range(max_retries + 1):
         try:
-            return any_llm.completion(model=model, messages=messages, max_tokens=max_tokens, reasoning_effort=reasoning_effort, **tool_kwargs)
+            return await any_llm.acompletion(
+                model=model,
+                messages=messages,
+                max_tokens=max_tokens,
+                reasoning_effort=reasoning_effort,
+                stream=True,
+                stream_options={"include_usage": True},
+                **tool_kwargs,
+            )
         except any_llm.RateLimitError as e:
             if attempt == max_retries:
                 raise
             wait = _parse_retry_after(e) or min(2**attempt, 60)
             print(
-                f"Rate limited, retrying in {wait:.0f}s" f" (attempt {attempt + 1}/{max_retries})...",
+                f"Rate limited, retrying in {wait:.0f}s"
+                f" (attempt {attempt + 1}/{max_retries})...",
                 flush=True,
             )
-            time.sleep(wait)
+            await asyncio.sleep(wait)
 def _parse_item(doc: Any) -> ChatCompletionMessageToolCall | None:
@@ -160,11 +207,12 @@ def _parse_disabled_builtins(value: str | None) -> frozenset[str]:
 class Agent:
     """LLM agent that maintains conversation history and dispatches tool calls.
-    Call the instance with a user message to get the assistant's reply::
+    Await ``arun()`` with a user message to get the assistant's reply, or use
+    ``astream()`` to receive events as they are produced::
-        config = AgentConfig(makefile_path=Path("Makefile"), model="anthropic/claude-haiku-4-5-20251001", session_id="example")
+        config = AgentConfig(makefile_path=Path("Makefile"), model="anthropic/claude-haiku-4-5", session_id="example")
         agent = Agent(config, memory=None)
-        reply = agent("List the files in the current directory.")
+        reply = await agent.arun("List the files in the current directory.")
     """
     def __init__(self, config: AgentConfig, memory: Memory | None) -> None:
@@ -212,7 +260,7 @@ class Agent:
     def model(self) -> str:
         return self._model
-    def _run_agent(self, mk_path: Path, prompt: str) -> str:
+    async def _arun_agent(self, mk_path: Path, prompt: str) -> str:
         """Instantiate a specialist agent in-process and return its response."""
         sub_disabled = self._disabled_builtin_tools | frozenset({"run_agent"})
         logger.info("Instantiating sub-agent with model %s to run %s", self._agent_model, mk_path)
@@ -228,23 +276,23 @@ class Agent:
             reasoning_effort=self._reasoning_effort,
             session_id=self._session_id,
         )
-        return Agent(sub_config, self._memory)(prompt)
+        return await Agent(sub_config, self._memory).arun(prompt)
     def __repr__(self) -> str:
         return f"Agent(model={self._model!r}, tools={self.tool_names!r})"
-    def __call__(self, user_input: str) -> str:
-        """Send *user_input* to the LLM and return the assistant's reply.
+    async def astream(self, user_input: str) -> AsyncGenerator[AgentEvent, None]:
+        """Stream events produced while processing *user_input*.
-        Dispatches tool calls in a loop until the model returns a plain
-        text response.
+        Yields :class:`TokenEvent` for each partial LLM token,
+        :class:`ToolStartEvent` / :class:`ToolDoneEvent` around each tool call,
+        and a final :class:`DoneEvent` when the agent is done.
         """
         self._messages.append({"role": "user", "content": user_input})
         logger.debug("[user]\n%s", user_input)
         if self._memory is not None:
             self._memory.store("user", user_input)
-        # Track consecutive identical failing tool calls to detect loops.
         last_fail_key: str | None = None
         consecutive_failures = 0
         model_turns = 0
@@ -261,7 +309,7 @@ class Agent:
                     f"aborted: exceeded {_MAX_RUN_SECONDS_PER_REQUEST}s runtime in a single request"
                 )
-            response = _completion_with_retry(
+            stream = await _acompletion_with_retry(
                 self._model,
                 self._messages,
                 self._tool_kwargs,
@@ -270,23 +318,79 @@ class Agent:
                 self._reasoning_effort,
             )
             model_turns += 1
-            msg = response.choices[0].message
-            logger.debug("[model_response]\n%s", msg)
-            if self._memory is not None and response.usage is not None:
+            # Accumulate streaming response.
+            content_parts: list[str] = []
+            tool_call_acc: dict[int, dict] = {}  # index → {id, name, arguments}
+            usage = None
+            async for chunk in stream:
+                if not chunk.choices:
+                    if chunk.usage is not None:
+                        usage = chunk.usage
+                    continue
+                delta = chunk.choices[0].delta
+                if delta.content:
+                    content_parts.append(delta.content)
+                    yield TokenEvent(delta.content)
+                if delta.tool_calls:
+                    for tc_delta in delta.tool_calls:
+                        idx = tc_delta.index
+                        if idx not in tool_call_acc:
+                            tool_call_acc[idx] = {"id": tc_delta.id or "", "name": "", "arguments": ""}
+                        if tc_delta.function:
+                            tool_call_acc[idx]["name"] += tc_delta.function.name or ""
+                            tool_call_acc[idx]["arguments"] += tc_delta.function.arguments or ""
+                if chunk.usage is not None:
+                    usage = chunk.usage
+            content = "".join(content_parts)
+            logger.debug("[model_response] content=%r tool_calls=%d", content[:120], len(tool_call_acc))
+            if self._memory is not None and usage is not None:
                 self._memory.record_token_usage(
                     self._session_id or "",
                     self._makefile_path.name,
                     self._model,
-                    response.usage.prompt_tokens,
-                    response.usage.completion_tokens,
+                    usage.prompt_tokens,
+                    usage.completion_tokens,
                 )
-            tool_calls = msg.tool_calls or _parse_content_tool_calls(msg.content or "")
-            if tool_calls:
-                self._messages.append(msg.model_dump(exclude_none=True))
+            # Support models that embed tool calls as a JSON array in content.
+            content_tool_calls = None
+            if not tool_call_acc and content:
+                content_tool_calls = _parse_content_tool_calls(content)
+            if tool_call_acc or content_tool_calls:
+                if tool_call_acc:
+                    sorted_tcs = [tool_call_acc[i] for i in sorted(tool_call_acc)]
+                    assistant_msg: dict = {
+                        "role": "assistant",
+                        "content": content or None,
+                        "tool_calls": [
+                            {
+                                "id": tc["id"],
+                                "type": "function",
+                                "function": {"name": tc["name"], "arguments": tc["arguments"]},
+                            }
+                            for tc in sorted_tcs
+                        ],
+                    }
+                    tool_calls_to_run = [
+                        ChatCompletionMessageFunctionToolCall(
+                            id=tc["id"],
+                            type="function",
+                            function=Function(name=tc["name"], arguments=tc["arguments"]),
+                        )
+                        for tc in sorted_tcs
+                    ]
+                else:
+                    assistant_msg = {"role": "assistant", "content": content}
+                    tool_calls_to_run = content_tool_calls  # type: ignore[assignment]
+                self._messages.append(assistant_msg)
-                for tc in tool_calls:
+                for tc in tool_calls_to_run:
                     if tool_calls_executed >= _MAX_TOOL_CALLS_PER_REQUEST:
                         raise RuntimeError(
                             f"aborted: exceeded {_MAX_TOOL_CALLS_PER_REQUEST} tool calls in a single request"
@@ -302,6 +406,8 @@ class Agent:
                         continue
                     logger.debug("[tool_call] %s args=%s", target, arguments)
+                    yield ToolStartEvent(name=target, args=arguments)
                     if target not in self._tool_name_set:
                         result = get_tool_result("", f"unknown tool: {target}", None)
                     else:
@@ -309,12 +415,12 @@ class Agent:
                             if target in self._builtins:
                                 raw = self._builtins[target](**arguments)
                                 if isinstance(raw, _RunAgent):
-                                    agent_result = self._run_agent(raw.mk_path, raw.prompt)
+                                    agent_result = await self._arun_agent(raw.mk_path, raw.prompt)
                                     result = get_tool_result(agent_result, "", 0, self._max_tool_output)
                                 else:
                                     result = get_tool_result(str(raw), "", 0, self._max_tool_output)
                             else:
-                                result = run_tool(
+                                result = await run_tool(
                                     target,
                                     arguments,
                                     self._makefile_path,
@@ -329,16 +435,10 @@ class Agent:
                             result = get_tool_result("", f"unexpected error: {e}", None)
                     logger.info("[tool_result] %s -> %s", target, result.output)
+                    yield ToolDoneEvent(name=target, output=result.output, is_error=result.is_error)
-                    self._messages.append(
-                        {
-                            "role": "tool",
-                            "tool_call_id": tc.id,
-                            "content": result.output,
-                        }
-                    )
+                    self._messages.append({"role": "tool", "tool_call_id": tc.id, "content": result.output})
-                    # Detect repeated identical failing tool calls.
                     call_key = f"{target}:{tc.function.arguments}"
                     if result.is_error and call_key == last_fail_key:
                         consecutive_failures += 1
@@ -361,12 +461,23 @@ class Agent:
                     last_fail_key = None
                     consecutive_failures = 0
             else:
-                content = msg.content or ""
                 self._messages.append({"role": "assistant", "content": content})
                 logger.debug("[assistant]\n%s", content)
                 if self._memory is not None:
                     self._memory.store("agent", content)
-                return content
+                yield DoneEvent(content=content)
+                return
+    async def arun(self, user_input: str) -> str:
+        """Send *user_input* to the LLM and return the assistant's final reply.
+        Convenience wrapper around :meth:`astream` that discards intermediate
+        events and returns the final text.
+        """
+        async for event in self.astream(user_input):
+            if isinstance(event, DoneEvent):
+                return event.content
+        return ""
 class SessionNotFoundError(Exception):
@@ -400,9 +511,13 @@ class AgentManager:
         except KeyError:
             raise SessionNotFoundError(f"Session with id {session_id} not found.")
-    def notify_agent(self, session_id: str, message: str) -> str:
+    async def arun_agent(self, session_id: str, message: str) -> str:
+        agent = self.get_agent(session_id)
+        return await agent.arun(message)
+    def astream_agent(self, session_id: str, message: str) -> AsyncGenerator[AgentEvent, None]:
         agent = self.get_agent(session_id)
-        return agent(message)
+        return agent.astream(message)
     def export_conversation(self, session_id: str) -> Path | None:
         agent = self.get_agent(session_id)

makefile_agent-0.3.5/make_agent/agent_shell.py ADDED Viewed

@@ -0,0 +1,205 @@
+import asyncio
+import readline
+import signal
+from pathlib import Path
+from typing import Any, Optional
+from make_agent.agent import (
+    _DEFAULT_MAX_RETRIES,
+    _DEFAULT_MAX_TOKENS,
+    _DEFAULT_MAX_TOOL_OUTPUT,
+    _DEFAULT_REASONING_EFFORT,
+    _DEFAULT_TOOL_TIMEOUT,
+    AgentConfig,
+    AgentManager,
+    DoneEvent,
+    TokenEvent,
+    ToolDoneEvent,
+    ToolStartEvent,
+)
+class MakeAgentShell:
+    """Async interactive REPL that delegates all LLM interaction to an :class:`Agent`."""
+    prompt = "make-agent> "
+    def __init__(self, agent_manager: AgentManager, session_id: str) -> None:
+        self._agent_manager = agent_manager
+        self._session_id = session_id
+        self._commands: dict[str, Any] = {
+            "exit": self._cmd_exit,
+            "quit": self._cmd_exit,
+            "export": self._cmd_export,
+            "stats": self._cmd_stats,
+            "help": self._cmd_help,
+        }
+    # ── readline completion ────────────────────────────────────────────────
+    def _setup_readline(self) -> None:
+        """Configure readline so /cmd completions work."""
+        try:
+            readline.set_completer_delims(readline.get_completer_delims().replace("/", ""))
+            readline.set_completer(self._completer)
+            readline.parse_and_bind("tab: complete")
+        except Exception:
+            pass
+    def _completer(self, text: str, state: int) -> str | None:
+        if not text.startswith("/"):
+            return None
+        cmd_text = text[1:]
+        matches = ["/" + name for name in self._commands if name.startswith(cmd_text)]
+        return matches[state] if state < len(matches) else None
+    # ── command handlers ───────────────────────────────────────────────────
+    def _cmd_exit(self) -> bool:
+        return True
+    def _cmd_export(self) -> bool:
+        path = self._agent_manager.export_conversation(self._session_id)
+        if path:
+            print(f"Conversation exported to {path}")
+        return False
+    def _cmd_stats(self) -> bool:
+        stats = self._agent_manager.get_token_stats(self._session_id)
+        if not stats:
+            print("No token usage stats available (memory not enabled or no LLM calls yet).")
+            return False
+        print(f"Token usage for session {self._session_id}:")
+        print(f"  Model(s):      {', '.join(stats['models'])}")
+        print(f"  Input tokens:  {stats['input_tokens']}")
+        print(f"  Output tokens: {stats['output_tokens']}")
+        print(f"  Total tokens:  {stats['total_tokens']}")
+        # Per-agent breakdown
+        agents = stats.get("agents", {})
+        if agents:
+            print("\nPer-agent breakdown:")
+            for agent_name, agent_stats in sorted(agents.items()):
+                print(f"    {agent_name}:")
+                print(f"      Input:  {agent_stats['input_tokens']}")
+                print(f"      Output: {agent_stats['output_tokens']}")
+                print(f"      Total:  {agent_stats['total_tokens']}")
+        return False
+    def _cmd_help(self) -> bool:
+        print("Commands: " + "  ".join(f"/{name}" for name in self._commands))
+        print("Any other input is sent to the agent. Press Ctrl-C to cancel a running turn.")
+        return False
+    def _dispatch_command(self, line: str) -> bool:
+        """Dispatch a /command. Returns True if the shell should exit."""
+        name, *_ = line.strip().split(None, 1)
+        handler = self._commands.get(name)
+        if handler is None:
+            print(f"Unknown command: /{name}  (type /help for a list)")
+            return False
+        return handler()
+    # ── agent turn ─────────────────────────────────────────────────────────
+    async def _stream_turn(self, message: str) -> None:
+        """Stream one agent turn, printing events as they arrive."""
+        async for event in self._agent_manager.astream_agent(self._session_id, message):
+            if isinstance(event, TokenEvent):
+                print(event.text, end="", flush=True)
+            elif isinstance(event, ToolStartEvent):
+                print(f"\nRunning: {event.name}...", flush=True)
+            elif isinstance(event, ToolDoneEvent):
+                pass  # tool output visible via agent logs; keep terminal clean
+            elif isinstance(event, DoneEvent):
+                print()  # trailing newline after streamed content
+    async def _run_turn(self, message: str) -> None:
+        """Run one agent turn with per-turn Ctrl-C cancellation."""
+        task = asyncio.create_task(self._stream_turn(message))
+        loop = asyncio.get_running_loop()
+        loop.add_signal_handler(signal.SIGINT, task.cancel)
+        try:
+            await task
+        except asyncio.CancelledError:
+            print("\nCancelled.")
+        except Exception as e:
+            print(f"Error: {e}")
+        finally:
+            loop.remove_signal_handler(signal.SIGINT)
+    # ── main loop ──────────────────────────────────────────────────────────
+    async def run(self) -> None:
+        """Start the interactive REPL loop."""
+        self._setup_readline()
+        loop = asyncio.get_running_loop()
+        print(
+            "Type your message. Prefix shell commands with /  "
+            "(e.g. /exit, /help). Press Ctrl-D or Ctrl-C twice to exit.\n"
+        )
+        while True:
+            try:
+                line = await loop.run_in_executor(None, input, self.prompt)
+            except EOFError:
+                print()
+                break
+            line = line.strip()
+            if not line:
+                continue
+            if line.startswith("/"):
+                should_exit = self._dispatch_command(line[1:])
+                if should_exit:
+                    break
+                continue
+            await self._run_turn(line)
+async def run(
+    makefile_path: Path,
+    model: str,
+    agent_model: Optional[str] = None,
+    prompt: Optional[str] = None,
+    max_retries: int = _DEFAULT_MAX_RETRIES,
+    tool_timeout: int = _DEFAULT_TOOL_TIMEOUT,
+    max_tool_output: int = _DEFAULT_MAX_TOOL_OUTPUT,
+    max_tokens: int = _DEFAULT_MAX_TOKENS,
+    agents_dir: str | None = None,
+    with_memory: bool = False,
+    disabled_builtin_tools: frozenset[str] = frozenset(),
+    reasoning_effort: str = _DEFAULT_REASONING_EFFORT,
+) -> None:
+    """Start the interactive shell (or send a single prompt and return).
+    Reads the system prompt and tool definitions from *makefile_path*, then
+    enters a :class:`MakeAgentShell` loop.  Press Ctrl-D or type ``/exit``
+    to leave.  When *prompt* is given the shell is bypassed: the prompt is
+    sent to the agent and the reply is printed.
+    """
+    agent_config = AgentConfig(
+        makefile_path=makefile_path,
+        model=model,
+        agent_model=agent_model,
+        max_retries=max_retries,
+        tool_timeout=tool_timeout,
+        max_tool_output=max_tool_output,
+        max_tokens=max_tokens,
+        agents_dir=agents_dir,
+        disabled_builtin_tools=disabled_builtin_tools,
+        reasoning_effort=reasoning_effort,
+    )
+    agent_manager = AgentManager()
+    session_id = agent_manager.create_session(agent_config, with_memory=with_memory)
+    print(f"Loaded {makefile_path}")
+    if prompt:
+        print("Sending initial prompt...\n")
+        print(await agent_manager.arun_agent(session_id, prompt))
+        return
+    shell = MakeAgentShell(agent_manager, session_id)
+    try:
+        await shell.run()
+    except KeyboardInterrupt:
+        print()

{makefile_agent-0.3.3 → makefile_agent-0.3.5}/make_agent/main.py RENAMED Viewed

@@ -1,6 +1,7 @@
 """make-agent: an AI agent driven by a Makefile."""
 import argparse
+import asyncio
 import logging
 import sys
 from pathlib import Path
@@ -119,19 +120,21 @@ def _cmd_run(args: argparse.Namespace) -> None:
         except OSError as e:
             sys.exit(f"make-agent run: {e}")
-    run(
-        makefile_path=Path(args.file),
-        model=args.model,
-        agent_model=args.agent_model if args.agent_model is not None else args.model,
-        prompt=prompt,
-        max_retries=args.max_retries,
-        tool_timeout=args.tool_timeout,
-        max_tool_output=args.max_tool_output,
-        max_tokens=args.max_tokens,
-        agents_dir=args.agents_dir,
-        with_memory=args.with_memory,
-        disabled_builtin_tools=_parse_disabled_tools(args.disable_builtin_tools),
-        reasoning_effort=args.reasoning_effort,
+    asyncio.run(
+        run(
+            makefile_path=Path(args.file),
+            model=args.model,
+            agent_model=args.agent_model if args.agent_model is not None else args.model,
+            prompt=prompt,
+            max_retries=args.max_retries,
+            tool_timeout=args.tool_timeout,
+            max_tool_output=args.max_tool_output,
+            max_tokens=args.max_tokens,
+            agents_dir=args.agents_dir,
+            with_memory=args.with_memory,
+            disabled_builtin_tools=_parse_disabled_tools(args.disable_builtin_tools),
+            reasoning_effort=args.reasoning_effort,
+        )
     )

{makefile_agent-0.3.3 → makefile_agent-0.3.5}/make_agent/memory.py RENAMED Viewed

@@ -196,7 +196,8 @@ class Memory:
         """Return aggregated token usage totals for *session_id*.
         Returns a dict with keys ``input_tokens``, ``output_tokens``,
-        ``total_tokens``, and ``models`` (list of distinct model names used),
+        ``total_tokens``, ``models`` (list of distinct model names used),
+        and ``agents`` (dict mapping agent name to per-agent stats),
         or an empty dict when no rows exist for that session.
         """
         conn = self._get_conn()
@@ -214,11 +215,31 @@ class Memory:
                 (session_id,),
             ).fetchall()
         ]
+        # Per-agent breakdown
+        agent_rows = conn.execute(
+            "SELECT agent, SUM(input_tokens) AS input_tokens, SUM(output_tokens) AS output_tokens"
+            " FROM token_usage WHERE session_id = ? GROUP BY agent ORDER BY agent",
+            (session_id,),
+        ).fetchall()
+        agents = {}
+        for arow in agent_rows:
+            agent_name = arow["agent"]
+            input_tok = arow["input_tokens"] or 0
+            output_tok = arow["output_tokens"] or 0
+            agents[agent_name] = {
+                "input_tokens": input_tok,
+                "output_tokens": output_tok,
+                "total_tokens": input_tok + output_tok,
+            }
         return {
             "input_tokens": row["input_tokens"],
             "output_tokens": row["output_tokens"],
             "total_tokens": row["input_tokens"] + row["output_tokens"],
             "models": models,
+            "agents": agents,
         }
     def close(self) -> None:

makefile-agent 0.3.3__tar.gz → 0.3.5__tar.gz

makefile-agent 0.3.3__tar.gz → 0.3.5__tar.gz