bare-agent 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bare_agent/__init__.py +47 -0
- bare_agent/budget.py +60 -0
- bare_agent/config.py +73 -0
- bare_agent/events.py +21 -0
- bare_agent/llm.py +203 -0
- bare_agent/logging.py +49 -0
- bare_agent/loop.py +200 -0
- bare_agent/registry.py +131 -0
- bare_agent-0.0.1.dist-info/METADATA +256 -0
- bare_agent-0.0.1.dist-info/RECORD +12 -0
- bare_agent-0.0.1.dist-info/WHEEL +4 -0
- bare_agent-0.0.1.dist-info/licenses/LICENSE +21 -0
bare_agent/__init__.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""bare-agent: a framework-free agent runtime. Own the loop, not the framework."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from bare_agent.budget import Budget, BudgetState, BudgetStop
|
|
6
|
+
from bare_agent.config import Settings, get_settings
|
|
7
|
+
from bare_agent.events import AgentEvent, EventSink, emit
|
|
8
|
+
from bare_agent.llm import LLMClient, LLMResponse, TokenSink, ToolCallRequest
|
|
9
|
+
from bare_agent.logging import configure_logging, get_logger
|
|
10
|
+
from bare_agent.loop import AgentLoop, AgentResult, CompletionClient
|
|
11
|
+
from bare_agent.registry import (
|
|
12
|
+
Approver,
|
|
13
|
+
Permission,
|
|
14
|
+
Tool,
|
|
15
|
+
ToolCall,
|
|
16
|
+
ToolRegistry,
|
|
17
|
+
ToolResult,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
__version__ = "0.0.1"
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"AgentEvent",
|
|
24
|
+
"AgentLoop",
|
|
25
|
+
"AgentResult",
|
|
26
|
+
"Approver",
|
|
27
|
+
"Budget",
|
|
28
|
+
"BudgetState",
|
|
29
|
+
"BudgetStop",
|
|
30
|
+
"CompletionClient",
|
|
31
|
+
"EventSink",
|
|
32
|
+
"LLMClient",
|
|
33
|
+
"LLMResponse",
|
|
34
|
+
"Permission",
|
|
35
|
+
"Settings",
|
|
36
|
+
"TokenSink",
|
|
37
|
+
"Tool",
|
|
38
|
+
"ToolCall",
|
|
39
|
+
"ToolCallRequest",
|
|
40
|
+
"ToolRegistry",
|
|
41
|
+
"ToolResult",
|
|
42
|
+
"__version__",
|
|
43
|
+
"configure_logging",
|
|
44
|
+
"emit",
|
|
45
|
+
"get_logger",
|
|
46
|
+
"get_settings",
|
|
47
|
+
]
|
bare_agent/budget.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Three-axis budget and hard cost cap for a single agent run."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from enum import StrEnum
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class BudgetStop(StrEnum):
|
|
11
|
+
TURNS = "max_turns"
|
|
12
|
+
TOKENS = "max_tokens"
|
|
13
|
+
WALLCLOCK = "max_wallclock_s"
|
|
14
|
+
COST = "max_cost_usd"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(frozen=True)
class Budget:
    """Immutable limits for a single agent run, one per budget axis."""

    max_turns: int
    max_tokens: int
    max_wallclock_s: float
    max_cost_usd: float

    @classmethod
    def from_settings(cls, settings: object) -> Budget:
        """Build a budget from any object exposing the four ``max_*`` attributes.

        Args:
            settings: Object with ``max_turns``, ``max_tokens``,
                ``max_wallclock_s`` and ``max_cost_usd`` attributes.

        Returns:
            A new ``Budget`` carrying those four values.

        Raises:
            AttributeError: If ``settings`` lacks one of the attributes.
        """
        limit_names = ("max_turns", "max_tokens", "max_wallclock_s", "max_cost_usd")
        # ``settings`` is typed as ``object``, so the attributes are read by name.
        return cls(**{name: getattr(settings, name) for name in limit_names})
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class BudgetState:
    """Running totals for one run, measured against a fixed ``Budget``."""

    budget: Budget
    turns: int = 0
    tokens: int = 0
    cost_usd: float = 0.0
    # Monotonic timestamp captured when the state object is created.
    _start: float = field(default_factory=time.monotonic)

    @property
    def elapsed_s(self) -> float:
        """Seconds elapsed on the monotonic clock since this state was created."""
        now = time.monotonic()
        return now - self._start

    def record_turn(self, *, tokens: int, cost_usd: float) -> None:
        """Count one more turn and add its token and cost usage to the totals."""
        self.turns = self.turns + 1
        self.tokens = self.tokens + tokens
        self.cost_usd = self.cost_usd + cost_usd

    def exceeded(self) -> BudgetStop | None:
        """Return the first axis whose usage has reached its limit, else ``None``.

        Axes are checked in the order turns, tokens, wall-clock, cost. A limit
        counts as reached when usage is greater than or equal to it.
        """
        limits = self.budget
        verdict: BudgetStop | None = None
        if self.turns >= limits.max_turns:
            verdict = BudgetStop.TURNS
        elif self.tokens >= limits.max_tokens:
            verdict = BudgetStop.TOKENS
        elif self.elapsed_s >= limits.max_wallclock_s:
            verdict = BudgetStop.WALLCLOCK
        elif self.cost_usd >= limits.max_cost_usd:
            verdict = BudgetStop.COST
        return verdict
|
bare_agent/config.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Typed runtime configuration via Pydantic Settings."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from functools import lru_cache
|
|
6
|
+
|
|
7
|
+
from pydantic import Field
|
|
8
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Settings(BaseSettings):
    # Typed runtime configuration. Values are read from environment variables
    # prefixed with ``BARE_AGENT_`` and from a UTF-8 ``.env`` file; unknown keys
    # are ignored.
    model_config = SettingsConfigDict(
        env_prefix="BARE_AGENT_",
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # --- Model selection and LLM call behaviour ---------------------------
    model: str = Field(
        default="ollama_chat/qwen3",
        description=(
            "LiteLLM model id. Local Ollama by default ($0, no key); any frontier id "
            "(e.g. anthropic/claude-haiku-4-5) with its provider key exported."
        ),
    )
    ollama_base_url: str = Field(
        default="http://localhost:11434",
        description="Ollama server URL, passed as api_base for ollama_chat/ models.",
    )
    fallback_models: list[str] = Field(
        default_factory=list,
        description='Ordered fallback model ids, e.g. ["anthropic/claude-haiku-4-5"].',
    )
    num_retries: int = Field(
        default=2,
        ge=0,
        description="LiteLLM transient-failure retries before falling back.",
    )
    temperature: float | None = Field(
        default=None,
        description="Sampling temperature; None = provider default.",
    )
    reasoning_effort: str | None = Field(
        default=None,
        description=(
            "Passed to LiteLLM; on Ollama Qwen3.x 'disable' maps to think:false. "
            "None = provider default."
        ),
    )
    request_timeout_s: float = Field(
        default=120.0,
        gt=0,
        description="Per-LLM-call timeout.",
    )

    # --- Per-run budget limits (all must be strictly positive) -------------
    max_turns: int = Field(
        default=8,
        gt=0,
        description="Maximum LLM turns per run.",
    )
    max_tokens: int = Field(
        default=120_000,
        gt=0,
        description="Maximum cumulative tokens per run.",
    )
    max_wallclock_s: float = Field(
        default=180.0,
        gt=0,
        description="Maximum wall-clock per run.",
    )
    max_cost_usd: float = Field(
        default=0.50,
        gt=0,
        description="Hard cost cap per run in USD.",
    )

    # --- Optional queue-backed execution ----------------------------------
    use_queue: bool = Field(
        default=False,
        description="Run agents via a Redis job queue + worker pool (KEDA-scalable) instead of inline.",
    )
    redis_url: str = Field(
        default="redis://localhost:6379/0",
        description="Redis DSN for the run queue + event pub/sub.",
    )
    runs_queue_key: str = Field(
        default="bare-agent:runs:pending",
        description="Redis list the API LPUSHes runs to and KEDA scales the worker on.",
    )

    # --- Logging ------------------------------------------------------------
    log_level: str = Field(
        default="INFO",
        description="structlog/stdlib level.",
    )
    log_json: bool = Field(
        default=False,
        description="Emit JSON logs.",
    )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@lru_cache(maxsize=1)
def get_settings() -> Settings:
    """Return the shared ``Settings`` instance.

    The result is cached, so the settings are constructed on the first call
    and the same object is returned afterwards.
    """
    settings = Settings()
    return settings
|
bare_agent/events.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Optional event stream for surfacing agent progress to a UI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Awaitable, Callable
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
class AgentEvent:
    """A single progress event: a ``kind`` label plus a free-form payload."""

    # Event type label chosen by the emitter.
    kind: str
    # Event payload; each instance gets its own empty dict by default.
    data: dict[str, Any] = field(default_factory=dict)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
EventSink = Callable[[AgentEvent], Awaitable[None]]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def emit(sink: EventSink | None, kind: str, /, **data: Any) -> None:
    """Send an ``AgentEvent`` to ``sink``, or do nothing when there is no sink.

    Args:
        sink: Async callable receiving the event, or ``None`` to drop it.
        kind: Event type label (positional-only, so ``kind`` may also be used
            as a payload key).
        **data: Keyword arguments that become the event payload.
    """
    if sink is None:
        return
    event = AgentEvent(kind, data)
    await sink(event)
|
bare_agent/llm.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"""LiteLLM gateway wrapper returning normalized, cost-attributed responses."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import contextlib
|
|
6
|
+
from collections.abc import Awaitable, Callable
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Any, TypeVar
|
|
9
|
+
|
|
10
|
+
import litellm
|
|
11
|
+
from pydantic import BaseModel
|
|
12
|
+
|
|
13
|
+
from bare_agent.config import Settings
|
|
14
|
+
from bare_agent.logging import get_logger
|
|
15
|
+
|
|
16
|
+
log = get_logger(__name__)
|
|
17
|
+
|
|
18
|
+
_Structured = TypeVar("_Structured", bound=BaseModel)
|
|
19
|
+
|
|
20
|
+
TokenSink = Callable[[str], Awaitable[None]]
|
|
21
|
+
|
|
22
|
+
_LOCAL_PREFIXES: tuple[str, ...] = ("ollama/", "ollama_chat/")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(frozen=True)
class ToolCallRequest:
    """A tool invocation requested by the model, as returned by the provider."""

    # Provider-assigned call id, echoed back with the tool result.
    id: str
    # Name of the tool the model wants to run.
    name: str
    # Raw argument payload as a string (not yet parsed).
    arguments: str
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True)
class LLMResponse:
    """Normalized result of one LLM call, with token usage and cost."""

    # Assistant text; may be ``None``.
    content: str | None
    # Tool invocations requested in this response (empty when none).
    tool_calls: list[ToolCallRequest]
    # Tokens consumed by the prompt.
    prompt_tokens: int
    # Tokens produced by the completion.
    completion_tokens: int
    # Cost attributed to this call, in USD.
    cost_usd: float
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class LLMClient:
    """Thin wrapper over ``litellm.acompletion`` returning ``LLMResponse`` objects."""

    def __init__(
        self,
        *,
        model: str,
        timeout_s: float,
        temperature: float | None = None,
        num_retries: int = 2,
        fallbacks: list[str] | None = None,
        reasoning_effort: str | None = None,
        api_base: str | None = None,
    ) -> None:
        """Store the call configuration; no network activity happens here.

        Args:
            model: LiteLLM model id.
            timeout_s: Timeout passed to each LiteLLM call.
            temperature: Sampling temperature; omitted from calls when ``None``.
            num_retries: Retry count passed to LiteLLM.
            fallbacks: Fallback model ids; an empty list is treated as ``None``.
            reasoning_effort: Passed to LiteLLM when set (not for structured calls).
            api_base: Provider base URL; omitted from calls when ``None``.
        """
        self._model: str = model
        self._timeout: float = timeout_s
        self._temperature: float | None = temperature
        self._reasoning_effort: str | None = reasoning_effort
        self._num_retries: int = num_retries
        self._fallbacks: list[str] | None = fallbacks or None
        self._api_base: str | None = api_base

    @classmethod
    def from_settings(cls, settings: Settings) -> LLMClient:
        """Build a client from ``Settings``.

        ``ollama_base_url`` is passed as ``api_base`` only when the model id
        starts with one of the local Ollama prefixes.
        """
        is_local: bool = settings.model.startswith(_LOCAL_PREFIXES)
        return cls(
            model=settings.model,
            timeout_s=settings.request_timeout_s,
            temperature=settings.temperature,
            num_retries=settings.num_retries,
            fallbacks=settings.fallback_models or None,
            reasoning_effort=settings.reasoning_effort,
            api_base=settings.ollama_base_url if is_local else None,
        )

    def _reliability_kwargs(self) -> dict[str, Any]:
        """Return timeout/retry kwargs, plus fallbacks and api_base when set."""
        kwargs: dict[str, Any] = {"timeout": self._timeout, "num_retries": self._num_retries}
        if self._fallbacks is not None:
            kwargs["fallbacks"] = self._fallbacks
        if self._api_base is not None:
            kwargs["api_base"] = self._api_base
        return kwargs

    def _sampling_kwargs(self, *, structured: bool = False) -> dict[str, Any]:
        """Return sampling kwargs; ``reasoning_effort`` is left out of structured calls."""
        kwargs: dict[str, Any] = {}
        if self._temperature is not None:
            kwargs["temperature"] = self._temperature
        if self._reasoning_effort is not None and not structured:
            kwargs["reasoning_effort"] = self._reasoning_effort
        return kwargs

    def _prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
        """Mark the first string system message as cacheable for Anthropic models.

        For model ids containing neither "anthropic" nor "claude" the input
        list is returned unchanged (same object). Otherwise a new list is
        built in which the first system message with string content is
        rewritten as a single text part carrying an ephemeral ``cache_control``
        marker; every other message is passed through as-is.
        """
        if "anthropic" not in self._model and "claude" not in self._model:
            return messages
        prepared: list[dict[str, Any]] = []
        marked: bool = False
        for message in messages:
            content: Any = message.get("content")
            if not marked and message.get("role") == "system" and isinstance(content, str):
                prepared.append(
                    {
                        "role": "system",
                        "content": [
                            {
                                "type": "text",
                                "text": content,
                                "cache_control": {"type": "ephemeral"},
                            }
                        ],
                    }
                )
                marked = True
            else:
                prepared.append(message)
        return prepared

    async def complete(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]] | None = None,
        *,
        on_token: TokenSink | None = None,
    ) -> LLMResponse:
        """Run one completion and return it normalized.

        Args:
            messages: Chat messages to send.
            tools: Tool schemas to offer the model, or ``None``.
            on_token: When given, the call is streamed and each text delta is
                awaited through this sink; otherwise a single non-streaming
                call is made.

        Returns:
            The normalized response with token usage and cost.
        """
        messages = self._prepare_messages(messages)
        if on_token is None:
            response: Any = await litellm.acompletion(
                model=self._model,
                messages=messages,
                tools=tools,
                **self._reliability_kwargs(),
                **self._sampling_kwargs(),
            )
        else:
            response = await self._stream(messages, tools, on_token)
        return self._normalize(response)

    async def _stream(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]] | None,
        on_token: TokenSink,
    ) -> Any:
        """Stream a completion, forward text deltas, and rebuild a full response.

        Every chunk is kept so ``litellm.stream_chunk_builder`` can assemble
        the final response; usage is requested via ``stream_options``.
        """
        chunks: list[Any] = []
        stream: Any = await litellm.acompletion(
            model=self._model,
            messages=messages,
            tools=tools,
            **self._reliability_kwargs(),
            stream=True,
            stream_options={"include_usage": True},
            **self._sampling_kwargs(),
        )
        try:
            async for chunk in stream:
                chunks.append(chunk)
                choices: Any = getattr(chunk, "choices", None)
                if not choices:
                    # Chunks without choices are kept for the builder but carry no text.
                    continue
                delta: str | None = getattr(choices[0].delta, "content", None)
                if delta:
                    await on_token(delta)
            return litellm.stream_chunk_builder(chunks, messages=messages)
        finally:
            # Best-effort close of the stream, including when iteration or the
            # sink raised; errors from closing are deliberately suppressed.
            aclose: Any = getattr(stream, "aclose", None)
            if aclose is not None:
                with contextlib.suppress(Exception):
                    await aclose()

    def _normalize(self, response: Any) -> LLMResponse:
        """Convert a LiteLLM response into an ``LLMResponse``.

        Missing usage data counts as zero tokens. If cost calculation raises,
        the failure is logged at debug level and the cost is reported as 0.0.
        """
        message: Any = response.choices[0].message
        tool_calls: list[ToolCallRequest] = [
            ToolCallRequest(id=call.id, name=call.function.name, arguments=call.function.arguments)
            for call in (message.tool_calls or [])
        ]
        # Read ``usage`` defensively: a response object without that attribute
        # is treated the same as one whose token fields are missing.
        usage: Any = getattr(response, "usage", None)
        prompt_tokens: int = int(getattr(usage, "prompt_tokens", 0) or 0)
        completion_tokens: int = int(getattr(usage, "completion_tokens", 0) or 0)

        try:
            cost_usd: float = float(litellm.completion_cost(completion_response=response) or 0.0)
        except Exception as error:
            log.debug("cost_calc_skipped", model=self._model, error=str(error))
            cost_usd = 0.0

        return LLMResponse(
            content=message.content,
            tool_calls=tool_calls,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            cost_usd=cost_usd,
        )

    async def complete_structured(
        self, messages: list[dict[str, Any]], schema: type[_Structured]
    ) -> _Structured:
        """Request a response matching ``schema`` and validate it.

        The schema class is passed to LiteLLM as ``response_format``. An empty
        response body is validated as ``"{}"``. Token usage and cost are not
        returned by this method.
        """
        messages = self._prepare_messages(messages)
        response: Any = await litellm.acompletion(
            model=self._model,
            messages=messages,
            response_format=schema,
            **self._reliability_kwargs(),
            **self._sampling_kwargs(structured=True),
        )
        content: str = response.choices[0].message.content or "{}"
        return schema.model_validate_json(content)
|
bare_agent/logging.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Structured logging via structlog: console for dev, JSON for prod."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import sys
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import orjson
|
|
10
|
+
import structlog
|
|
11
|
+
|
|
12
|
+
_is_configured: bool = False
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _orjson_serializer(obj: Any, default: Any = None, **_: Any) -> str:
    """Serialize ``obj`` to a JSON string with orjson.

    ``default`` is forwarded to ``orjson.dumps``; any other keyword arguments
    are accepted and ignored.
    """
    encoded = orjson.dumps(obj, default=default)
    return encoded.decode()
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def configure_logging(*, level: str = "INFO", json: bool = False) -> None:
    """Configure structlog once; later calls are no-ops.

    Args:
        level: Level name, matched case-insensitively against the stdlib
            level names; unknown names fall back to ``INFO``.
        json: Render events as JSON when true, otherwise use the console
            renderer (colored only when stderr is a TTY).
    """
    global _is_configured
    if not _is_configured:
        threshold: int = logging.getLevelNamesMapping().get(level.upper(), logging.INFO)

        final_stage: Any
        if json:
            final_stage = structlog.processors.JSONRenderer(serializer=_orjson_serializer)
        else:
            final_stage = structlog.dev.ConsoleRenderer(colors=sys.stderr.isatty())

        # Processor order matters: context and metadata are added before the
        # renderer turns the event dict into output.
        pipeline: list[Any] = [
            structlog.contextvars.merge_contextvars,
            structlog.processors.add_log_level,
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.processors.format_exc_info,
            final_stage,
        ]
        structlog.configure(
            processors=pipeline,
            wrapper_class=structlog.make_filtering_bound_logger(threshold),
            logger_factory=structlog.PrintLoggerFactory(file=sys.stderr),
            cache_logger_on_first_use=True,
        )
        _is_configured = True
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_logger(name: str | None = None) -> Any:
    """Return a structlog logger, applying default configuration if none exists.

    Modules in this package call ``get_logger`` at import time. Applying the
    defaults here must therefore not latch the one-shot guard in
    ``configure_logging``; otherwise a later explicit
    ``configure_logging(level=..., json=...)`` from the application would be
    silently ignored.

    Args:
        name: Logger name passed to ``structlog.get_logger``.

    Returns:
        The structlog logger for ``name``.
    """
    global _is_configured
    # Only fall back to defaults when nobody has configured structlog yet,
    # so an application's own structlog setup is not overwritten.
    if not _is_configured and not structlog.is_configured():
        configure_logging()
        # Implicit defaults do not count as the one explicit configuration.
        _is_configured = False
    return structlog.get_logger(name)
|
bare_agent/loop.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""An LLM in a tool-use loop: a stateless reducer over an explicit message list."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections import Counter
|
|
6
|
+
from contextlib import AsyncExitStack
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Any, Protocol
|
|
9
|
+
|
|
10
|
+
import orjson
|
|
11
|
+
from pydantic import BaseModel
|
|
12
|
+
|
|
13
|
+
from bare_agent.budget import Budget, BudgetState, BudgetStop
|
|
14
|
+
from bare_agent.events import EventSink, emit
|
|
15
|
+
from bare_agent.llm import LLMResponse, TokenSink
|
|
16
|
+
from bare_agent.logging import get_logger
|
|
17
|
+
from bare_agent.registry import Approver, ToolCall, ToolRegistry, ToolResult
|
|
18
|
+
|
|
19
|
+
log = get_logger(__name__)
|
|
20
|
+
|
|
21
|
+
_CYCLE_LIMIT: int = 3
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class CompletionClient(Protocol):
    """Structural type for anything that can run one chat completion."""

    async def complete(
        self,
        messages: list[dict[str, Any]],
        tools: list[dict[str, Any]] | None = None,
        *,
        on_token: TokenSink | None = None,
    ) -> LLMResponse:
        """Run one completion over ``messages`` and return the response.

        Args:
            messages: Chat messages to send.
            tools: Tool schemas to offer, or ``None``.
            on_token: Optional async sink for streamed text deltas.
        """
        ...
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True)
class AgentResult:
    """Outcome of one agent run: the answer, why it stopped, and usage totals."""

    # Final answer text, or a bracketed stop notice when the run was cut short.
    answer: str
    # Why the run ended (for example "completed").
    stop_reason: str
    # Number of LLM turns taken.
    turns: int
    # Cumulative tokens used.
    tokens: int
    # Cumulative cost in USD.
    cost_usd: float
    # Message list accumulated during the run; defaults to a fresh empty list.
    transcript: list[dict[str, Any]] = field(default_factory=list)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _result_to_text(result: ToolResult) -> str:
    """Render a tool result as the text sent back to the model.

    Failures become a JSON object with a single ``error`` key. Successful
    content is rendered as: the model's own JSON dump for Pydantic models, the
    string itself for strings, and an orjson dump (with ``str`` as the
    fallback encoder) for anything else.
    """
    if result.ok:
        payload: Any = result.content
        if isinstance(payload, BaseModel):
            return payload.model_dump_json()
        if isinstance(payload, str):
            return payload
        return orjson.dumps(payload, default=str).decode()
    return orjson.dumps({"error": result.error}).decode()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _wrap_untrusted(result: ToolResult) -> str:
    """Fence successful tool output so the model can tell it is untrusted.

    Failed results are returned as plain error text without a fence. For
    successful results, any literal closing delimiter inside the output is
    neutralised first; otherwise tool output could close the fence early and
    place text outside the untrusted region.

    Args:
        result: The tool result to render.

    Returns:
        The error text for failures, or the fenced output for successes.
    """
    text: str = _result_to_text(result)
    if not result.ok:
        return text
    # Escape an embedded closing tag so only the tag added below ends the fence.
    fenced: str = text.replace("</untrusted_tool_output>", "&lt;/untrusted_tool_output&gt;")
    return f"<untrusted_tool_output>\n{fenced}\n</untrusted_tool_output>"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _parse_arguments(raw: str) -> dict[str, Any]:
    """Decode a tool call's raw JSON arguments into a dict.

    Returns an empty dict when ``raw`` is empty, is not valid JSON, or decodes
    to anything other than a JSON object.
    """
    if raw:
        try:
            decoded: Any = orjson.loads(raw)
        except orjson.JSONDecodeError:
            decoded = None
        if isinstance(decoded, dict):
            return decoded
    return {}
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _call_signature(name: str, args: dict[str, Any]) -> str:
    """Return a canonical string identifying a tool call by name and arguments.

    Keys are sorted during serialization, so the same arguments give the same
    signature regardless of key order.
    """
    fingerprint = {"n": name, "a": args}
    encoded = orjson.dumps(fingerprint, option=orjson.OPT_SORT_KEYS)
    return encoded.decode()
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class AgentLoop:
    """Drives an LLM through a tool-use loop until it answers or a limit is hit."""

    def __init__(
        self,
        *,
        registry: ToolRegistry,
        llm: CompletionClient,
        budget: Budget,
        system_prompt: str,
        approver: Approver | None = None,
    ) -> None:
        """Store the collaborators; all per-run state is created inside ``run``.

        Args:
            registry: Tools offered to the model and used to dispatch its calls.
            llm: Client used for each completion.
            budget: Limits applied to every run.
            system_prompt: First message of every run.
            approver: Passed to the registry on each dispatch.
        """
        self._registry: ToolRegistry = registry
        self._llm: CompletionClient = llm
        self._budget: Budget = budget
        self._system_prompt: str = system_prompt
        self._approver: Approver | None = approver

    async def run(
        self,
        user_input: str,
        *,
        on_event: EventSink | None = None,
        stream_tokens: bool = False,
    ) -> AgentResult:
        """Run the loop for one user input.

        Args:
            user_input: The user message that starts the run.
            on_event: Optional async sink receiving "turn", "tool_call" and
                (when streaming) "token" events.
            stream_tokens: Stream text deltas as "token" events; only has an
                effect when ``on_event`` is also given.

        Returns:
            The answer, stop reason ("completed", "cycle", or a budget stop
            value), usage totals, and the message list built during the run.
        """
        state: BudgetState = BudgetState(self._budget)
        messages: list[dict[str, Any]] = [
            {"role": "system", "content": self._system_prompt},
            {"role": "user", "content": user_input},
        ]
        # An empty schema list is sent as None rather than [].
        tools: list[dict[str, Any]] | None = self._registry.schema() or None
        answer: str = ""
        stop_reason: str = "completed"
        # Counts how often each (tool name, arguments) signature was requested.
        seen_calls: Counter[str] = Counter()

        token_sink: TokenSink | None = None
        if stream_tokens and on_event is not None:

            async def token_sink(delta: str) -> None:
                await emit(on_event, "token", text=delta)

        async with AsyncExitStack() as stack:
            # Logs a debug line when the block exits, whether normally or by exception.
            stack.callback(lambda: log.debug("agent_loop_teardown", turns=state.turns))

            while True:
                # The budget is checked before every LLM call, including the first.
                stop: BudgetStop | None = state.exceeded()
                if stop is not None:
                    stop_reason = stop.value
                    answer = answer or f"[stopped: {stop.value} budget reached]"
                    break

                response: LLMResponse = await self._llm.complete(
                    messages, tools=tools, on_token=token_sink
                )
                state.record_turn(
                    tokens=response.prompt_tokens + response.completion_tokens,
                    cost_usd=response.cost_usd,
                )
                log.info(
                    "turn",
                    n=state.turns,
                    tokens=state.tokens,
                    cost_usd=round(state.cost_usd, 4),
                    tool_calls=len(response.tool_calls),
                )
                await emit(
                    on_event,
                    "turn",
                    n=state.turns,
                    tool_calls=len(response.tool_calls),
                    cost_usd=round(state.cost_usd, 4),
                )

                # A response without tool calls is the final answer. Note that
                # this final assistant message is not appended to ``messages``.
                if not response.tool_calls:
                    answer = response.content or ""
                    stop_reason = "completed"
                    break

                # Record the assistant turn before the tool results that answer it.
                messages.append(
                    {
                        "role": "assistant",
                        "content": response.content,
                        "tool_calls": [
                            {
                                "id": call.id,
                                "type": "function",
                                "function": {"name": call.name, "arguments": call.arguments},
                            }
                            for call in response.tool_calls
                        ],
                    }
                )

                cycling: bool = False
                for call in response.tool_calls:
                    parsed: dict[str, Any] = _parse_arguments(call.arguments)
                    signature: str = _call_signature(call.name, parsed)
                    seen_calls[signature] += 1
                    # The call is still dispatched on the repetition that trips
                    # the cycle limit; the run only stops after this batch.
                    result: ToolResult = await self._registry.dispatch(
                        ToolCall(name=call.name, arguments=parsed),
                        approver=self._approver,
                    )
                    await emit(on_event, "tool_call", name=call.name, ok=result.ok)
                    messages.append(
                        {
                            "role": "tool",
                            "tool_call_id": call.id,
                            "content": _wrap_untrusted(result),
                        }
                    )
                    if seen_calls[signature] >= _CYCLE_LIMIT:
                        cycling = True

                if cycling:
                    stop_reason = "cycle"
                    answer = answer or "[stopped: repeated the same tool call — likely a loop]"
                    log.warning("agent_cycle", turns=state.turns)
                    break

        return AgentResult(
            answer=answer,
            stop_reason=stop_reason,
            turns=state.turns,
            tokens=state.tokens,
            cost_usd=round(state.cost_usd, 6),
            transcript=messages,
        )
|
bare_agent/registry.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""Self-registering tool registry with permission gating and async dispatch."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import inspect
|
|
6
|
+
from collections.abc import Awaitable, Callable
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from enum import StrEnum
|
|
9
|
+
from typing import Any, Protocol, get_type_hints
|
|
10
|
+
|
|
11
|
+
from pydantic import BaseModel, ValidationError
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Permission(StrEnum):
|
|
15
|
+
ALLOW = "allow"
|
|
16
|
+
ASK = "ask"
|
|
17
|
+
DENY = "deny"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ToolFunction(Protocol):
    """Structural type of a registrable tool: a named async one-argument callable."""

    # Used as the tool's registered name.
    __name__: str

    async def __call__(self, args: Any, /) -> Any:
        """Run the tool with its single positional argument."""
        ...
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class ToolCall:
    """A request to run a named tool with already-parsed arguments."""

    # Name of the tool to look up in the registry.
    name: str
    # Argument mapping, validated against the tool's model at dispatch time.
    arguments: dict[str, Any]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass(frozen=True)
class ToolResult:
    """Outcome of dispatching one tool call."""

    # Name of the tool that was requested.
    name: str
    # True when the tool ran and returned; False for any refusal or failure.
    ok: bool
    # Value returned by the tool on success.
    content: Any = None
    # Human-readable reason on failure.
    error: str | None = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
Approver = Callable[[ToolCall], Awaitable[bool]]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass(frozen=True)
class Tool:
    """A registered tool: its metadata, argument model, callable and permission."""

    name: str
    description: str
    # Pydantic model used to validate arguments and to generate the schema.
    args_model: type[BaseModel]
    func: ToolFunction
    permission: Permission

    def json_schema(self) -> dict[str, Any]:
        """Return the function-tool schema entry describing this tool.

        The ``parameters`` section is the JSON schema of ``args_model``.
        """
        function_spec: dict[str, Any] = {
            "name": self.name,
            "description": self.description,
            "parameters": self.args_model.model_json_schema(),
        }
        return {"type": "function", "function": function_spec}
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _extract_args_model(func: ToolFunction) -> type[BaseModel]:
    """Return the Pydantic model class annotating a tool's single argument.

    Parameters named ``self`` are ignored when counting.

    Raises:
        ValueError: If the function does not take exactly one argument, or
            that argument is not annotated with a ``BaseModel`` subclass.
    """
    signature = inspect.signature(func)
    candidates: list[inspect.Parameter] = [
        param for param_name, param in signature.parameters.items() if param_name != "self"
    ]
    if len(candidates) != 1:
        raise ValueError(f"tool {func.__name__!r} must take exactly one Pydantic-model argument")

    (only,) = candidates
    hint: Any = get_type_hints(func).get(only.name)
    if not (isinstance(hint, type) and issubclass(hint, BaseModel)):
        raise ValueError(
            f"tool {func.__name__!r} argument must be annotated with a BaseModel subclass"
        )
    return hint
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class ToolRegistry:
    """Holds registered tools and dispatches calls to them behind permission checks."""

    def __init__(self) -> None:
        """Start with no tools registered."""
        self._tools: dict[str, Tool] = {}

    def tool(
        self, *, permission: Permission = Permission.ALLOW
    ) -> Callable[[ToolFunction], ToolFunction]:
        """Return a decorator that registers a function as a tool.

        The function's name becomes the tool name and its docstring the
        description. Registering a name again replaces the earlier entry.

        Args:
            permission: Gate applied when the tool is dispatched.

        Raises:
            ValueError: From the decorator, if the function has no docstring
                or its signature is not a single ``BaseModel`` argument.
        """

        def register(func: ToolFunction) -> ToolFunction:
            doc: str | None = inspect.getdoc(func)
            if not doc:
                raise ValueError(
                    f"tool {func.__name__!r} needs a docstring; it becomes the LLM description"
                )
            tool_name: str = func.__name__
            self._tools[tool_name] = Tool(
                name=tool_name,
                description=doc,
                args_model=_extract_args_model(func),
                func=func,
                permission=permission,
            )
            # The function itself is returned unchanged.
            return func

        return register

    def names(self) -> list[str]:
        """Return the registered tool names in registration order."""
        return [*self._tools]

    def get(self, name: str) -> Tool | None:
        """Return the tool registered under ``name``, or ``None``."""
        return self._tools.get(name)

    def schema(self) -> list[dict[str, Any]]:
        """Return the schema entries for all registered tools."""
        return [entry.json_schema() for entry in self._tools.values()]

    async def dispatch(self, call: ToolCall, *, approver: Approver | None = None) -> ToolResult:
        """Run a tool call and report the outcome; tool errors are returned, not raised.

        Checks run in this order: the tool must exist, must not be denied,
        must be approved when its permission is ``ASK`` (no approver means no
        approval), and its arguments must validate against its model. Any
        exception raised by the tool itself becomes a failed result.

        Args:
            call: The tool name and argument mapping.
            approver: Async callable consulted for ``ASK`` tools.

        Returns:
            A successful result carrying the tool's return value, or a failed
            result carrying the reason.
        """

        def refuse(reason: str) -> ToolResult:
            return ToolResult(call.name, ok=False, error=reason)

        entry: Tool | None = self._tools.get(call.name)
        if entry is None:
            return refuse(f"unknown tool: {call.name!r}")

        if entry.permission is Permission.DENY:
            return refuse("denied by policy")
        if entry.permission is Permission.ASK:
            granted: bool = False if approver is None else await approver(call)
            if not granted:
                return refuse("denied (no approval granted)")

        try:
            validated: BaseModel = entry.args_model.model_validate(call.arguments)
        except ValidationError as error:
            return refuse(f"invalid arguments: {error}")

        try:
            output: Any = await entry.func(validated)
        except Exception as error:
            return refuse(f"{type(error).__name__}: {error}")

        return ToolResult(call.name, ok=True, content=output)
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: bare-agent
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A framework-free agent runtime you can read, run, and leave. Own the loop, not the framework. Runs local on Ollama at $0 — or any frontier model.
|
|
5
|
+
Project-URL: Homepage, https://github.com/subratamondal1/bare-agent
|
|
6
|
+
Project-URL: Repository, https://github.com/subratamondal1/bare-agent
|
|
7
|
+
Author: Subrata Mondal
|
|
8
|
+
License: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: agents,framework-free,litellm,llm,local-first,ollama,tool-calling
|
|
11
|
+
Requires-Python: >=3.12
|
|
12
|
+
Requires-Dist: litellm>=1.55
|
|
13
|
+
Requires-Dist: orjson>=3.10
|
|
14
|
+
Requires-Dist: pydantic-settings>=2.7
|
|
15
|
+
Requires-Dist: pydantic>=2.10
|
|
16
|
+
Requires-Dist: python-dotenv>=1.0
|
|
17
|
+
Requires-Dist: structlog>=24.4
|
|
18
|
+
Provides-Extra: api
|
|
19
|
+
Requires-Dist: fastapi>=0.115; extra == 'api'
|
|
20
|
+
Requires-Dist: httpx>=0.28; extra == 'api'
|
|
21
|
+
Requires-Dist: redis>=5; extra == 'api'
|
|
22
|
+
Requires-Dist: uvicorn[standard]>=0.32; extra == 'api'
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
<p align="center">
|
|
26
|
+
<img src="https://raw.githubusercontent.com/subratamondal1/bare-agent/main/docs/assets/logo.png" width="96" alt="Bare Agent" />
|
|
27
|
+
</p>
|
|
28
|
+
|
|
29
|
+
<h1 align="center">Bare Agent</h1>
|
|
30
|
+
|
|
31
|
+
<p align="center">
|
|
32
|
+
<strong>Own the loop, not the framework.</strong>
|
|
33
|
+
</p>
|
|
34
|
+
|
|
35
|
+
<p align="center">
|
|
36
|
+
A framework-free agent runtime you can read, run, and leave — a small library you<br/>
|
|
37
|
+
import and call, plus a visual studio that ejects to plain Python with zero dependency on us.
|
|
38
|
+
</p>
|
|
39
|
+
|
|
40
|
+
<p align="center">
|
|
41
|
+
<a href="https://github.com/subratamondal1/bare-agent/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue?style=flat" alt="License: MIT"></a>
|
|
42
|
+
<img src="https://img.shields.io/badge/python-3.12%2B-blue?style=flat" alt="Python 3.12+">
|
|
43
|
+
<img src="https://img.shields.io/badge/tests-29%20passing-brightgreen?style=flat" alt="Tests: 29 passing">
|
|
44
|
+
<img src="https://img.shields.io/badge/local--first-Ollama-orange?style=flat" alt="Local-first">
|
|
45
|
+
<img src="https://img.shields.io/badge/studio-Next.js%2016-black?style=flat" alt="Studio: Next.js 16">
|
|
46
|
+
</p>
|
|
47
|
+
|
|
48
|
+
<p align="center">
|
|
49
|
+
<a href="https://github.com/subratamondal1/bare-agent/actions/workflows/ci.yml"><img src="https://github.com/subratamondal1/bare-agent/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
|
|
50
|
+
<a href="https://github.com/subratamondal1/bare-agent/stargazers"><img src="https://img.shields.io/github/stars/subratamondal1/bare-agent?style=flat&color=yellow" alt="Stars"></a>
|
|
51
|
+
<a href="https://github.com/subratamondal1/bare-agent/commits/main"><img src="https://img.shields.io/github/last-commit/subratamondal1/bare-agent?style=flat" alt="Last commit"></a>
|
|
52
|
+
</p>
|
|
53
|
+
|
|
54
|
+
<p align="center">
|
|
55
|
+
<a href="#features">Features</a> •
|
|
56
|
+
<a href="#quickstart">Quickstart</a> •
|
|
57
|
+
<a href="#the-studio">Studio</a> •
|
|
58
|
+
<a href="#how-it-works">How it works</a> •
|
|
59
|
+
<a href="#eject">Eject</a> •
|
|
60
|
+
<a href="#configuration">Configuration</a> •
|
|
61
|
+
<a href="#development">Development</a>
|
|
62
|
+
</p>
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
Most agent frameworks own your `main()`, hide control flow behind metaclasses and DAG executors,
|
|
67
|
+
and obscure the actual prompts. `bare-agent` is the opposite: a small library — the agent loop, a
|
|
68
|
+
tool registry, a 3-axis budget, and a LiteLLM gateway, ~600 readable lines — that you **import and
|
|
69
|
+
call**. You own the loop. Every prompt is in plain sight. You can always **eject to plain Python**
|
|
70
|
+
and run it with **zero `bare_agent` dependency**.
|
|
71
|
+
|
|
72
|
+
On top of the library sits an optional **visual studio**: wire agents into a chain on a canvas,
|
|
73
|
+
attach tools, **Run** and watch tokens stream live, then eject the whole flow to a self-contained
|
|
74
|
+
`agent.py`. **Local-first** — it runs at zero cost on Ollama; OpenAI, Anthropic, and Gemini are
|
|
75
|
+
optional drop-ins through the same loop. Built on Python 3.12 · LiteLLM · FastAPI · Next.js 16 —
|
|
76
|
+
with **no agent framework** (no LangChain/LangGraph): the loop, the budget, and the failure
|
|
77
|
+
handling are owned directly.
|
|
78
|
+
|
|
79
|
+
<p align="center">
|
|
80
|
+
<img src="https://raw.githubusercontent.com/subratamondal1/bare-agent/main/docs/assets/bare-agent-demo.gif" width="100%" alt="Bare Agent studio: chain a Solver and an Explainer agent on a canvas, attach the calculator, Run and watch each agent's turns, tool calls, and tokens stream live with real per-call cost, then Eject the whole flow to a self-contained Python script." />
|
|
81
|
+
</p>
|
|
82
|
+
|
|
83
|
+
<p align="center">
|
|
84
|
+
<em>The studio, end to end: chain a <strong>Solver</strong> and an <strong>Explainer</strong>, attach the calculator, <strong>Run</strong> and watch each agent stream its turns, tool calls, and tokens live — with real per-call cost attribution (here on <code>gpt-5.4-mini</code>, ~$0.0006 for the whole chain) — then <strong>Eject to Python</strong>, a self-contained <code>agent.py</code> with zero <code>bare_agent</code> dependency. The same loop runs local-first on Ollama at $0.</em>
|
|
85
|
+
</p>
|
|
86
|
+
|
|
87
|
+
## Features
|
|
88
|
+
|
|
89
|
+
| Capability | Detail |
|
|
90
|
+
|---|---|
|
|
91
|
+
| **Framework-free agent loop** | A hand-written tool-use loop over LiteLLM with a 3-axis budget (turns / tokens / wall-clock) + hard cost cap, a retry/fallback ladder, and a self-registering, permission-gated tool registry. The loop is a stateless reducer over an explicit `messages: list[dict]`. |
|
|
92
|
+
| **Local-first, $0 — or BYO frontier key** | Every call goes through LiteLLM, so the model id picks the provider. `ollama_chat/qwen3` runs free and offline; `anthropic/…`, `openai/…`, `gemini/…` are drop-ins. No lock-in. |
|
|
93
|
+
| **Multi-agent chains** | Wire agents agent→agent; the runtime topologically orders them and feeds each answer into the next. Inline runs, queued runs, and ejected code all execute the same chain. |
|
|
94
|
+
| **Visual studio** | A React Flow canvas (Next.js 16 / React 19) to build chains, attach tools, and watch turns / tool calls / tokens stream live over SSE — one readable section per agent. |
|
|
95
|
+
| **Eject to plain Python** | Compile any graph to a standalone `agent.py` (litellm + pydantic only) — tool sources inlined, **zero `bare_agent` import**. Machine-checked to compile. The graph is a convenience, never a cage. |
|
|
96
|
+
| **HITL / permissions** | An `Approver` gates each tool call as allow / ask / deny; successful tool output is wrapped in `<untrusted_tool_output>` for prompt-injection containment. |
|
|
97
|
+
| **Horizontal scale** | An optional Redis-list job queue + worker pool; Kubernetes + **KEDA scale workers 0→N→0** on queue depth — the same shape as [Argus](https://github.com/subratamondal1/argus)'s searcher fan-out. |
|
|
98
|
+
| **Composition, not configuration** | Seams are Python `Protocol`s — swap the LLM, the approver, or the event sink by passing a different object. No god-object to subclass. |
|
|
99
|
+
|
|
100
|
+
## Quickstart
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
uv add bare-agent # or: pip install bare-agent
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
A complete agent in ~30 lines — the docstring becomes the LLM's tool description:
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
import asyncio
|
|
110
|
+
from pydantic import BaseModel, Field
|
|
111
|
+
from bare_agent import AgentLoop, Budget, LLMClient, ToolRegistry, get_settings
|
|
112
|
+
|
|
113
|
+
registry = ToolRegistry()
|
|
114
|
+
|
|
115
|
+
class AddArgs(BaseModel):
|
|
116
|
+
a: int = Field(description="first addend")
|
|
117
|
+
b: int = Field(description="second addend")
|
|
118
|
+
|
|
119
|
+
@registry.tool()
|
|
120
|
+
async def add(args: AddArgs) -> int:
|
|
121
|
+
"""Add two integers and return their sum."""
|
|
122
|
+
return args.a + args.b
|
|
123
|
+
|
|
124
|
+
async def main() -> None:
|
|
125
|
+
settings = get_settings() # local Ollama by default; set BARE_AGENT_MODEL for frontier
|
|
126
|
+
agent = AgentLoop(
|
|
127
|
+
registry=registry,
|
|
128
|
+
llm=LLMClient.from_settings(settings),
|
|
129
|
+
budget=Budget.from_settings(settings),
|
|
130
|
+
system_prompt="You are a precise assistant. Use tools for arithmetic.",
|
|
131
|
+
)
|
|
132
|
+
result = await agent.run("What is 17 + 25, then add 100 to that?")
|
|
133
|
+
print(result.answer) # -> "142"
|
|
134
|
+
print(result.stop_reason, result.turns, f"${result.cost_usd}") # -> completed 3 $0.0
|
|
135
|
+
|
|
136
|
+
asyncio.run(main())
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
Run it locally for free:
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
ollama pull qwen3 # one-time (qwen3:30b-a3b-thinking on a 32GB Mac)
|
|
143
|
+
make demo # or: uv run python examples/quickstart.py
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## The studio
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
make web # FastAPI on :8000 + Next.js studio on :3000 → http://localhost:3000/studio
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Open `http://localhost:3000/studio`: **Add** agents and wire them into a chain, attach catalog
|
|
153
|
+
tools, pick a model (local qwen3 at $0 or your frontier key), and **Run** — each agent streams its
|
|
154
|
+
turns, tool calls, and tokens live over SSE in its own section. The backend is standalone:
|
|
155
|
+
`make api` runs the control plane alone, and the library works with no UI at all.
|
|
156
|
+
|
|
157
|
+
## How it works
|
|
158
|
+
|
|
159
|
+
```
|
|
160
|
+
user input
|
|
161
|
+
│
|
|
162
|
+
▼
|
|
163
|
+
┌──────────────┐ answer feeds ┌──────────────┐
|
|
164
|
+
│ Agent 1 │ ───────────────► │ Agent 2 │ ──────────► final answer
|
|
165
|
+
│ + tools │ the next │ + tools │
|
|
166
|
+
└──────────────┘ └──────────────┘
|
|
167
|
+
each agent = ONE hand-written loop:
|
|
168
|
+
explicit messages list · 3-axis budget + cost cap · permission-gated tool dispatch
|
|
169
|
+
|
|
170
|
+
run it: inline over SSE · or queue → worker pool → KEDA scales 0→N→0
|
|
171
|
+
keep it: Eject ──► agent.py (litellm + pydantic only — ZERO bare_agent dependency)
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
The loop is a **stateless reducer** over an explicit `messages: list[dict]`. That one decision pays
|
|
175
|
+
three ways, all for free:
|
|
176
|
+
|
|
177
|
+
- **Durability** — the list is serializable, so checkpoint it and resume after a crash.
|
|
178
|
+
- **Eject-to-code** — the list *is* the program; there was never a framework underneath to lift out.
|
|
179
|
+
- **Testability** — feed a canned `messages` list (or a fake `CompletionClient`), then assert on the result.
|
|
180
|
+
|
|
181
|
+
No metaclass magic, no hidden DAG executor, no god-object to subclass, no state trapped in a
|
|
182
|
+
session. Extensibility is composition: `AgentLoop(llm=..., approver=..., registry=...)`.
|
|
183
|
+
|
|
184
|
+
### The 8 primitives (each usable on its own — not a god-object)
|
|
185
|
+
|
|
186
|
+
| # | Primitive | Where |
|
|
187
|
+
|---|---|---|
|
|
188
|
+
| ① | Tool registry — `@registry.tool()` → JSON-schema → permission-gated dispatch | `registry.py` |
|
|
189
|
+
| ② | Prompt assembly — the explicit, serializable `messages: list[dict]` | `loop.py` |
|
|
190
|
+
| ③ | Agent loop — `AsyncExitStack` + 3-axis budget + termination + cycle-stop | `loop.py` |
|
|
191
|
+
| ④ | Retry / fallback over LiteLLM (local Ollama **or** any frontier model) | `llm.py` |
|
|
192
|
+
| ⑤ | State / memory — checkpoint the `messages` list (durability for free) | `loop.py` |
|
|
193
|
+
| ⑥ | HITL / permissions — allow / ask / deny, an `Approver` on `ask` | `registry.py` |
|
|
194
|
+
| ⑦ | Observability — `structlog` + an optional `EventSink` (SSE-ready) | `events.py` |
|
|
195
|
+
| ⑧ | Eval gate — golden replay (roadmap) | — |
|
|
196
|
+
|
|
197
|
+
## Eject
|
|
198
|
+
|
|
199
|
+
Any flow — single agent or a chain — compiles to a standalone script that imports only `litellm`
|
|
200
|
+
and `pydantic`. Tool sources are inlined verbatim; there is **no `bare_agent` import**:
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
uv run --with litellm --with pydantic agent.py "your question"
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
In the studio, **Eject to Python** shows the generated code and downloads it. The generated file is
|
|
207
|
+
machine-checked to compile. You can read it, diff it, vendor it, and run it after you stop using
|
|
208
|
+
bare-agent entirely — that is the point.
|
|
209
|
+
|
|
210
|
+
## Configuration
|
|
211
|
+
|
|
212
|
+
Settings are read by [Pydantic Settings](https://github.com/subratamondal1/bare-agent/blob/main/src/bare_agent/config.py) from the environment
|
|
213
|
+
(`BARE_AGENT_` prefix) or `.env` (`cp .env.example .env`). The defaults are fully local and free.
|
|
214
|
+
Common overrides:
|
|
215
|
+
|
|
216
|
+
| Variable | Default | Purpose |
|
|
217
|
+
|---|---|---|
|
|
218
|
+
| `BARE_AGENT_MODEL` | `ollama_chat/qwen3` | LiteLLM model id. Local Ollama by default; `anthropic/…`, `openai/…`, `gemini/…` for hosted. |
|
|
219
|
+
| `BARE_AGENT_OLLAMA_BASE_URL` | `http://localhost:11434` | Ollama server, passed as `api_base` for `ollama_chat/` models. |
|
|
220
|
+
| `BARE_AGENT_FALLBACK_MODELS` | `[]` | Ordered fallback model ids (JSON list) for the retry ladder. |
|
|
221
|
+
| `BARE_AGENT_MAX_TURNS` / `…_TOKENS` / `…_WALLCLOCK_S` / `…_COST_USD` | `8` / `120000` / `180` / `0.50` | The 3-axis budget + hard cost cap; the loop stops as soon as the first of these limits trips. |
|
|
222
|
+
| `BARE_AGENT_USE_QUEUE` | `false` | Route runs through the Redis queue + worker pool (KEDA-autoscalable) instead of inline. |
|
|
223
|
+
| `BARE_AGENT_REDIS_URL` | `redis://localhost:6379/0` | Redis DSN for the run queue + event pub/sub (queue mode). |
|
|
224
|
+
|
|
225
|
+
For a hosted model, set `BARE_AGENT_MODEL=anthropic/…` and export that provider's key
|
|
226
|
+
(`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, `GEMINI_API_KEY`) — LiteLLM reads it from the environment.
|
|
227
|
+
|
|
228
|
+
## Development
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
make ci # lock-check + format-check + lint (ruff) + compile + typecheck (ty) + tests (pytest)
|
|
232
|
+
make test # the 29-test suite — hermetic (the LLM and Redis are faked; no daemon needed)
|
|
233
|
+
make web # backend + studio together for local hacking
|
|
234
|
+
make up / down # the Docker stack (api + studio; Ollama stays on the host)
|
|
235
|
+
make queue-up # the Docker stack WITH the KEDA-shaped worker plane (+ redis + worker)
|
|
236
|
+
make help # all targets
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
Kubernetes manifests live in [`k8s/`](https://github.com/subratamondal1/bare-agent/tree/main/k8s) — an inline deploy (api + studio) and the KEDA worker
|
|
240
|
+
plane (redis + worker). The studio has its own toolchain ([`apps/studio/AGENTS.md`](https://github.com/subratamondal1/bare-agent/blob/main/apps/studio/AGENTS.md));
|
|
241
|
+
the canonical agent rules for the whole repo are in [`AGENTS.md`](https://github.com/subratamondal1/bare-agent/blob/main/AGENTS.md).
|
|
242
|
+
|
|
243
|
+
<!-- Uncomment once the repo has stars (renders an empty chart at 0):
|
|
244
|
+
## Star history
|
|
245
|
+
|
|
246
|
+
<p align="center">
|
|
247
|
+
<a href="https://star-history.com/#subratamondal1/bare-agent&Date">
|
|
248
|
+
<img src="https://api.star-history.com/svg?repos=subratamondal1/bare-agent&type=Date" width="600" alt="Star history">
|
|
249
|
+
</a>
|
|
250
|
+
</p>
|
|
251
|
+
-->
|
|
252
|
+
|
|
253
|
+
## License
|
|
254
|
+
|
|
255
|
+
MIT © 2026 Subrata Mondal — see [LICENSE](https://github.com/subratamondal1/bare-agent/blob/main/LICENSE). Built as the clean, reusable extraction of
|
|
256
|
+
[Argus](https://github.com/subratamondal1/argus)'s agent runtime.
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
bare_agent/__init__.py,sha256=rcQ-zsxFXVuoQxBmO1bNrlZRiRB6S8XGFfqnnFEyMEc,1095
|
|
2
|
+
bare_agent/budget.py,sha256=uc8NCE1tH6l3uiAlOd6p1u_AUGj9U8OF8DIQtswGo5c,1696
|
|
3
|
+
bare_agent/config.py,sha256=dkJfva3dR5SGpDAvgmkYlOPletSZJ2XSIK10jJF7dAo,2689
|
|
4
|
+
bare_agent/events.py,sha256=ZWV3j_Y6_P8zKjq5dInJLTXpaBqOo-XB1SIARHiovSQ,527
|
|
5
|
+
bare_agent/llm.py,sha256=dOWP2V5NEM0qw1a2ba4Dg9DSAd3NZvCiYw9GholWy8M,7083
|
|
6
|
+
bare_agent/logging.py,sha256=G8ymSKdWc6-VTavr7prhqXXk7JkyRnKPTkreuxTEoe8,1415
|
|
7
|
+
bare_agent/loop.py,sha256=i1mku-xJM0B13kFaoBpQ-uPvCpwPxwpUQmWRvxBlfo0,6745
|
|
8
|
+
bare_agent/registry.py,sha256=fkKw0ablqZjnowtKIYB2N6Mk3-HZ7kRYfKJVnJmxnns,4092
|
|
9
|
+
bare_agent-0.0.1.dist-info/METADATA,sha256=Xhv6CLt7f4bTgXL7pIOqycAVKHRqzwiVWMc9ApPoBT4,13666
|
|
10
|
+
bare_agent-0.0.1.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
11
|
+
bare_agent-0.0.1.dist-info/licenses/LICENSE,sha256=v9lKPOS9UqhAyTzPdgHCMV8g4FlMdSD2FY26w5I98jE,1071
|
|
12
|
+
bare_agent-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Subrata Mondal
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|