freeagent-sdk 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- freeagent_sdk-0.3.1/LICENSE +21 -0
- freeagent_sdk-0.3.1/PKG-INFO +354 -0
- freeagent_sdk-0.3.1/README.md +316 -0
- freeagent_sdk-0.3.1/freeagent/__init__.py +59 -0
- freeagent_sdk-0.3.1/freeagent/_sync.py +49 -0
- freeagent_sdk-0.3.1/freeagent/agent.py +719 -0
- freeagent_sdk-0.3.1/freeagent/circuit_breaker.py +75 -0
- freeagent_sdk-0.3.1/freeagent/cli.py +190 -0
- freeagent_sdk-0.3.1/freeagent/config.py +53 -0
- freeagent_sdk-0.3.1/freeagent/context.py +91 -0
- freeagent_sdk-0.3.1/freeagent/conversation.py +346 -0
- freeagent_sdk-0.3.1/freeagent/engines/__init__.py +265 -0
- freeagent_sdk-0.3.1/freeagent/events.py +81 -0
- freeagent_sdk-0.3.1/freeagent/hooks.py +195 -0
- freeagent_sdk-0.3.1/freeagent/mcp/__init__.py +54 -0
- freeagent_sdk-0.3.1/freeagent/mcp/adapter.py +107 -0
- freeagent_sdk-0.3.1/freeagent/mcp/client.py +90 -0
- freeagent_sdk-0.3.1/freeagent/memory.py +458 -0
- freeagent_sdk-0.3.1/freeagent/messages.py +51 -0
- freeagent_sdk-0.3.1/freeagent/model_info.py +107 -0
- freeagent_sdk-0.3.1/freeagent/providers/__init__.py +55 -0
- freeagent_sdk-0.3.1/freeagent/providers/ollama.py +148 -0
- freeagent_sdk-0.3.1/freeagent/providers/openai_compat.py +352 -0
- freeagent_sdk-0.3.1/freeagent/sanitize.py +93 -0
- freeagent_sdk-0.3.1/freeagent/skills.py +254 -0
- freeagent_sdk-0.3.1/freeagent/telemetry.py +583 -0
- freeagent_sdk-0.3.1/freeagent/tool.py +194 -0
- freeagent_sdk-0.3.1/freeagent/tools/__init__.py +10 -0
- freeagent_sdk-0.3.1/freeagent/tools/calculator.py +23 -0
- freeagent_sdk-0.3.1/freeagent/tools/shell.py +34 -0
- freeagent_sdk-0.3.1/freeagent/tools/system_info.py +33 -0
- freeagent_sdk-0.3.1/freeagent/validator.py +126 -0
- freeagent_sdk-0.3.1/freeagent_sdk.egg-info/PKG-INFO +354 -0
- freeagent_sdk-0.3.1/freeagent_sdk.egg-info/SOURCES.txt +58 -0
- freeagent_sdk-0.3.1/freeagent_sdk.egg-info/dependency_links.txt +1 -0
- freeagent_sdk-0.3.1/freeagent_sdk.egg-info/entry_points.txt +2 -0
- freeagent_sdk-0.3.1/freeagent_sdk.egg-info/requires.txt +13 -0
- freeagent_sdk-0.3.1/freeagent_sdk.egg-info/top_level.txt +1 -0
- freeagent_sdk-0.3.1/pyproject.toml +65 -0
- freeagent_sdk-0.3.1/setup.cfg +4 -0
- freeagent_sdk-0.3.1/tests/test_agent.py +280 -0
- freeagent_sdk-0.3.1/tests/test_caching.py +150 -0
- freeagent_sdk-0.3.1/tests/test_circuit_breaker.py +68 -0
- freeagent_sdk-0.3.1/tests/test_cli.py +72 -0
- freeagent_sdk-0.3.1/tests/test_context.py +103 -0
- freeagent_sdk-0.3.1/tests/test_conversation.py +364 -0
- freeagent_sdk-0.3.1/tests/test_engines.py +162 -0
- freeagent_sdk-0.3.1/tests/test_mcp.py +153 -0
- freeagent_sdk-0.3.1/tests/test_memory.py +184 -0
- freeagent_sdk-0.3.1/tests/test_messages.py +62 -0
- freeagent_sdk-0.3.1/tests/test_model_info.py +222 -0
- freeagent_sdk-0.3.1/tests/test_parallel.py +188 -0
- freeagent_sdk-0.3.1/tests/test_providers.py +120 -0
- freeagent_sdk-0.3.1/tests/test_sanitize.py +86 -0
- freeagent_sdk-0.3.1/tests/test_skills.py +124 -0
- freeagent_sdk-0.3.1/tests/test_streaming.py +340 -0
- freeagent_sdk-0.3.1/tests/test_telemetry.py +176 -0
- freeagent_sdk-0.3.1/tests/test_tool.py +150 -0
- freeagent_sdk-0.3.1/tests/test_trace.py +159 -0
- freeagent_sdk-0.3.1/tests/test_validator.py +119 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Du'An Lightfoot
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: freeagent-sdk
|
|
3
|
+
Version: 0.3.1
|
|
4
|
+
Summary: Local-first AI agent framework. Built for models that aren't perfect.
|
|
5
|
+
Author: FreeAgent Contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://freeagentsdk.com
|
|
8
|
+
Project-URL: Documentation, https://freeagentsdk.com
|
|
9
|
+
Project-URL: Repository, https://github.com/labeveryday/free-agent-sdk
|
|
10
|
+
Project-URL: Issues, https://github.com/labeveryday/free-agent-sdk/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/labeveryday/free-agent-sdk/blob/main/CHANGELOG.md
|
|
12
|
+
Keywords: ai,agents,llm,ollama,vllm,local,tool-calling,mcp,streaming
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
23
|
+
Classifier: Typing :: Typed
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: httpx>=0.25.0
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
30
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
|
|
31
|
+
Provides-Extra: mcp
|
|
32
|
+
Requires-Dist: mcp>=1.0.0; extra == "mcp"
|
|
33
|
+
Provides-Extra: otel
|
|
34
|
+
Requires-Dist: opentelemetry-api>=1.20.0; extra == "otel"
|
|
35
|
+
Requires-Dist: opentelemetry-sdk>=1.20.0; extra == "otel"
|
|
36
|
+
Requires-Dist: opentelemetry-exporter-otlp>=1.20.0; extra == "otel"
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
|
|
39
|
+
# FreeAgent SDK
|
|
40
|
+
|
|
41
|
+
[](https://pypi.org/project/freeagent-sdk/)
|
|
42
|
+
[](https://pypi.org/project/freeagent-sdk/)
|
|
43
|
+
[License: MIT](LICENSE)
|
|
44
|
+
[Tests](https://github.com/labeveryday/free-agent-sdk/actions/workflows/tests.yml)
|
|
45
|
+
[Docs](https://freeagentsdk.com)
|
|
46
|
+
[](https://pypi.org/project/freeagent-sdk/)
|
|
47
|
+
|
|
48
|
+
**A clean local agent SDK for Ollama, vLLM, and OpenAI-compatible servers.**
|
|
49
|
+
|
|
50
|
+
Streaming. Multi-turn out of the box. Markdown skills and memory. Built-in telemetry. Single dependency.
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
pip install freeagent-sdk
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**Links:** [Documentation](https://freeagentsdk.com) · [Tutorial](docs/TUTORIAL.md) · [Changelog](CHANGELOG.md) · [Contributing](CONTRIBUTING.md) · [Examples](examples/) · [Evaluation data](evaluation/)
|
|
57
|
+
|
|
58
|
+
## Why FreeAgent
|
|
59
|
+
|
|
60
|
+
- **Local-first**: works with Ollama and vLLM — your data never leaves your machine
|
|
61
|
+
- **Streaming everywhere**: token-level streaming with semantic events
|
|
62
|
+
- **Multi-turn that just works**: conversation state managed automatically with pluggable strategies
|
|
63
|
+
- **Markdown is first-class**: skills and memory are human-readable `.md` files with frontmatter
|
|
64
|
+
- **Zero-config**: auto-detects model size and tunes defaults — works on 2B and 70B alike
|
|
65
|
+
- **Inspectable**: `agent.trace()` shows exactly what happened
|
|
66
|
+
- **Fast**: about 2% faster than the raw Ollama API, thanks to HTTP connection reuse
|
|
67
|
+
- **Honest**: real benchmark data in this README, not marketing
|
|
68
|
+
|
|
69
|
+
## Quick Start
|
|
70
|
+
|
|
71
|
+
### CLI
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# One-shot query with streaming
|
|
75
|
+
freeagent ask qwen3:8b "What's the capital of France?"
|
|
76
|
+
|
|
77
|
+
# Interactive chat
|
|
78
|
+
freeagent chat qwen3:8b
|
|
79
|
+
|
|
80
|
+
# List available models
|
|
81
|
+
freeagent models
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Python
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from freeagent import Agent
|
|
88
|
+
|
|
89
|
+
agent = Agent(model="qwen3:8b")
|
|
90
|
+
print(agent.run("What is Python?"))
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Streaming
|
|
94
|
+
|
|
95
|
+
Real token-by-token streaming, even for tool-using agents:
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from freeagent import Agent
|
|
99
|
+
from freeagent.events import TokenEvent, ToolCallEvent, ToolResultEvent
|
|
100
|
+
|
|
101
|
+
agent = Agent(model="qwen3:8b", tools=[weather])
|
|
102
|
+
|
|
103
|
+
for event in agent.run_stream("What's the weather in Tokyo?"):
|
|
104
|
+
if isinstance(event, TokenEvent):
|
|
105
|
+
print(event.text, end="", flush=True)
|
|
106
|
+
elif isinstance(event, ToolCallEvent):
|
|
107
|
+
print(f"\n[Calling {event.name}...]")
|
|
108
|
+
elif isinstance(event, ToolResultEvent):
|
|
109
|
+
print(f"[{event.name} -> {'ok' if event.success else 'fail'} ({event.duration_ms:.0f}ms)]")
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Async version: `async for event in agent.arun_stream("query"):`
|
|
113
|
+
|
|
114
|
+
Event types: `RunStartEvent`, `TokenEvent`, `ToolCallEvent`, `ToolResultEvent`, `ValidationErrorEvent`, `RetryEvent`, `IterationEvent`, `RunCompleteEvent`.
|
|
115
|
+
|
|
116
|
+
## Custom Tools
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from freeagent import Agent, tool
|
|
120
|
+
|
|
121
|
+
@tool
|
|
122
|
+
def weather(city: str) -> dict:
|
|
123
|
+
"""Get current weather for a city."""
|
|
124
|
+
return {"city": city, "temp": 72, "condition": "sunny"}
|
|
125
|
+
|
|
126
|
+
agent = Agent(model="qwen3:8b", tools=[weather])
|
|
127
|
+
print(agent.run("What's the weather in Portland?"))
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Multi-Turn Conversations
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
agent = Agent(model="qwen3:8b", tools=[weather])
|
|
134
|
+
agent.run("What's the weather in Tokyo?")
|
|
135
|
+
agent.run("Convert that to Celsius") # remembers Tokyo was 72°F
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Strategies
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from freeagent import Agent, SlidingWindow, TokenWindow
|
|
142
|
+
|
|
143
|
+
# Default: SlidingWindow(max_turns=20)
|
|
144
|
+
agent = Agent(model="qwen3:8b")
|
|
145
|
+
|
|
146
|
+
# Token-based budget (better for small context models)
|
|
147
|
+
agent = Agent(model="qwen3:4b", conversation=TokenWindow(max_tokens=3000))
|
|
148
|
+
|
|
149
|
+
# Stateless mode (each run independent)
|
|
150
|
+
agent = Agent(model="qwen3:8b", conversation=None)
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Session Persistence
|
|
154
|
+
|
|
155
|
+
```python
|
|
156
|
+
agent = Agent(model="qwen3:8b", session="my-chat")
|
|
157
|
+
agent.run("Hello!")
|
|
158
|
+
# Later, in a new process:
|
|
159
|
+
agent = Agent(model="qwen3:8b", session="my-chat") # restores conversation
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Inspecting Runs
|
|
163
|
+
|
|
164
|
+
Every run is fully traced. See exactly what happened:
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
agent.run("What's 347 * 29?")
|
|
168
|
+
|
|
169
|
+
# One-line summary
|
|
170
|
+
print(agent.last_run.summary())
|
|
171
|
+
# Run 1: qwen3:8b (native) 2300ms, 2 iters, 1 tools
|
|
172
|
+
|
|
173
|
+
# Full timeline
|
|
174
|
+
print(agent.trace())
|
|
175
|
+
# + 0ms model_call_start iter=0
|
|
176
|
+
# + 800ms tool_call calc(expression='347*29')
|
|
177
|
+
# + 802ms tool_result calc -> ok (2ms)
|
|
178
|
+
# + 803ms model_call_start iter=1
|
|
179
|
+
|
|
180
|
+
# Markdown report
|
|
181
|
+
print(agent.last_run.to_markdown())
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## Model-Aware Defaults
|
|
185
|
+
|
|
186
|
+
FreeAgent auto-detects model capabilities from Ollama and tunes itself:
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
# Auto-tuned: detects 2B model, strips skills and memory tool
|
|
190
|
+
agent = Agent(model="gemma4:e2b")
|
|
191
|
+
|
|
192
|
+
# Auto-tuned: detects 8B model, keeps full defaults
|
|
193
|
+
agent = Agent(model="qwen3:8b")
|
|
194
|
+
|
|
195
|
+
# Override auto-tuning
|
|
196
|
+
agent = Agent(model="gemma4:e2b", bundled_skills=True, memory_tool=True)
|
|
197
|
+
|
|
198
|
+
# Disable auto-tuning entirely
|
|
199
|
+
agent = Agent(model="qwen3:8b", auto_tune=False)
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
Access detected info: `agent.model_info.parameter_count`, `agent.model_info.context_length`, `agent.model_info.capabilities`.
|
|
203
|
+
|
|
204
|
+
## Skills (Markdown Prompt Extensions)
|
|
205
|
+
|
|
206
|
+
```markdown
|
|
207
|
+
---
|
|
208
|
+
name: nba-analyst
|
|
209
|
+
description: Basketball statistics expert
|
|
210
|
+
tools: [search, calculator]
|
|
211
|
+
---
|
|
212
|
+
|
|
213
|
+
You are an NBA analyst. Always cite your sources.
|
|
214
|
+
When comparing players, use per-game averages.
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
agent = Agent(model="qwen3:8b", tools=[search, calculator], skills=["./my-skills"])
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
Bundled skills load automatically. User skills extend them — duplicate names override.
|
|
222
|
+
|
|
223
|
+
## Memory (Markdown-Backed)
|
|
224
|
+
|
|
225
|
+
Every agent has built-in memory stored as human-readable `.md` files:
|
|
226
|
+
|
|
227
|
+
```
|
|
228
|
+
.freeagent/memory/
|
|
229
|
+
├── MEMORY.md # Index
|
|
230
|
+
├── user.md # auto_load: true → in system prompt
|
|
231
|
+
├── facts.md # Accumulated facts
|
|
232
|
+
└── 2026-04-05.md # Daily log
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
The agent gets a `memory` tool with actions: `read`, `write`, `append`, `search`, `list`. Only the index and `auto_load` files go into the system prompt — everything else is on demand.
|
|
236
|
+
|
|
237
|
+
## Multi-Provider Support
|
|
238
|
+
|
|
239
|
+
```python
|
|
240
|
+
from freeagent import Agent, VLLMProvider, OpenAICompatProvider
|
|
241
|
+
|
|
242
|
+
# vLLM
|
|
243
|
+
provider = VLLMProvider(model="qwen3-8b")
|
|
244
|
+
agent = Agent(model="qwen3-8b", provider=provider, tools=[my_tool])
|
|
245
|
+
|
|
246
|
+
# Any OpenAI-compatible server
|
|
247
|
+
provider = OpenAICompatProvider(model="llama3.1:8b", base_url="http://localhost:1234")
|
|
248
|
+
agent = Agent(model="llama3.1:8b", provider=provider, tools=[my_tool])
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## Telemetry
|
|
252
|
+
|
|
253
|
+
Built-in, always on:
|
|
254
|
+
|
|
255
|
+
```python
|
|
256
|
+
agent.run("What's the weather?")
|
|
257
|
+
print(agent.metrics) # quick summary
|
|
258
|
+
print(agent.metrics.tool_stats()) # per-tool breakdown
|
|
259
|
+
agent.metrics.to_json("m.json") # export
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
Optional OpenTelemetry: `pip install freeagent-sdk[otel]`
|
|
263
|
+
|
|
264
|
+
## MCP Support
|
|
265
|
+
|
|
266
|
+
```python
|
|
267
|
+
from freeagent.mcp import connect
|
|
268
|
+
|
|
269
|
+
async with connect("npx -y @modelcontextprotocol/server-filesystem /tmp") as tools:
|
|
270
|
+
agent = Agent(model="qwen3:8b", tools=tools)
|
|
271
|
+
result = await agent.arun("List files in /tmp")
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
Install with: `pip install freeagent-sdk[mcp]`
|
|
275
|
+
|
|
276
|
+
## Real Performance
|
|
277
|
+
|
|
278
|
+
Tested against the raw Ollama API with the same eval suite (100+ cases, 4 models). Full data in `evaluation/`.
|
|
279
|
+
|
|
280
|
+
### Multi-Turn Conversations (6 conversations, 15 turns)
|
|
281
|
+
|
|
282
|
+
| Model | Raw Ollama | FreeAgent |
|
|
283
|
+
|-------|-----------|-----------|
|
|
284
|
+
| qwen3:8b | 93% | **87%** |
|
|
285
|
+
| qwen3:4b | 93% | **87%** |
|
|
286
|
+
| llama3.1:8b | 87% | **80%** |
|
|
287
|
+
| gemma4:e2b (2B) | N/A | **80%** |
|
|
288
|
+
|
|
289
|
+
### Tool Calling Accuracy (8 cases)
|
|
290
|
+
|
|
291
|
+
| Model | Raw Ollama | FreeAgent |
|
|
292
|
+
|-------|-----------|-----------|
|
|
293
|
+
| qwen3:8b | 75% | 75% |
|
|
294
|
+
| qwen3:4b | 100% | 88% |
|
|
295
|
+
| llama3.1:8b | 62% | **75% (+13 pts)** |
|
|
296
|
+
|
|
297
|
+
### Streaming Latency (median of 3 runs)
|
|
298
|
+
|
|
299
|
+
| Model | Chat TTFT | Chat Total | Tool TTFT | Tool Total |
|
|
300
|
+
|-------|----------|-----------|----------|-----------|
|
|
301
|
+
| qwen3:8b | 12.8s | 13.9s | 5.2s | 10.0s |
|
|
302
|
+
| qwen3:4b | 14.7s | 14.5s | 28.2s | 31.6s |
|
|
303
|
+
| llama3.1:8b | 1.5s | 1.4s | 1.8s | 2.1s |
|
|
304
|
+
| gemma4:e2b | 4.7s | 5.1s | 8.2s | 12.1s |
|
|
305
|
+
|
|
306
|
+
TTFT ≈ total for chat (generation is fast once started). Tool TTFT includes tool execution round-trip.
|
|
307
|
+
|
|
308
|
+
### Auto-Tune (v0.3.1)
|
|
309
|
+
|
|
310
|
+
| Model | auto_tune=True | All On | Manual Strip | Delta vs All On |
|
|
311
|
+
|-------|---------------|--------|-------------|----------------|
|
|
312
|
+
| qwen3:8b | 91% | 91% | — | +0% |
|
|
313
|
+
| qwen3:4b | 91% | 91% | — | +0% |
|
|
314
|
+
| llama3.1:8b | 100% | 100% | — | +0% |
|
|
315
|
+
| gemma4:e2b | **91%** | 55% | 73% | **+36%** |
|
|
316
|
+
|
|
317
|
+
Auto-tune detects gemma4:e2b as a small model and strips bundled skills + memory tool. This improves accuracy from 55% → 91%.
|
|
318
|
+
|
|
319
|
+
### Honest Caveats
|
|
320
|
+
|
|
321
|
+
- **Guardrails rarely fire**: 0/40 real rescues in adversarial testing. Modern models handle fuzzy names and type coercion natively.
|
|
322
|
+
- **Multi-turn gap to raw Ollama is noise**: 87% vs 93% — re-running the failed cases produces passes, so the gap comes from non-deterministic model output.
|
|
323
|
+
- **Skills help qwen3:4b but hurt gemma4:e2b** — fixed by auto-tune, which strips them for small models.
|
|
324
|
+
- **Streaming TTFT ≈ total time** on small models: generation is fast, model thinking dominates latency.
|
|
325
|
+
|
|
326
|
+
Full analysis: `evaluation/THESIS_ANALYSIS.md`
|
|
327
|
+
|
|
328
|
+
## Tested Models
|
|
329
|
+
|
|
330
|
+
| Model | Size | Mode | Reliability |
|
|
331
|
+
|-------|------|------|-------------|
|
|
332
|
+
| Qwen3 8B | 8.2B | Native | Very Good |
|
|
333
|
+
| Qwen3 4B | 4.0B | Native | Good (best with skills) |
|
|
334
|
+
| Llama 3.1 8B | 8.0B | Native | Good |
|
|
335
|
+
| Gemma4 E2B | 5.1B total (2B effective) | Native | Good (auto-tuned) |
|
|
336
|
+
|
|
337
|
+
## Requirements
|
|
338
|
+
|
|
339
|
+
- Python 3.10+
|
|
340
|
+
- Ollama running locally (`ollama serve`), or a vLLM / OpenAI-compatible server (see Multi-Provider Support)
|
|
341
|
+
- A model pulled (`ollama pull qwen3:8b`)
|
|
342
|
+
|
|
343
|
+
## Documentation
|
|
344
|
+
|
|
345
|
+
- **[Tutorial](docs/TUTORIAL.md)** — 5-minute walkthrough from install to working agent
|
|
346
|
+
- **[Website](https://labeveryday.github.io/free-agent-sdk/)** — landing page and feature overview
|
|
347
|
+
- **[Examples](examples/)** — runnable scripts covering tools, memory, hooks, MCP
|
|
348
|
+
- **[Evaluation data](evaluation/)** — benchmark results and thesis analysis
|
|
349
|
+
- **[Changelog](CHANGELOG.md)** — release history
|
|
350
|
+
- **[Contributing](CONTRIBUTING.md)** — how to run tests, add skills, submit PRs
|
|
351
|
+
|
|
352
|
+
## License
|
|
353
|
+
|
|
354
|
+
MIT
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
# FreeAgent SDK
|
|
2
|
+
|
|
3
|
+
[](https://pypi.org/project/freeagent-sdk/)
|
|
4
|
+
[](https://pypi.org/project/freeagent-sdk/)
|
|
5
|
+
[License: MIT](LICENSE)
|
|
6
|
+
[Tests](https://github.com/labeveryday/free-agent-sdk/actions/workflows/tests.yml)
|
|
7
|
+
[Docs](https://freeagentsdk.com)
|
|
8
|
+
[](https://pypi.org/project/freeagent-sdk/)
|
|
9
|
+
|
|
10
|
+
**A clean local agent SDK for Ollama, vLLM, and OpenAI-compatible servers.**
|
|
11
|
+
|
|
12
|
+
Streaming. Multi-turn out of the box. Markdown skills and memory. Built-in telemetry. Single dependency.
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
pip install freeagent-sdk
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
**Links:** [Documentation](https://freeagentsdk.com) · [Tutorial](docs/TUTORIAL.md) · [Changelog](CHANGELOG.md) · [Contributing](CONTRIBUTING.md) · [Examples](examples/) · [Evaluation data](evaluation/)
|
|
19
|
+
|
|
20
|
+
## Why FreeAgent
|
|
21
|
+
|
|
22
|
+
- **Local-first**: works with Ollama and vLLM — your data never leaves your machine
|
|
23
|
+
- **Streaming everywhere**: token-level streaming with semantic events
|
|
24
|
+
- **Multi-turn that just works**: conversation state managed automatically with pluggable strategies
|
|
25
|
+
- **Markdown is first-class**: skills and memory are human-readable `.md` files with frontmatter
|
|
26
|
+
- **Zero-config**: auto-detects model size and tunes defaults — works on 2B and 70B alike
|
|
27
|
+
- **Inspectable**: `agent.trace()` shows exactly what happened
|
|
28
|
+
- **Fast**: about 2% faster than the raw Ollama API, thanks to HTTP connection reuse
|
|
29
|
+
- **Honest**: real benchmark data in this README, not marketing
|
|
30
|
+
|
|
31
|
+
## Quick Start
|
|
32
|
+
|
|
33
|
+
### CLI
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
# One-shot query with streaming
|
|
37
|
+
freeagent ask qwen3:8b "What's the capital of France?"
|
|
38
|
+
|
|
39
|
+
# Interactive chat
|
|
40
|
+
freeagent chat qwen3:8b
|
|
41
|
+
|
|
42
|
+
# List available models
|
|
43
|
+
freeagent models
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Python
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from freeagent import Agent
|
|
50
|
+
|
|
51
|
+
agent = Agent(model="qwen3:8b")
|
|
52
|
+
print(agent.run("What is Python?"))
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Streaming
|
|
56
|
+
|
|
57
|
+
Real token-by-token streaming, even for tool-using agents:
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from freeagent import Agent
|
|
61
|
+
from freeagent.events import TokenEvent, ToolCallEvent, ToolResultEvent
|
|
62
|
+
|
|
63
|
+
agent = Agent(model="qwen3:8b", tools=[weather])
|
|
64
|
+
|
|
65
|
+
for event in agent.run_stream("What's the weather in Tokyo?"):
|
|
66
|
+
if isinstance(event, TokenEvent):
|
|
67
|
+
print(event.text, end="", flush=True)
|
|
68
|
+
elif isinstance(event, ToolCallEvent):
|
|
69
|
+
print(f"\n[Calling {event.name}...]")
|
|
70
|
+
elif isinstance(event, ToolResultEvent):
|
|
71
|
+
print(f"[{event.name} -> {'ok' if event.success else 'fail'} ({event.duration_ms:.0f}ms)]")
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Async version: `async for event in agent.arun_stream("query"):`
|
|
75
|
+
|
|
76
|
+
Event types: `RunStartEvent`, `TokenEvent`, `ToolCallEvent`, `ToolResultEvent`, `ValidationErrorEvent`, `RetryEvent`, `IterationEvent`, `RunCompleteEvent`.
|
|
77
|
+
|
|
78
|
+
## Custom Tools
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from freeagent import Agent, tool
|
|
82
|
+
|
|
83
|
+
@tool
|
|
84
|
+
def weather(city: str) -> dict:
|
|
85
|
+
"""Get current weather for a city."""
|
|
86
|
+
return {"city": city, "temp": 72, "condition": "sunny"}
|
|
87
|
+
|
|
88
|
+
agent = Agent(model="qwen3:8b", tools=[weather])
|
|
89
|
+
print(agent.run("What's the weather in Portland?"))
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Multi-Turn Conversations
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
agent = Agent(model="qwen3:8b", tools=[weather])
|
|
96
|
+
agent.run("What's the weather in Tokyo?")
|
|
97
|
+
agent.run("Convert that to Celsius") # remembers Tokyo was 72°F
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Strategies
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
from freeagent import Agent, SlidingWindow, TokenWindow
|
|
104
|
+
|
|
105
|
+
# Default: SlidingWindow(max_turns=20)
|
|
106
|
+
agent = Agent(model="qwen3:8b")
|
|
107
|
+
|
|
108
|
+
# Token-based budget (better for small context models)
|
|
109
|
+
agent = Agent(model="qwen3:4b", conversation=TokenWindow(max_tokens=3000))
|
|
110
|
+
|
|
111
|
+
# Stateless mode (each run independent)
|
|
112
|
+
agent = Agent(model="qwen3:8b", conversation=None)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Session Persistence
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
agent = Agent(model="qwen3:8b", session="my-chat")
|
|
119
|
+
agent.run("Hello!")
|
|
120
|
+
# Later, in a new process:
|
|
121
|
+
agent = Agent(model="qwen3:8b", session="my-chat") # restores conversation
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Inspecting Runs
|
|
125
|
+
|
|
126
|
+
Every run is fully traced. See exactly what happened:
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
agent.run("What's 347 * 29?")
|
|
130
|
+
|
|
131
|
+
# One-line summary
|
|
132
|
+
print(agent.last_run.summary())
|
|
133
|
+
# Run 1: qwen3:8b (native) 2300ms, 2 iters, 1 tools
|
|
134
|
+
|
|
135
|
+
# Full timeline
|
|
136
|
+
print(agent.trace())
|
|
137
|
+
# + 0ms model_call_start iter=0
|
|
138
|
+
# + 800ms tool_call calc(expression='347*29')
|
|
139
|
+
# + 802ms tool_result calc -> ok (2ms)
|
|
140
|
+
# + 803ms model_call_start iter=1
|
|
141
|
+
|
|
142
|
+
# Markdown report
|
|
143
|
+
print(agent.last_run.to_markdown())
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Model-Aware Defaults
|
|
147
|
+
|
|
148
|
+
FreeAgent auto-detects model capabilities from Ollama and tunes itself:
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
# Auto-tuned: detects 2B model, strips skills and memory tool
|
|
152
|
+
agent = Agent(model="gemma4:e2b")
|
|
153
|
+
|
|
154
|
+
# Auto-tuned: detects 8B model, keeps full defaults
|
|
155
|
+
agent = Agent(model="qwen3:8b")
|
|
156
|
+
|
|
157
|
+
# Override auto-tuning
|
|
158
|
+
agent = Agent(model="gemma4:e2b", bundled_skills=True, memory_tool=True)
|
|
159
|
+
|
|
160
|
+
# Disable auto-tuning entirely
|
|
161
|
+
agent = Agent(model="qwen3:8b", auto_tune=False)
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Access detected info: `agent.model_info.parameter_count`, `agent.model_info.context_length`, `agent.model_info.capabilities`.
|
|
165
|
+
|
|
166
|
+
## Skills (Markdown Prompt Extensions)
|
|
167
|
+
|
|
168
|
+
```markdown
|
|
169
|
+
---
|
|
170
|
+
name: nba-analyst
|
|
171
|
+
description: Basketball statistics expert
|
|
172
|
+
tools: [search, calculator]
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
You are an NBA analyst. Always cite your sources.
|
|
176
|
+
When comparing players, use per-game averages.
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
```python
|
|
180
|
+
agent = Agent(model="qwen3:8b", tools=[search, calculator], skills=["./my-skills"])
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
Bundled skills load automatically. User skills extend them — duplicate names override.
|
|
184
|
+
|
|
185
|
+
## Memory (Markdown-Backed)
|
|
186
|
+
|
|
187
|
+
Every agent has built-in memory stored as human-readable `.md` files:
|
|
188
|
+
|
|
189
|
+
```
|
|
190
|
+
.freeagent/memory/
|
|
191
|
+
├── MEMORY.md # Index
|
|
192
|
+
├── user.md # auto_load: true → in system prompt
|
|
193
|
+
├── facts.md # Accumulated facts
|
|
194
|
+
└── 2026-04-05.md # Daily log
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
The agent gets a `memory` tool with actions: `read`, `write`, `append`, `search`, `list`. Only the index and `auto_load` files go into the system prompt — everything else is on demand.
|
|
198
|
+
|
|
199
|
+
## Multi-Provider Support
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
from freeagent import Agent, VLLMProvider, OpenAICompatProvider
|
|
203
|
+
|
|
204
|
+
# vLLM
|
|
205
|
+
provider = VLLMProvider(model="qwen3-8b")
|
|
206
|
+
agent = Agent(model="qwen3-8b", provider=provider, tools=[my_tool])
|
|
207
|
+
|
|
208
|
+
# Any OpenAI-compatible server
|
|
209
|
+
provider = OpenAICompatProvider(model="llama3.1:8b", base_url="http://localhost:1234")
|
|
210
|
+
agent = Agent(model="llama3.1:8b", provider=provider, tools=[my_tool])
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
## Telemetry
|
|
214
|
+
|
|
215
|
+
Built-in, always on:
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
agent.run("What's the weather?")
|
|
219
|
+
print(agent.metrics) # quick summary
|
|
220
|
+
print(agent.metrics.tool_stats()) # per-tool breakdown
|
|
221
|
+
agent.metrics.to_json("m.json") # export
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
Optional OpenTelemetry: `pip install freeagent-sdk[otel]`
|
|
225
|
+
|
|
226
|
+
## MCP Support
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
from freeagent.mcp import connect
|
|
230
|
+
|
|
231
|
+
async with connect("npx -y @modelcontextprotocol/server-filesystem /tmp") as tools:
|
|
232
|
+
agent = Agent(model="qwen3:8b", tools=tools)
|
|
233
|
+
result = await agent.arun("List files in /tmp")
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
Install with: `pip install freeagent-sdk[mcp]`
|
|
237
|
+
|
|
238
|
+
## Real Performance
|
|
239
|
+
|
|
240
|
+
Tested against the raw Ollama API with the same eval suite (100+ cases, 4 models). Full data in `evaluation/`.
|
|
241
|
+
|
|
242
|
+
### Multi-Turn Conversations (6 conversations, 15 turns)
|
|
243
|
+
|
|
244
|
+
| Model | Raw Ollama | FreeAgent |
|
|
245
|
+
|-------|-----------|-----------|
|
|
246
|
+
| qwen3:8b | 93% | **87%** |
|
|
247
|
+
| qwen3:4b | 93% | **87%** |
|
|
248
|
+
| llama3.1:8b | 87% | **80%** |
|
|
249
|
+
| gemma4:e2b (2B) | N/A | **80%** |
|
|
250
|
+
|
|
251
|
+
### Tool Calling Accuracy (8 cases)
|
|
252
|
+
|
|
253
|
+
| Model | Raw Ollama | FreeAgent |
|
|
254
|
+
|-------|-----------|-----------|
|
|
255
|
+
| qwen3:8b | 75% | 75% |
|
|
256
|
+
| qwen3:4b | 100% | 88% |
|
|
257
|
+
| llama3.1:8b | 62% | **75% (+13 pts)** |
|
|
258
|
+
|
|
259
|
+
### Streaming Latency (median of 3 runs)
|
|
260
|
+
|
|
261
|
+
| Model | Chat TTFT | Chat Total | Tool TTFT | Tool Total |
|
|
262
|
+
|-------|----------|-----------|----------|-----------|
|
|
263
|
+
| qwen3:8b | 12.8s | 13.9s | 5.2s | 10.0s |
|
|
264
|
+
| qwen3:4b | 14.7s | 14.5s | 28.2s | 31.6s |
|
|
265
|
+
| llama3.1:8b | 1.5s | 1.4s | 1.8s | 2.1s |
|
|
266
|
+
| gemma4:e2b | 4.7s | 5.1s | 8.2s | 12.1s |
|
|
267
|
+
|
|
268
|
+
TTFT ≈ total for chat (generation is fast once started). Tool TTFT includes tool execution round-trip.
|
|
269
|
+
|
|
270
|
+
### Auto-Tune (v0.3.1)
|
|
271
|
+
|
|
272
|
+
| Model | auto_tune=True | All On | Manual Strip | Delta vs All On |
|
|
273
|
+
|-------|---------------|--------|-------------|----------------|
|
|
274
|
+
| qwen3:8b | 91% | 91% | — | +0% |
|
|
275
|
+
| qwen3:4b | 91% | 91% | — | +0% |
|
|
276
|
+
| llama3.1:8b | 100% | 100% | — | +0% |
|
|
277
|
+
| gemma4:e2b | **91%** | 55% | 73% | **+36%** |
|
|
278
|
+
|
|
279
|
+
Auto-tune detects gemma4:e2b as a small model and strips bundled skills + memory tool. This improves accuracy from 55% → 91%.
|
|
280
|
+
|
|
281
|
+
### Honest Caveats
|
|
282
|
+
|
|
283
|
+
- **Guardrails rarely fire**: 0/40 real rescues in adversarial testing. Modern models handle fuzzy names and type coercion natively.
|
|
284
|
+
- **Multi-turn gap to raw Ollama is noise**: 87% vs 93% — re-running the failed cases produces passes, so the gap comes from non-deterministic model output.
|
|
285
|
+
- **Skills help qwen3:4b but hurt gemma4:e2b** — fixed by auto-tune, which strips them for small models.
|
|
286
|
+
- **Streaming TTFT ≈ total time** on small models: generation is fast, model thinking dominates latency.
|
|
287
|
+
|
|
288
|
+
Full analysis: `evaluation/THESIS_ANALYSIS.md`
|
|
289
|
+
|
|
290
|
+
## Tested Models
|
|
291
|
+
|
|
292
|
+
| Model | Size | Mode | Reliability |
|
|
293
|
+
|-------|------|------|-------------|
|
|
294
|
+
| Qwen3 8B | 8.2B | Native | Very Good |
|
|
295
|
+
| Qwen3 4B | 4.0B | Native | Good (best with skills) |
|
|
296
|
+
| Llama 3.1 8B | 8.0B | Native | Good |
|
|
297
|
+
| Gemma4 E2B | 5.1B total (2B effective) | Native | Good (auto-tuned) |
|
|
298
|
+
|
|
299
|
+
## Requirements
|
|
300
|
+
|
|
301
|
+
- Python 3.10+
|
|
302
|
+
- Ollama running locally (`ollama serve`), or a vLLM / OpenAI-compatible server (see Multi-Provider Support)
|
|
303
|
+
- A model pulled (`ollama pull qwen3:8b`)
|
|
304
|
+
|
|
305
|
+
## Documentation
|
|
306
|
+
|
|
307
|
+
- **[Tutorial](docs/TUTORIAL.md)** — 5-minute walkthrough from install to working agent
|
|
308
|
+
- **[Website](https://labeveryday.github.io/free-agent-sdk/)** — landing page and feature overview
|
|
309
|
+
- **[Examples](examples/)** — runnable scripts covering tools, memory, hooks, MCP
|
|
310
|
+
- **[Evaluation data](evaluation/)** — benchmark results and thesis analysis
|
|
311
|
+
- **[Changelog](CHANGELOG.md)** — release history
|
|
312
|
+
- **[Contributing](CONTRIBUTING.md)** — how to run tests, add skills, submit PRs
|
|
313
|
+
|
|
314
|
+
## License
|
|
315
|
+
|
|
316
|
+
MIT
|