freeagent-sdk 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. freeagent_sdk-0.3.1/LICENSE +21 -0
  2. freeagent_sdk-0.3.1/PKG-INFO +354 -0
  3. freeagent_sdk-0.3.1/README.md +316 -0
  4. freeagent_sdk-0.3.1/freeagent/__init__.py +59 -0
  5. freeagent_sdk-0.3.1/freeagent/_sync.py +49 -0
  6. freeagent_sdk-0.3.1/freeagent/agent.py +719 -0
  7. freeagent_sdk-0.3.1/freeagent/circuit_breaker.py +75 -0
  8. freeagent_sdk-0.3.1/freeagent/cli.py +190 -0
  9. freeagent_sdk-0.3.1/freeagent/config.py +53 -0
  10. freeagent_sdk-0.3.1/freeagent/context.py +91 -0
  11. freeagent_sdk-0.3.1/freeagent/conversation.py +346 -0
  12. freeagent_sdk-0.3.1/freeagent/engines/__init__.py +265 -0
  13. freeagent_sdk-0.3.1/freeagent/events.py +81 -0
  14. freeagent_sdk-0.3.1/freeagent/hooks.py +195 -0
  15. freeagent_sdk-0.3.1/freeagent/mcp/__init__.py +54 -0
  16. freeagent_sdk-0.3.1/freeagent/mcp/adapter.py +107 -0
  17. freeagent_sdk-0.3.1/freeagent/mcp/client.py +90 -0
  18. freeagent_sdk-0.3.1/freeagent/memory.py +458 -0
  19. freeagent_sdk-0.3.1/freeagent/messages.py +51 -0
  20. freeagent_sdk-0.3.1/freeagent/model_info.py +107 -0
  21. freeagent_sdk-0.3.1/freeagent/providers/__init__.py +55 -0
  22. freeagent_sdk-0.3.1/freeagent/providers/ollama.py +148 -0
  23. freeagent_sdk-0.3.1/freeagent/providers/openai_compat.py +352 -0
  24. freeagent_sdk-0.3.1/freeagent/sanitize.py +93 -0
  25. freeagent_sdk-0.3.1/freeagent/skills.py +254 -0
  26. freeagent_sdk-0.3.1/freeagent/telemetry.py +583 -0
  27. freeagent_sdk-0.3.1/freeagent/tool.py +194 -0
  28. freeagent_sdk-0.3.1/freeagent/tools/__init__.py +10 -0
  29. freeagent_sdk-0.3.1/freeagent/tools/calculator.py +23 -0
  30. freeagent_sdk-0.3.1/freeagent/tools/shell.py +34 -0
  31. freeagent_sdk-0.3.1/freeagent/tools/system_info.py +33 -0
  32. freeagent_sdk-0.3.1/freeagent/validator.py +126 -0
  33. freeagent_sdk-0.3.1/freeagent_sdk.egg-info/PKG-INFO +354 -0
  34. freeagent_sdk-0.3.1/freeagent_sdk.egg-info/SOURCES.txt +58 -0
  35. freeagent_sdk-0.3.1/freeagent_sdk.egg-info/dependency_links.txt +1 -0
  36. freeagent_sdk-0.3.1/freeagent_sdk.egg-info/entry_points.txt +2 -0
  37. freeagent_sdk-0.3.1/freeagent_sdk.egg-info/requires.txt +13 -0
  38. freeagent_sdk-0.3.1/freeagent_sdk.egg-info/top_level.txt +1 -0
  39. freeagent_sdk-0.3.1/pyproject.toml +65 -0
  40. freeagent_sdk-0.3.1/setup.cfg +4 -0
  41. freeagent_sdk-0.3.1/tests/test_agent.py +280 -0
  42. freeagent_sdk-0.3.1/tests/test_caching.py +150 -0
  43. freeagent_sdk-0.3.1/tests/test_circuit_breaker.py +68 -0
  44. freeagent_sdk-0.3.1/tests/test_cli.py +72 -0
  45. freeagent_sdk-0.3.1/tests/test_context.py +103 -0
  46. freeagent_sdk-0.3.1/tests/test_conversation.py +364 -0
  47. freeagent_sdk-0.3.1/tests/test_engines.py +162 -0
  48. freeagent_sdk-0.3.1/tests/test_mcp.py +153 -0
  49. freeagent_sdk-0.3.1/tests/test_memory.py +184 -0
  50. freeagent_sdk-0.3.1/tests/test_messages.py +62 -0
  51. freeagent_sdk-0.3.1/tests/test_model_info.py +222 -0
  52. freeagent_sdk-0.3.1/tests/test_parallel.py +188 -0
  53. freeagent_sdk-0.3.1/tests/test_providers.py +120 -0
  54. freeagent_sdk-0.3.1/tests/test_sanitize.py +86 -0
  55. freeagent_sdk-0.3.1/tests/test_skills.py +124 -0
  56. freeagent_sdk-0.3.1/tests/test_streaming.py +340 -0
  57. freeagent_sdk-0.3.1/tests/test_telemetry.py +176 -0
  58. freeagent_sdk-0.3.1/tests/test_tool.py +150 -0
  59. freeagent_sdk-0.3.1/tests/test_trace.py +159 -0
  60. freeagent_sdk-0.3.1/tests/test_validator.py +119 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Du'An Lightfoot
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,354 @@
1
+ Metadata-Version: 2.4
2
+ Name: freeagent-sdk
3
+ Version: 0.3.1
4
+ Summary: Local-first AI agent framework. Built for models that aren't perfect.
5
+ Author: FreeAgent Contributors
6
+ License: MIT
7
+ Project-URL: Homepage, https://freeagentsdk.com
8
+ Project-URL: Documentation, https://freeagentsdk.com
9
+ Project-URL: Repository, https://github.com/labeveryday/free-agent-sdk
10
+ Project-URL: Issues, https://github.com/labeveryday/free-agent-sdk/issues
11
+ Project-URL: Changelog, https://github.com/labeveryday/free-agent-sdk/blob/main/CHANGELOG.md
12
+ Keywords: ai,agents,llm,ollama,vllm,local,tool-calling,mcp,streaming
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Classifier: Typing :: Typed
24
+ Requires-Python: >=3.10
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: httpx>=0.25.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=7.0; extra == "dev"
30
+ Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
31
+ Provides-Extra: mcp
32
+ Requires-Dist: mcp>=1.0.0; extra == "mcp"
33
+ Provides-Extra: otel
34
+ Requires-Dist: opentelemetry-api>=1.20.0; extra == "otel"
35
+ Requires-Dist: opentelemetry-sdk>=1.20.0; extra == "otel"
36
+ Requires-Dist: opentelemetry-exporter-otlp>=1.20.0; extra == "otel"
37
+ Dynamic: license-file
38
+
39
+ # FreeAgent SDK
40
+
41
+ [![PyPI version](https://img.shields.io/pypi/v/freeagent-sdk.svg?color=1c5d99&labelColor=222)](https://pypi.org/project/freeagent-sdk/)
42
+ [![Python versions](https://img.shields.io/pypi/pyversions/freeagent-sdk.svg?color=639fab&labelColor=222)](https://pypi.org/project/freeagent-sdk/)
43
+ [![License: MIT](https://img.shields.io/badge/License-MIT-bbcde5.svg?labelColor=222)](LICENSE)
44
+ [![Tests](https://github.com/labeveryday/free-agent-sdk/actions/workflows/tests.yml/badge.svg)](https://github.com/labeveryday/free-agent-sdk/actions/workflows/tests.yml)
45
+ [![Docs](https://img.shields.io/badge/docs-freeagentsdk.com-1c5d99?labelColor=222)](https://freeagentsdk.com)
46
+ [![Downloads](https://img.shields.io/pypi/dm/freeagent-sdk.svg?color=639fab&labelColor=222)](https://pypi.org/project/freeagent-sdk/)
47
+
48
+ **A clean local agent SDK for Ollama, vLLM, and OpenAI-compatible servers.**
49
+
50
+ Streaming. Multi-turn out of the box. Markdown skills and memory. Built-in telemetry. Single dependency.
51
+
52
+ ```
53
+ pip install freeagent-sdk
54
+ ```
55
+
56
+ **Links:** [Documentation](https://freeagentsdk.com) · [Tutorial](docs/TUTORIAL.md) · [Changelog](CHANGELOG.md) · [Contributing](CONTRIBUTING.md) · [Examples](examples/) · [Evaluation data](evaluation/)
57
+
58
+ ## Why FreeAgent
59
+
60
+ - **Local-first**: works with Ollama and vLLM — your data never leaves your machine
61
+ - **Streaming everywhere**: token-level streaming with semantic events
62
+ - **Multi-turn that just works**: conversation state managed automatically with pluggable strategies
63
+ - **Markdown is first-class**: skills and memory are human-readable `.md` files with frontmatter
64
+ - **Zero-config**: auto-detects model size and tunes defaults — works on 2B and 70B alike
65
+ - **Inspectable**: `agent.trace()` shows exactly what happened
66
+ - **Fast**: actually 2% faster than the raw Ollama API (HTTP connection reuse)
67
+ - **Honest**: real benchmark data in this README, not marketing
68
+
69
+ ## Quick Start
70
+
71
+ ### CLI
72
+
73
+ ```bash
74
+ # One-shot query with streaming
75
+ freeagent ask qwen3:8b "What's the capital of France?"
76
+
77
+ # Interactive chat
78
+ freeagent chat qwen3:8b
79
+
80
+ # List available models
81
+ freeagent models
82
+ ```
83
+
84
+ ### Python
85
+
86
+ ```python
87
+ from freeagent import Agent
88
+
89
+ agent = Agent(model="qwen3:8b")
90
+ print(agent.run("What is Python?"))
91
+ ```
92
+
93
+ ## Streaming
94
+
95
+ Real token-by-token streaming, even for tool-using agents:
96
+
97
+ ```python
98
+ from freeagent import Agent
99
+ from freeagent.events import TokenEvent, ToolCallEvent, ToolResultEvent
100
+
101
+ agent = Agent(model="qwen3:8b", tools=[weather])
102
+
103
+ for event in agent.run_stream("What's the weather in Tokyo?"):
104
+ if isinstance(event, TokenEvent):
105
+ print(event.text, end="", flush=True)
106
+ elif isinstance(event, ToolCallEvent):
107
+ print(f"\n[Calling {event.name}...]")
108
+ elif isinstance(event, ToolResultEvent):
109
+ print(f"[{event.name} -> {'ok' if event.success else 'fail'} ({event.duration_ms:.0f}ms)]")
110
+ ```
111
+
112
+ Async version: `async for event in agent.arun_stream("query"):`
113
+
114
+ Event types: `RunStartEvent`, `TokenEvent`, `ToolCallEvent`, `ToolResultEvent`, `ValidationErrorEvent`, `RetryEvent`, `IterationEvent`, `RunCompleteEvent`.
115
+
116
+ ## Custom Tools
117
+
118
+ ```python
119
+ from freeagent import Agent, tool
120
+
121
+ @tool
122
+ def weather(city: str) -> dict:
123
+ """Get current weather for a city."""
124
+ return {"city": city, "temp": 72, "condition": "sunny"}
125
+
126
+ agent = Agent(model="qwen3:8b", tools=[weather])
127
+ print(agent.run("What's the weather in Portland?"))
128
+ ```
129
+
130
+ ## Multi-Turn Conversations
131
+
132
+ ```python
133
+ agent = Agent(model="qwen3:8b", tools=[weather])
134
+ agent.run("What's the weather in Tokyo?")
135
+ agent.run("Convert that to Celsius") # remembers Tokyo was 85°F
136
+ ```
137
+
138
+ ### Strategies
139
+
140
+ ```python
141
+ from freeagent import Agent, SlidingWindow, TokenWindow
142
+
143
+ # Default: SlidingWindow(max_turns=20)
144
+ agent = Agent(model="qwen3:8b")
145
+
146
+ # Token-based budget (better for small context models)
147
+ agent = Agent(model="qwen3:4b", conversation=TokenWindow(max_tokens=3000))
148
+
149
+ # Stateless mode (each run independent)
150
+ agent = Agent(model="qwen3:8b", conversation=None)
151
+ ```
152
+
153
+ ### Session Persistence
154
+
155
+ ```python
156
+ agent = Agent(model="qwen3:8b", session="my-chat")
157
+ agent.run("Hello!")
158
+ # Later, in a new process:
159
+ agent = Agent(model="qwen3:8b", session="my-chat") # restores conversation
160
+ ```
161
+
162
+ ## Inspecting Runs
163
+
164
+ Every run is fully traced. See exactly what happened:
165
+
166
+ ```python
167
+ agent.run("What's 347 * 29?")
168
+
169
+ # One-line summary
170
+ print(agent.last_run.summary())
171
+ # Run 1: qwen3:8b (native) 2300ms, 2 iters, 1 tools
172
+
173
+ # Full timeline
174
+ print(agent.trace())
175
+ # + 0ms model_call_start iter=0
176
+ # + 800ms tool_call calc(expression='347*29')
177
+ # + 802ms tool_result calc -> ok (2ms)
178
+ # + 803ms model_call_start iter=1
179
+
180
+ # Markdown report
181
+ print(agent.last_run.to_markdown())
182
+ ```
183
+
184
+ ## Model-Aware Defaults
185
+
186
+ FreeAgent auto-detects model capabilities from Ollama and tunes itself:
187
+
188
+ ```python
189
+ # Auto-tuned: detects 2B model, strips skills and memory tool
190
+ agent = Agent(model="gemma4:e2b")
191
+
192
+ # Auto-tuned: detects 8B model, keeps full defaults
193
+ agent = Agent(model="qwen3:8b")
194
+
195
+ # Override auto-tuning
196
+ agent = Agent(model="gemma4:e2b", bundled_skills=True, memory_tool=True)
197
+
198
+ # Disable auto-tuning entirely
199
+ agent = Agent(model="qwen3:8b", auto_tune=False)
200
+ ```
201
+
202
+ Access detected info: `agent.model_info.parameter_count`, `agent.model_info.context_length`, `agent.model_info.capabilities`.
203
+
204
+ ## Skills (Markdown Prompt Extensions)
205
+
206
+ ```markdown
207
+ ---
208
+ name: nba-analyst
209
+ description: Basketball statistics expert
210
+ tools: [search, calculator]
211
+ ---
212
+
213
+ You are an NBA analyst. Always cite your sources.
214
+ When comparing players, use per-game averages.
215
+ ```
216
+
217
+ ```python
218
+ agent = Agent(model="qwen3:8b", tools=[search, calculator], skills=["./my-skills"])
219
+ ```
220
+
221
+ Bundled skills load automatically. User skills extend them — duplicate names override.
222
+
223
+ ## Memory (Markdown-Backed)
224
+
225
+ Every agent has built-in memory stored as human-readable `.md` files:
226
+
227
+ ```
228
+ .freeagent/memory/
229
+ ├── MEMORY.md # Index
230
+ ├── user.md # auto_load: true → in system prompt
231
+ ├── facts.md # Accumulated facts
232
+ └── 2026-04-05.md # Daily log
233
+ ```
234
+
235
+ The agent gets a `memory` tool with actions: `read`, `write`, `append`, `search`, `list`. Only the index and `auto_load` files go into the system prompt — everything else is on demand.
236
+
237
+ ## Multi-Provider Support
238
+
239
+ ```python
240
+ from freeagent import Agent, VLLMProvider, OpenAICompatProvider
241
+
242
+ # vLLM
243
+ provider = VLLMProvider(model="qwen3-8b")
244
+ agent = Agent(model="qwen3-8b", provider=provider, tools=[my_tool])
245
+
246
+ # Any OpenAI-compatible server
247
+ provider = OpenAICompatProvider(model="llama3.1:8b", base_url="http://localhost:1234")
248
+ agent = Agent(model="llama3.1:8b", provider=provider, tools=[my_tool])
249
+ ```
250
+
251
+ ## Telemetry
252
+
253
+ Built-in, always on:
254
+
255
+ ```python
256
+ agent.run("What's the weather?")
257
+ print(agent.metrics) # quick summary
258
+ print(agent.metrics.tool_stats()) # per-tool breakdown
259
+ agent.metrics.to_json("m.json") # export
260
+ ```
261
+
262
+ Optional OpenTelemetry: `pip install freeagent-sdk[otel]`
263
+
264
+ ## MCP Support
265
+
266
+ ```python
267
+ from freeagent.mcp import connect
268
+
269
+ async with connect("npx -y @modelcontextprotocol/server-filesystem /tmp") as tools:
270
+ agent = Agent(model="qwen3:8b", tools=tools)
271
+ result = await agent.arun("List files in /tmp")
272
+ ```
273
+
274
+ Install with: `pip install freeagent-sdk[mcp]`
275
+
276
+ ## Real Performance
277
+
278
+ Tested against the raw Ollama API with the same eval suite (100+ cases, 4 models). Full data in `evaluation/`.
279
+
280
+ ### Multi-Turn Conversations (6 conversations, 15 turns)
281
+
282
+ | Model | Raw Ollama | FreeAgent |
283
+ |-------|-----------|-----------|
284
+ | qwen3:8b | 93% | **87%** |
285
+ | qwen3:4b | 93% | **87%** |
286
+ | llama3.1:8b | 87% | **80%** |
287
+ | gemma4:e2b (2B) | N/A | **80%** |
288
+
289
+ ### Tool Calling Accuracy (8 cases)
290
+
291
+ | Model | Raw Ollama | FreeAgent |
292
+ |-------|-----------|-----------|
293
+ | qwen3:8b | 75% | 75% |
294
+ | qwen3:4b | 100% | 88% |
295
+ | llama3.1:8b | 62% | **75% (+13%)** |
296
+
297
+ ### Streaming Latency (median of 3 runs)
298
+
299
+ | Model | Chat TTFT | Chat Total | Tool TTFT | Tool Total |
300
+ |-------|----------|-----------|----------|-----------|
301
+ | qwen3:8b | 12.8s | 13.9s | 5.2s | 10.0s |
302
+ | qwen3:4b | 14.7s | 14.5s | 28.2s | 31.6s |
303
+ | llama3.1:8b | 1.5s | 1.4s | 1.8s | 2.1s |
304
+ | gemma4:e2b | 4.7s | 5.1s | 8.2s | 12.1s |
305
+
306
+ TTFT ≈ total for chat (generation is fast once started). Tool TTFT includes tool execution round-trip.
307
+
308
+ ### Auto-Tune (v0.3.1)
309
+
310
+ | Model | auto_tune=True | All On | Manual Strip | Delta vs All On |
311
+ |-------|---------------|--------|-------------|----------------|
312
+ | qwen3:8b | 91% | 91% | — | +0% |
313
+ | qwen3:4b | 91% | 91% | — | +0% |
314
+ | llama3.1:8b | 100% | 100% | — | +0% |
315
+ | gemma4:e2b | **91%** | 55% | 73% | **+36%** |
316
+
317
+ Auto-tune detects gemma4:e2b as a small model and strips bundled skills + memory tool. This improves accuracy from 55% → 91%.
318
+
319
+ ### Honest Caveats
320
+
321
+ - **Guardrails rarely fire**: 0/40 real rescues in adversarial testing. Modern models handle fuzzy names and type coercion natively.
322
+ - **Multi-turn gap to raw Ollama is noise**: 87% vs 93% — re-running failures produces passes. Non-deterministic.
323
+ - **Skills help qwen3:4b but hurt gemma4:e2b** — fixed by auto-tune, which strips them for small models.
324
+ - **Streaming TTFT ≈ total time** on small models: generation is fast, model thinking dominates latency.
325
+
326
+ Full analysis: `evaluation/THESIS_ANALYSIS.md`
327
+
328
+ ## Tested Models
329
+
330
+ | Model | Size | Mode | Reliability |
331
+ |-------|------|------|-------------|
332
+ | Qwen3 8B | 8.2B | Native | Very Good |
333
+ | Qwen3 4B | 4.0B | Native | Good (best with skills) |
334
+ | Llama 3.1 8B | 8.0B | Native | Good |
335
+ | Gemma4 E2B | 5.1B | Native | Good (auto-tuned) |
336
+
337
+ ## Requirements
338
+
339
+ - Python 3.10+
340
+ - Ollama running locally (`ollama serve`)
341
+ - A model pulled (`ollama pull qwen3:8b`)
342
+
343
+ ## Documentation
344
+
345
+ - **[Tutorial](docs/TUTORIAL.md)** — 5-minute walkthrough from install to working agent
346
+ - **[Website](https://labeveryday.github.io/free-agent-sdk/)** — landing page and feature overview
347
+ - **[Examples](examples/)** — runnable scripts covering tools, memory, hooks, MCP
348
+ - **[Evaluation data](evaluation/)** — benchmark results and thesis analysis
349
+ - **[Changelog](CHANGELOG.md)** — release history
350
+ - **[Contributing](CONTRIBUTING.md)** — how to run tests, add skills, submit PRs
351
+
352
+ ## License
353
+
354
+ MIT
@@ -0,0 +1,316 @@
1
+ # FreeAgent SDK
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/freeagent-sdk.svg?color=1c5d99&labelColor=222)](https://pypi.org/project/freeagent-sdk/)
4
+ [![Python versions](https://img.shields.io/pypi/pyversions/freeagent-sdk.svg?color=639fab&labelColor=222)](https://pypi.org/project/freeagent-sdk/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-bbcde5.svg?labelColor=222)](LICENSE)
6
+ [![Tests](https://github.com/labeveryday/free-agent-sdk/actions/workflows/tests.yml/badge.svg)](https://github.com/labeveryday/free-agent-sdk/actions/workflows/tests.yml)
7
+ [![Docs](https://img.shields.io/badge/docs-freeagentsdk.com-1c5d99?labelColor=222)](https://freeagentsdk.com)
8
+ [![Downloads](https://img.shields.io/pypi/dm/freeagent-sdk.svg?color=639fab&labelColor=222)](https://pypi.org/project/freeagent-sdk/)
9
+
10
+ **A clean local agent SDK for Ollama, vLLM, and OpenAI-compatible servers.**
11
+
12
+ Streaming. Multi-turn out of the box. Markdown skills and memory. Built-in telemetry. Single dependency.
13
+
14
+ ```
15
+ pip install freeagent-sdk
16
+ ```
17
+
18
+ **Links:** [Documentation](https://freeagentsdk.com) · [Tutorial](docs/TUTORIAL.md) · [Changelog](CHANGELOG.md) · [Contributing](CONTRIBUTING.md) · [Examples](examples/) · [Evaluation data](evaluation/)
19
+
20
+ ## Why FreeAgent
21
+
22
+ - **Local-first**: works with Ollama and vLLM — your data never leaves your machine
23
+ - **Streaming everywhere**: token-level streaming with semantic events
24
+ - **Multi-turn that just works**: conversation state managed automatically with pluggable strategies
25
+ - **Markdown is first-class**: skills and memory are human-readable `.md` files with frontmatter
26
+ - **Zero-config**: auto-detects model size and tunes defaults — works on 2B and 70B alike
27
+ - **Inspectable**: `agent.trace()` shows exactly what happened
28
+ - **Fast**: actually 2% faster than the raw Ollama API (HTTP connection reuse)
29
+ - **Honest**: real benchmark data in this README, not marketing
30
+
31
+ ## Quick Start
32
+
33
+ ### CLI
34
+
35
+ ```bash
36
+ # One-shot query with streaming
37
+ freeagent ask qwen3:8b "What's the capital of France?"
38
+
39
+ # Interactive chat
40
+ freeagent chat qwen3:8b
41
+
42
+ # List available models
43
+ freeagent models
44
+ ```
45
+
46
+ ### Python
47
+
48
+ ```python
49
+ from freeagent import Agent
50
+
51
+ agent = Agent(model="qwen3:8b")
52
+ print(agent.run("What is Python?"))
53
+ ```
54
+
55
+ ## Streaming
56
+
57
+ Real token-by-token streaming, even for tool-using agents:
58
+
59
+ ```python
60
+ from freeagent import Agent
61
+ from freeagent.events import TokenEvent, ToolCallEvent, ToolResultEvent
62
+
63
+ agent = Agent(model="qwen3:8b", tools=[weather])
64
+
65
+ for event in agent.run_stream("What's the weather in Tokyo?"):
66
+ if isinstance(event, TokenEvent):
67
+ print(event.text, end="", flush=True)
68
+ elif isinstance(event, ToolCallEvent):
69
+ print(f"\n[Calling {event.name}...]")
70
+ elif isinstance(event, ToolResultEvent):
71
+ print(f"[{event.name} -> {'ok' if event.success else 'fail'} ({event.duration_ms:.0f}ms)]")
72
+ ```
73
+
74
+ Async version: `async for event in agent.arun_stream("query"):`
75
+
76
+ Event types: `RunStartEvent`, `TokenEvent`, `ToolCallEvent`, `ToolResultEvent`, `ValidationErrorEvent`, `RetryEvent`, `IterationEvent`, `RunCompleteEvent`.
77
+
78
+ ## Custom Tools
79
+
80
+ ```python
81
+ from freeagent import Agent, tool
82
+
83
+ @tool
84
+ def weather(city: str) -> dict:
85
+ """Get current weather for a city."""
86
+ return {"city": city, "temp": 72, "condition": "sunny"}
87
+
88
+ agent = Agent(model="qwen3:8b", tools=[weather])
89
+ print(agent.run("What's the weather in Portland?"))
90
+ ```
91
+
92
+ ## Multi-Turn Conversations
93
+
94
+ ```python
95
+ agent = Agent(model="qwen3:8b", tools=[weather])
96
+ agent.run("What's the weather in Tokyo?")
97
+ agent.run("Convert that to Celsius") # remembers Tokyo was 85°F
98
+ ```
99
+
100
+ ### Strategies
101
+
102
+ ```python
103
+ from freeagent import Agent, SlidingWindow, TokenWindow
104
+
105
+ # Default: SlidingWindow(max_turns=20)
106
+ agent = Agent(model="qwen3:8b")
107
+
108
+ # Token-based budget (better for small context models)
109
+ agent = Agent(model="qwen3:4b", conversation=TokenWindow(max_tokens=3000))
110
+
111
+ # Stateless mode (each run independent)
112
+ agent = Agent(model="qwen3:8b", conversation=None)
113
+ ```
114
+
115
+ ### Session Persistence
116
+
117
+ ```python
118
+ agent = Agent(model="qwen3:8b", session="my-chat")
119
+ agent.run("Hello!")
120
+ # Later, in a new process:
121
+ agent = Agent(model="qwen3:8b", session="my-chat") # restores conversation
122
+ ```
123
+
124
+ ## Inspecting Runs
125
+
126
+ Every run is fully traced. See exactly what happened:
127
+
128
+ ```python
129
+ agent.run("What's 347 * 29?")
130
+
131
+ # One-line summary
132
+ print(agent.last_run.summary())
133
+ # Run 1: qwen3:8b (native) 2300ms, 2 iters, 1 tools
134
+
135
+ # Full timeline
136
+ print(agent.trace())
137
+ # + 0ms model_call_start iter=0
138
+ # + 800ms tool_call calc(expression='347*29')
139
+ # + 802ms tool_result calc -> ok (2ms)
140
+ # + 803ms model_call_start iter=1
141
+
142
+ # Markdown report
143
+ print(agent.last_run.to_markdown())
144
+ ```
145
+
146
+ ## Model-Aware Defaults
147
+
148
+ FreeAgent auto-detects model capabilities from Ollama and tunes itself:
149
+
150
+ ```python
151
+ # Auto-tuned: detects 2B model, strips skills and memory tool
152
+ agent = Agent(model="gemma4:e2b")
153
+
154
+ # Auto-tuned: detects 8B model, keeps full defaults
155
+ agent = Agent(model="qwen3:8b")
156
+
157
+ # Override auto-tuning
158
+ agent = Agent(model="gemma4:e2b", bundled_skills=True, memory_tool=True)
159
+
160
+ # Disable auto-tuning entirely
161
+ agent = Agent(model="qwen3:8b", auto_tune=False)
162
+ ```
163
+
164
+ Access detected info: `agent.model_info.parameter_count`, `agent.model_info.context_length`, `agent.model_info.capabilities`.
165
+
166
+ ## Skills (Markdown Prompt Extensions)
167
+
168
+ ```markdown
169
+ ---
170
+ name: nba-analyst
171
+ description: Basketball statistics expert
172
+ tools: [search, calculator]
173
+ ---
174
+
175
+ You are an NBA analyst. Always cite your sources.
176
+ When comparing players, use per-game averages.
177
+ ```
178
+
179
+ ```python
180
+ agent = Agent(model="qwen3:8b", tools=[search, calculator], skills=["./my-skills"])
181
+ ```
182
+
183
+ Bundled skills load automatically. User skills extend them — duplicate names override.
184
+
185
+ ## Memory (Markdown-Backed)
186
+
187
+ Every agent has built-in memory stored as human-readable `.md` files:
188
+
189
+ ```
190
+ .freeagent/memory/
191
+ ├── MEMORY.md # Index
192
+ ├── user.md # auto_load: true → in system prompt
193
+ ├── facts.md # Accumulated facts
194
+ └── 2026-04-05.md # Daily log
195
+ ```
196
+
197
+ The agent gets a `memory` tool with actions: `read`, `write`, `append`, `search`, `list`. Only the index and `auto_load` files go into the system prompt — everything else is on demand.
198
+
199
+ ## Multi-Provider Support
200
+
201
+ ```python
202
+ from freeagent import Agent, VLLMProvider, OpenAICompatProvider
203
+
204
+ # vLLM
205
+ provider = VLLMProvider(model="qwen3-8b")
206
+ agent = Agent(model="qwen3-8b", provider=provider, tools=[my_tool])
207
+
208
+ # Any OpenAI-compatible server
209
+ provider = OpenAICompatProvider(model="llama3.1:8b", base_url="http://localhost:1234")
210
+ agent = Agent(model="llama3.1:8b", provider=provider, tools=[my_tool])
211
+ ```
212
+
213
+ ## Telemetry
214
+
215
+ Built-in, always on:
216
+
217
+ ```python
218
+ agent.run("What's the weather?")
219
+ print(agent.metrics) # quick summary
220
+ print(agent.metrics.tool_stats()) # per-tool breakdown
221
+ agent.metrics.to_json("m.json") # export
222
+ ```
223
+
224
+ Optional OpenTelemetry: `pip install freeagent-sdk[otel]`
225
+
226
+ ## MCP Support
227
+
228
+ ```python
229
+ from freeagent.mcp import connect
230
+
231
+ async with connect("npx -y @modelcontextprotocol/server-filesystem /tmp") as tools:
232
+ agent = Agent(model="qwen3:8b", tools=tools)
233
+ result = await agent.arun("List files in /tmp")
234
+ ```
235
+
236
+ Install with: `pip install freeagent-sdk[mcp]`
237
+
238
+ ## Real Performance
239
+
240
+ Tested against the raw Ollama API with the same eval suite (100+ cases, 4 models). Full data in `evaluation/`.
241
+
242
+ ### Multi-Turn Conversations (6 conversations, 15 turns)
243
+
244
+ | Model | Raw Ollama | FreeAgent |
245
+ |-------|-----------|-----------|
246
+ | qwen3:8b | 93% | **87%** |
247
+ | qwen3:4b | 93% | **87%** |
248
+ | llama3.1:8b | 87% | **80%** |
249
+ | gemma4:e2b (2B) | N/A | **80%** |
250
+
251
+ ### Tool Calling Accuracy (8 cases)
252
+
253
+ | Model | Raw Ollama | FreeAgent |
254
+ |-------|-----------|-----------|
255
+ | qwen3:8b | 75% | 75% |
256
+ | qwen3:4b | 100% | 88% |
257
+ | llama3.1:8b | 62% | **75% (+13%)** |
258
+
259
+ ### Streaming Latency (median of 3 runs)
260
+
261
+ | Model | Chat TTFT | Chat Total | Tool TTFT | Tool Total |
262
+ |-------|----------|-----------|----------|-----------|
263
+ | qwen3:8b | 12.8s | 13.9s | 5.2s | 10.0s |
264
+ | qwen3:4b | 14.7s | 14.5s | 28.2s | 31.6s |
265
+ | llama3.1:8b | 1.5s | 1.4s | 1.8s | 2.1s |
266
+ | gemma4:e2b | 4.7s | 5.1s | 8.2s | 12.1s |
267
+
268
+ TTFT ≈ total for chat (generation is fast once started). Tool TTFT includes tool execution round-trip.
269
+
270
+ ### Auto-Tune (v0.3.1)
271
+
272
+ | Model | auto_tune=True | All On | Manual Strip | Delta vs All On |
273
+ |-------|---------------|--------|-------------|----------------|
274
+ | qwen3:8b | 91% | 91% | — | +0% |
275
+ | qwen3:4b | 91% | 91% | — | +0% |
276
+ | llama3.1:8b | 100% | 100% | — | +0% |
277
+ | gemma4:e2b | **91%** | 55% | 73% | **+36%** |
278
+
279
+ Auto-tune detects gemma4:e2b as a small model and strips bundled skills + memory tool. This improves accuracy from 55% → 91%.
280
+
281
+ ### Honest Caveats
282
+
283
+ - **Guardrails rarely fire**: 0/40 real rescues in adversarial testing. Modern models handle fuzzy names and type coercion natively.
284
+ - **Multi-turn gap to raw Ollama is noise**: 87% vs 93% — re-running failures produces passes. Non-deterministic.
285
+ - **Skills help qwen3:4b but hurt gemma4:e2b** — fixed by auto-tune, which strips them for small models.
286
+ - **Streaming TTFT ≈ total time** on small models: generation is fast, model thinking dominates latency.
287
+
288
+ Full analysis: `evaluation/THESIS_ANALYSIS.md`
289
+
290
+ ## Tested Models
291
+
292
+ | Model | Size | Mode | Reliability |
293
+ |-------|------|------|-------------|
294
+ | Qwen3 8B | 8.2B | Native | Very Good |
295
+ | Qwen3 4B | 4.0B | Native | Good (best with skills) |
296
+ | Llama 3.1 8B | 8.0B | Native | Good |
297
+ | Gemma4 E2B | 5.1B | Native | Good (auto-tuned) |
298
+
299
+ ## Requirements
300
+
301
+ - Python 3.10+
302
+ - Ollama running locally (`ollama serve`)
303
+ - A model pulled (`ollama pull qwen3:8b`)
304
+
305
+ ## Documentation
306
+
307
+ - **[Tutorial](docs/TUTORIAL.md)** — 5-minute walkthrough from install to working agent
308
+ - **[Website](https://labeveryday.github.io/free-agent-sdk/)** — landing page and feature overview
309
+ - **[Examples](examples/)** — runnable scripts covering tools, memory, hooks, MCP
310
+ - **[Evaluation data](evaluation/)** — benchmark results and thesis analysis
311
+ - **[Changelog](CHANGELOG.md)** — release history
312
+ - **[Contributing](CONTRIBUTING.md)** — how to run tests, add skills, submit PRs
313
+
314
+ ## License
315
+
316
+ MIT