agentforge-ml 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentforge_ml-0.1.0/.github/workflows/ci.yml +55 -0
- agentforge_ml-0.1.0/.github/workflows/docs.yml +46 -0
- agentforge_ml-0.1.0/.gitignore +57 -0
- agentforge_ml-0.1.0/CHANGELOG.md +20 -0
- agentforge_ml-0.1.0/LICENSE +21 -0
- agentforge_ml-0.1.0/PKG-INFO +242 -0
- agentforge_ml-0.1.0/README.md +181 -0
- agentforge_ml-0.1.0/benchmarks/results/.gitkeep +0 -0
- agentforge_ml-0.1.0/docs/CONTRIBUTING.md +37 -0
- agentforge_ml-0.1.0/docs/evaluation.md +67 -0
- agentforge_ml-0.1.0/docs/index.md +58 -0
- agentforge_ml-0.1.0/docs/memory.md +46 -0
- agentforge_ml-0.1.0/docs/react.md +65 -0
- agentforge_ml-0.1.0/docs/tools.md +92 -0
- agentforge_ml-0.1.0/examples/basic_agent.py +43 -0
- agentforge_ml-0.1.0/examples/eval_agent.py +66 -0
- agentforge_ml-0.1.0/examples/eval_set.jsonl +5 -0
- agentforge_ml-0.1.0/examples/rag_agent.py +56 -0
- agentforge_ml-0.1.0/mkdocs.yml +46 -0
- agentforge_ml-0.1.0/pyproject.toml +95 -0
- agentforge_ml-0.1.0/src/agentforge/__init__.py +12 -0
- agentforge_ml-0.1.0/src/agentforge/cli.py +175 -0
- agentforge_ml-0.1.0/src/agentforge/core/__init__.py +15 -0
- agentforge_ml-0.1.0/src/agentforge/core/agent.py +186 -0
- agentforge_ml-0.1.0/src/agentforge/core/parser.py +85 -0
- agentforge_ml-0.1.0/src/agentforge/core/prompts.py +59 -0
- agentforge_ml-0.1.0/src/agentforge/eval/__init__.py +19 -0
- agentforge_ml-0.1.0/src/agentforge/eval/metrics.py +105 -0
- agentforge_ml-0.1.0/src/agentforge/eval/report.py +55 -0
- agentforge_ml-0.1.0/src/agentforge/llm/__init__.py +7 -0
- agentforge_ml-0.1.0/src/agentforge/llm/base.py +16 -0
- agentforge_ml-0.1.0/src/agentforge/llm/hf.py +83 -0
- agentforge_ml-0.1.0/src/agentforge/llm/quantized.py +39 -0
- agentforge_ml-0.1.0/src/agentforge/memory/__init__.py +7 -0
- agentforge_ml-0.1.0/src/agentforge/memory/base.py +23 -0
- agentforge_ml-0.1.0/src/agentforge/memory/conversation.py +30 -0
- agentforge_ml-0.1.0/src/agentforge/memory/persistent.py +80 -0
- agentforge_ml-0.1.0/src/agentforge/serve/__init__.py +5 -0
- agentforge_ml-0.1.0/src/agentforge/serve/app.py +83 -0
- agentforge_ml-0.1.0/src/agentforge/tools/__init__.py +18 -0
- agentforge_ml-0.1.0/src/agentforge/tools/base.py +55 -0
- agentforge_ml-0.1.0/src/agentforge/tools/calculator.py +115 -0
- agentforge_ml-0.1.0/src/agentforge/tools/python_repl.py +143 -0
- agentforge_ml-0.1.0/src/agentforge/tools/rag.py +54 -0
- agentforge_ml-0.1.0/src/agentforge/tools/sql.py +64 -0
- agentforge_ml-0.1.0/src/agentforge/tools/web_search.py +48 -0
- agentforge_ml-0.1.0/src/agentforge/utils.py +35 -0
- agentforge_ml-0.1.0/tests/__init__.py +0 -0
- agentforge_ml-0.1.0/tests/conftest.py +37 -0
- agentforge_ml-0.1.0/tests/test_agent.py +59 -0
- agentforge_ml-0.1.0/tests/test_cli.py +22 -0
- agentforge_ml-0.1.0/tests/test_eval.py +89 -0
- agentforge_ml-0.1.0/tests/test_memory.py +55 -0
- agentforge_ml-0.1.0/tests/test_parser.py +46 -0
- agentforge_ml-0.1.0/tests/test_serve.py +47 -0
- agentforge_ml-0.1.0/tests/test_tools.py +101 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
- uses: actions/setup-python@v5
|
|
15
|
+
with:
|
|
16
|
+
python-version: "3.11"
|
|
17
|
+
cache: pip
|
|
18
|
+
- run: |
|
|
19
|
+
python -m pip install --upgrade pip
|
|
20
|
+
pip install ruff
|
|
21
|
+
- run: ruff check src tests
|
|
22
|
+
- run: ruff format --check src tests
|
|
23
|
+
|
|
24
|
+
test:
|
|
25
|
+
runs-on: ubuntu-latest
|
|
26
|
+
strategy:
|
|
27
|
+
fail-fast: false
|
|
28
|
+
matrix:
|
|
29
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
30
|
+
steps:
|
|
31
|
+
- uses: actions/checkout@v4
|
|
32
|
+
- uses: actions/setup-python@v5
|
|
33
|
+
with:
|
|
34
|
+
python-version: ${{ matrix.python-version }}
|
|
35
|
+
cache: pip
|
|
36
|
+
- run: |
|
|
37
|
+
python -m pip install --upgrade pip
|
|
38
|
+
pip install -e ".[dev,serve,tools,eval]"
|
|
39
|
+
- run: pytest -m "not slow and not gpu and not network" --cov=agentforge --cov-report=xml
|
|
40
|
+
|
|
41
|
+
build:
|
|
42
|
+
runs-on: ubuntu-latest
|
|
43
|
+
needs: [lint, test]
|
|
44
|
+
steps:
|
|
45
|
+
- uses: actions/checkout@v4
|
|
46
|
+
- uses: actions/setup-python@v5
|
|
47
|
+
with:
|
|
48
|
+
python-version: "3.11"
|
|
49
|
+
- run: |
|
|
50
|
+
python -m pip install --upgrade pip build
|
|
51
|
+
python -m build
|
|
52
|
+
- uses: actions/upload-artifact@v4
|
|
53
|
+
with:
|
|
54
|
+
name: dist
|
|
55
|
+
path: dist/
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
name: docs
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
paths:
|
|
7
|
+
- docs/**
|
|
8
|
+
- mkdocs.yml
|
|
9
|
+
- .github/workflows/docs.yml
|
|
10
|
+
workflow_dispatch:
|
|
11
|
+
|
|
12
|
+
permissions:
|
|
13
|
+
contents: read
|
|
14
|
+
pages: write
|
|
15
|
+
id-token: write
|
|
16
|
+
|
|
17
|
+
concurrency:
|
|
18
|
+
group: pages
|
|
19
|
+
cancel-in-progress: false
|
|
20
|
+
|
|
21
|
+
jobs:
|
|
22
|
+
build:
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
steps:
|
|
25
|
+
- uses: actions/checkout@v4
|
|
26
|
+
- uses: actions/setup-python@v5
|
|
27
|
+
with:
|
|
28
|
+
python-version: "3.11"
|
|
29
|
+
cache: pip
|
|
30
|
+
- run: |
|
|
31
|
+
python -m pip install --upgrade pip
|
|
32
|
+
pip install mkdocs-material pymdown-extensions
|
|
33
|
+
- run: mkdocs build --strict
|
|
34
|
+
- uses: actions/upload-pages-artifact@v3
|
|
35
|
+
with:
|
|
36
|
+
path: site
|
|
37
|
+
|
|
38
|
+
deploy:
|
|
39
|
+
needs: build
|
|
40
|
+
runs-on: ubuntu-latest
|
|
41
|
+
environment:
|
|
42
|
+
name: github-pages
|
|
43
|
+
url: ${{ steps.deployment.outputs.page_url }}
|
|
44
|
+
steps:
|
|
45
|
+
- id: deployment
|
|
46
|
+
uses: actions/deploy-pages@v4
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.py[cod]
|
|
3
|
+
*$py.class
|
|
4
|
+
*.so
|
|
5
|
+
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
*.egg-info/
|
|
10
|
+
*.egg
|
|
11
|
+
|
|
12
|
+
.venv/
|
|
13
|
+
venv/
|
|
14
|
+
env/
|
|
15
|
+
|
|
16
|
+
.vscode/
|
|
17
|
+
.idea/
|
|
18
|
+
*.swp
|
|
19
|
+
.DS_Store
|
|
20
|
+
|
|
21
|
+
.pytest_cache/
|
|
22
|
+
.mypy_cache/
|
|
23
|
+
.ruff_cache/
|
|
24
|
+
.coverage
|
|
25
|
+
htmlcov/
|
|
26
|
+
|
|
27
|
+
.ipynb_checkpoints/
|
|
28
|
+
|
|
29
|
+
# Caches
|
|
30
|
+
.cache/
|
|
31
|
+
hf_cache/
|
|
32
|
+
qdrant_storage/
|
|
33
|
+
.agentforge/
|
|
34
|
+
|
|
35
|
+
# Eval results
|
|
36
|
+
benchmarks/results/*.json
|
|
37
|
+
benchmarks/results/*.csv
|
|
38
|
+
!benchmarks/results/.gitkeep
|
|
39
|
+
|
|
40
|
+
# MkDocs
|
|
41
|
+
site/
|
|
42
|
+
|
|
43
|
+
*.log
|
|
44
|
+
logs/
|
|
45
|
+
|
|
46
|
+
.env
|
|
47
|
+
.env.local
|
|
48
|
+
|
|
49
|
+
*.safetensors
|
|
50
|
+
*.bin
|
|
51
|
+
*.gguf
|
|
52
|
+
*.pt
|
|
53
|
+
|
|
54
|
+
# DBs
|
|
55
|
+
*.db
|
|
56
|
+
*.sqlite
|
|
57
|
+
*.sqlite3
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [Unreleased]
|
|
4
|
+
|
|
5
|
+
## [0.1.0] — 2026-06-19 — first release
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- `Agent` ReAct loop with structured Thought / Action / Observation parser.
|
|
9
|
+
- `ToolRegistry` + 5 built-in tools: `calculator` (AST whitelist),
|
|
10
|
+
`python_repl` (sandboxed exec), `web_search` (DuckDuckGo), `sql` (read-only
|
|
11
|
+
sqlite), `rag` (wraps a `ragforge` Pipeline).
|
|
12
|
+
- LLM backends: `HFLLM` and `QuantizedHFLLM` (via `turboquant-ml` for NF4/GPTQ/AWQ).
|
|
13
|
+
- Memory backends: `ConversationMemory` (in-process FIFO) and
|
|
14
|
+
`PersistentMemory` (SQLite, session-scoped).
|
|
15
|
+
- Eval harness: `task_completion`, `final_answer_match`, `tool_accuracy`,
|
|
16
|
+
`step_efficiency` with a CLI orchestrator.
|
|
17
|
+
- FastAPI server with `/health`, `/tools`, `/ask`.
|
|
18
|
+
- Typer CLI: `af ask / eval / serve / tools`.
|
|
19
|
+
- pytest suite (offline, scripted LLM fixture).
|
|
20
|
+
- CI on Python 3.10-3.12, MkDocs Material docs site.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 AgentForge Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agentforge-ml
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AgentForge — ReAct agents on open-weight LLMs with tools (RAG, REPL, web, SQL, calculator) and an eval harness. Pairs with ragforge-ml and turboquant-ml.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Ademo93/agentforge
|
|
6
|
+
Project-URL: Repository, https://github.com/Ademo93/agentforge
|
|
7
|
+
Project-URL: Issues, https://github.com/Ademo93/agentforge/issues
|
|
8
|
+
Project-URL: Documentation, https://Ademo93.github.io/agentforge/
|
|
9
|
+
Author: AgentForge Contributors
|
|
10
|
+
License: MIT
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Keywords: agents,evaluation,function-calling,llm,open-source-llm,rag,react,tool-use
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: numpy>=1.24
|
|
24
|
+
Requires-Dist: pydantic>=2.7
|
|
25
|
+
Requires-Dist: pyyaml>=6.0
|
|
26
|
+
Requires-Dist: rich>=13.7
|
|
27
|
+
Requires-Dist: torch>=2.2
|
|
28
|
+
Requires-Dist: tqdm>=4.66
|
|
29
|
+
Requires-Dist: transformers>=4.40
|
|
30
|
+
Requires-Dist: typer>=0.12
|
|
31
|
+
Provides-Extra: all
|
|
32
|
+
Requires-Dist: accelerate>=0.30; extra == 'all'
|
|
33
|
+
Requires-Dist: duckduckgo-search>=6.0; extra == 'all'
|
|
34
|
+
Requires-Dist: fastapi>=0.111; extra == 'all'
|
|
35
|
+
Requires-Dist: pandas>=2.2; extra == 'all'
|
|
36
|
+
Requires-Dist: ragforge-ml>=0.1; extra == 'all'
|
|
37
|
+
Requires-Dist: sympy>=1.12; extra == 'all'
|
|
38
|
+
Requires-Dist: turboquant-ml>=0.1; extra == 'all'
|
|
39
|
+
Requires-Dist: uvicorn>=0.30; extra == 'all'
|
|
40
|
+
Provides-Extra: dev
|
|
41
|
+
Requires-Dist: httpx>=0.27; extra == 'dev'
|
|
42
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
43
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
44
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
45
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
46
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
47
|
+
Provides-Extra: eval
|
|
48
|
+
Requires-Dist: pandas>=2.2; extra == 'eval'
|
|
49
|
+
Provides-Extra: quantized
|
|
50
|
+
Requires-Dist: accelerate>=0.30; extra == 'quantized'
|
|
51
|
+
Requires-Dist: turboquant-ml>=0.1; extra == 'quantized'
|
|
52
|
+
Provides-Extra: rag
|
|
53
|
+
Requires-Dist: ragforge-ml>=0.1; extra == 'rag'
|
|
54
|
+
Provides-Extra: serve
|
|
55
|
+
Requires-Dist: fastapi>=0.111; extra == 'serve'
|
|
56
|
+
Requires-Dist: uvicorn>=0.30; extra == 'serve'
|
|
57
|
+
Provides-Extra: tools
|
|
58
|
+
Requires-Dist: duckduckgo-search>=6.0; extra == 'tools'
|
|
59
|
+
Requires-Dist: sympy>=1.12; extra == 'tools'
|
|
60
|
+
Description-Content-Type: text/markdown
|
|
61
|
+
|
|
62
|
+
<h1 align="center">AgentForge</h1>
|
|
63
|
+
|
|
64
|
+
<p align="center">
|
|
65
|
+
<strong>ReAct agents on open-weight LLMs — tools, memory, and an eval harness.</strong>
|
|
66
|
+
<br>
|
|
67
|
+
Pairs with <a href="https://github.com/Ademo93/ragforge">ragforge-ml</a> for retrieval and
|
|
68
|
+
<a href="https://github.com/Ademo93/turboquant">turboquant-ml</a> for quantized model serving.
|
|
69
|
+
</p>
|
|
70
|
+
|
|
71
|
+
<p align="center">
|
|
72
|
+
<a href="https://pypi.org/project/agentforge-ml/"><img alt="PyPI" src="https://img.shields.io/badge/pypi-agentforge--ml-blue"></a>
|
|
73
|
+
<a href="#"><img alt="Python" src="https://img.shields.io/badge/python-3.10%2B-blue"></a>
|
|
74
|
+
<a href="#"><img alt="License" src="https://img.shields.io/badge/license-MIT-green"></a>
|
|
75
|
+
<a href="https://Ademo93.github.io/agentforge/"><img alt="Docs" src="https://img.shields.io/badge/docs-mkdocs--material-blue"></a>
|
|
76
|
+
</p>
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Why AgentForge?
|
|
81
|
+
|
|
82
|
+
Most "agent framework" projects use proprietary models (GPT-4, Claude) behind a
|
|
83
|
+
DSL of `Runnable.invoke()` chains nobody can debug. AgentForge is the opposite:
|
|
84
|
+
**ReAct loops on open-weight LLMs (Llama, Qwen, Mistral), with a small registry
|
|
85
|
+
of well-bounded tools, and an evaluation harness so you can measure whether
|
|
86
|
+
your agent is actually doing what you asked.**
|
|
87
|
+
|
|
88
|
+
Three opinions:
|
|
89
|
+
|
|
90
|
+
1. **Open models first.** Defaults work on `Qwen/Qwen2.5-3B-Instruct` and any
|
|
91
|
+
chat-template HF model. No API key required. Plug in
|
|
92
|
+
[turboquant-ml](https://github.com/Ademo93/turboquant) to serve the model
|
|
93
|
+
quantized.
|
|
94
|
+
2. **ReAct, not magic.** The loop is a 60-line function (`agent.py:run`) that
|
|
95
|
+
alternates Thought / Action / Observation steps. Easy to read, easy to debug.
|
|
96
|
+
3. **Tools have hard boundaries.** Python REPL runs in an AST-whitelisted
|
|
97
|
+
sandbox; SQL is read-only; web search is rate-limited; RAG retrieval is
|
|
98
|
+
delegated to [ragforge-ml](https://github.com/Ademo93/ragforge).
|
|
99
|
+
|
|
100
|
+
## Features
|
|
101
|
+
|
|
102
|
+
| Stage | Default |
|
|
103
|
+
|---|---|
|
|
104
|
+
| **LLM** | Any HuggingFace chat-template model. Optional `bnb-nf4` via `turboquant-ml`. |
|
|
105
|
+
| **Loop** | ReAct with `max_steps`, structured Thought/Action/Observation parser |
|
|
106
|
+
| **Tools** | `calculator`, `python_repl` (sandboxed), `web_search` (DuckDuckGo), `sql` (read-only sqlite), `rag` (RAGforge) |
|
|
107
|
+
| **Memory** | In-memory conversation, persistent SQLite store |
|
|
108
|
+
| **Eval** | `task_completion`, `tool_accuracy`, `step_efficiency`, `final_answer_match` |
|
|
109
|
+
| **Serve** | FastAPI `/ask`, `/tools`, `/health` |
|
|
110
|
+
| **CLI** | `agentforge ask / eval / tools / serve` |
|
|
111
|
+
|
|
112
|
+
## Installation
|
|
113
|
+
|
|
114
|
+
The PyPI distribution is `agentforge-ml` (the unsuffixed `agentforge` name was
|
|
115
|
+
taken by an unrelated project). Python import and CLI are just `agentforge` /
|
|
116
|
+
`af`:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
pip install agentforge-ml # core
|
|
120
|
+
pip install "agentforge-ml[tools]" # + sympy + duckduckgo-search
|
|
121
|
+
pip install "agentforge-ml[rag]" # + ragforge-ml integration
|
|
122
|
+
pip install "agentforge-ml[quantized]" # + turboquant-ml NF4 path
|
|
123
|
+
pip install "agentforge-ml[serve]" # + FastAPI
|
|
124
|
+
pip install "agentforge-ml[all]" # everything
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## 60-second tour
|
|
128
|
+
|
|
129
|
+
```python
|
|
130
|
+
from agentforge import Agent
|
|
131
|
+
from agentforge.tools import Calculator, WebSearch, PythonREPL
|
|
132
|
+
|
|
133
|
+
agent = Agent.from_defaults(
|
|
134
|
+
model_id="Qwen/Qwen2.5-3B-Instruct",
|
|
135
|
+
tools=[Calculator(), PythonREPL(), WebSearch()],
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
result = agent.run("What is 47 * 1337, then take its square root?")
|
|
139
|
+
print(result.final_answer)
|
|
140
|
+
for step in result.steps:
|
|
141
|
+
print(f" [{step.tool}] {step.action_input!r} -> {step.observation!r}")
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### With RAG
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from agentforge import Agent
|
|
148
|
+
from agentforge.tools import RAGTool
|
|
149
|
+
from ragforge import Pipeline
|
|
150
|
+
|
|
151
|
+
rag = Pipeline.from_defaults(model_id="Qwen/Qwen2.5-3B-Instruct")
|
|
152
|
+
rag.ingest(["docs/"])
|
|
153
|
+
|
|
154
|
+
agent = Agent.from_defaults(
|
|
155
|
+
model_id="Qwen/Qwen2.5-3B-Instruct",
|
|
156
|
+
tools=[RAGTool(rag)],
|
|
157
|
+
)
|
|
158
|
+
print(agent.run("What is our company refund policy?").final_answer)
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### CLI
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
af ask "What is 17 squared?" --tools calculator
|
|
165
|
+
af ask "Latest CVE for log4j?" --tools web_search
|
|
166
|
+
af eval examples/eval_set.jsonl --tools calculator,python_repl
|
|
167
|
+
af serve --tools calculator,python_repl --port 8080
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## ReAct loop, in a picture
|
|
171
|
+
|
|
172
|
+
```text
|
|
173
|
+
question -> [LLM] Thought + Action -> [Tool] Observation
|
|
174
|
+
^ |
|
|
175
|
+
|_______________________________________|
|
|
176
|
+
up to max_steps
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
If the LLM emits `Final Answer:` the loop exits. Otherwise it loops until
|
|
180
|
+
`max_steps`. The parser is forgiving: it tolerates whitespace and case differences, and
|
|
181
|
+
falls back to the last completed step on truncation.
|
|
182
|
+
|
|
183
|
+
## Eval harness
|
|
184
|
+
|
|
185
|
+
Built-in, pure Python, no judge model required:
|
|
186
|
+
|
|
187
|
+
| Metric | What it measures |
|
|
188
|
+
|---|---|
|
|
189
|
+
| **`task_completion`** | Did the agent produce a `Final Answer:`? |
|
|
190
|
+
| **`final_answer_match`** | Does the answer contain the ground-truth string (case-folded substring)? |
|
|
191
|
+
| **`tool_accuracy`** | Of the steps, what fraction used the expected tool? |
|
|
192
|
+
| **`step_efficiency`** | `ground_truth_steps / actual_steps`, clipped to [0, 1] |
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
af eval examples/eval_set.jsonl --tools all
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
```text
|
|
199
|
+
+--------------------+--------+
|
|
200
|
+
| metric | mean |
|
|
201
|
+
+--------------------+--------+
|
|
202
|
+
| task_completion | 0.95 |
|
|
203
|
+
| final_answer_match | 0.81 |
|
|
204
|
+
| tool_accuracy | 0.88 |
|
|
205
|
+
| step_efficiency | 0.72 |
|
|
206
|
+
+--------------------+--------+
|
|
207
|
+
n=80 · p50=2.4s · p95=8.1s
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
## Architecture
|
|
211
|
+
|
|
212
|
+
```
|
|
213
|
+
agentforge/
|
|
214
|
+
├── core/ # ReAct loop + parser + prompts
|
|
215
|
+
├── tools/ # registry, calculator, python repl, web search, sql, rag
|
|
216
|
+
├── memory/ # conversation, persistent sqlite
|
|
217
|
+
├── llm/ # HuggingFace causal LM wrapper
|
|
218
|
+
├── eval/ # 4 metrics + orchestrator
|
|
219
|
+
├── serve/ # FastAPI app
|
|
220
|
+
└── cli.py # af / agentforge
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
Every stage is a small module behind a small interface (`LLM`, `Tool`,
|
|
224
|
+
`Memory`) — swap any of them in two lines.
|
|
225
|
+
|
|
226
|
+
## Roadmap
|
|
227
|
+
|
|
228
|
+
- [x] ReAct loop with structured parsing
|
|
229
|
+
- [x] Tool protocol + registry
|
|
230
|
+
- [x] 5 built-in tools (calculator, python, web, sql, rag)
|
|
231
|
+
- [x] Persistent SQLite memory
|
|
232
|
+
- [x] Eval: task completion, final-answer match, tool accuracy, step efficiency
|
|
233
|
+
- [x] FastAPI server + Typer CLI
|
|
234
|
+
- [x] turboquant-ml integration (NF4 / GPTQ / AWQ models)
|
|
235
|
+
- [ ] Plan-and-execute pattern alongside ReAct
|
|
236
|
+
- [ ] Streaming step output in `/ask`
|
|
237
|
+
- [ ] Tool-use chat templates (Qwen tool format, Llama-3 tool format)
|
|
238
|
+
- [ ] Multi-agent coordination
|
|
239
|
+
|
|
240
|
+
## License
|
|
241
|
+
|
|
242
|
+
[MIT](LICENSE).
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
<h1 align="center">AgentForge</h1>
|
|
2
|
+
|
|
3
|
+
<p align="center">
|
|
4
|
+
<strong>ReAct agents on open-weight LLMs — tools, memory, and an eval harness.</strong>
|
|
5
|
+
<br>
|
|
6
|
+
Pairs with <a href="https://github.com/Ademo93/ragforge">ragforge-ml</a> for retrieval and
|
|
7
|
+
<a href="https://github.com/Ademo93/turboquant">turboquant-ml</a> for quantized model serving.
|
|
8
|
+
</p>
|
|
9
|
+
|
|
10
|
+
<p align="center">
|
|
11
|
+
<a href="https://pypi.org/project/agentforge-ml/"><img alt="PyPI" src="https://img.shields.io/badge/pypi-agentforge--ml-blue"></a>
|
|
12
|
+
<a href="#"><img alt="Python" src="https://img.shields.io/badge/python-3.10%2B-blue"></a>
|
|
13
|
+
<a href="#"><img alt="License" src="https://img.shields.io/badge/license-MIT-green"></a>
|
|
14
|
+
<a href="https://Ademo93.github.io/agentforge/"><img alt="Docs" src="https://img.shields.io/badge/docs-mkdocs--material-blue"></a>
|
|
15
|
+
</p>
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Why AgentForge?
|
|
20
|
+
|
|
21
|
+
Most "agent framework" projects use proprietary models (GPT-4, Claude) behind a
|
|
22
|
+
DSL of `Runnable.invoke()` chains nobody can debug. AgentForge is the opposite:
|
|
23
|
+
**ReAct loops on open-weight LLMs (Llama, Qwen, Mistral), with a small registry
|
|
24
|
+
of well-bounded tools, and an evaluation harness so you can measure whether
|
|
25
|
+
your agent is actually doing what you asked.**
|
|
26
|
+
|
|
27
|
+
Three opinions:
|
|
28
|
+
|
|
29
|
+
1. **Open models first.** Defaults work on `Qwen/Qwen2.5-3B-Instruct` and any
|
|
30
|
+
chat-template HF model. No API key required. Plug in
|
|
31
|
+
[turboquant-ml](https://github.com/Ademo93/turboquant) to serve the model
|
|
32
|
+
quantized.
|
|
33
|
+
2. **ReAct, not magic.** The loop is a 60-line function (`agent.py:run`) that
|
|
34
|
+
alternates Thought / Action / Observation steps. Easy to read, easy to debug.
|
|
35
|
+
3. **Tools have hard boundaries.** Python REPL runs in an AST-whitelisted
|
|
36
|
+
sandbox; SQL is read-only; web search is rate-limited; RAG retrieval is
|
|
37
|
+
delegated to [ragforge-ml](https://github.com/Ademo93/ragforge).
|
|
38
|
+
|
|
39
|
+
## Features
|
|
40
|
+
|
|
41
|
+
| Stage | Default |
|
|
42
|
+
|---|---|
|
|
43
|
+
| **LLM** | Any HuggingFace chat-template model. Optional `bnb-nf4` via `turboquant-ml`. |
|
|
44
|
+
| **Loop** | ReAct with `max_steps`, structured Thought/Action/Observation parser |
|
|
45
|
+
| **Tools** | `calculator`, `python_repl` (sandboxed), `web_search` (DuckDuckGo), `sql` (read-only sqlite), `rag` (RAGforge) |
|
|
46
|
+
| **Memory** | In-memory conversation, persistent SQLite store |
|
|
47
|
+
| **Eval** | `task_completion`, `tool_accuracy`, `step_efficiency`, `final_answer_match` |
|
|
48
|
+
| **Serve** | FastAPI `/ask`, `/tools`, `/health` |
|
|
49
|
+
| **CLI** | `agentforge ask / eval / tools / serve` |
|
|
50
|
+
|
|
51
|
+
## Installation
|
|
52
|
+
|
|
53
|
+
The PyPI distribution is `agentforge-ml` (the unsuffixed `agentforge` name was
|
|
54
|
+
taken by an unrelated project). Python import and CLI are just `agentforge` /
|
|
55
|
+
`af`:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install agentforge-ml # core
|
|
59
|
+
pip install "agentforge-ml[tools]" # + sympy + duckduckgo-search
|
|
60
|
+
pip install "agentforge-ml[rag]" # + ragforge-ml integration
|
|
61
|
+
pip install "agentforge-ml[quantized]" # + turboquant-ml NF4 path
|
|
62
|
+
pip install "agentforge-ml[serve]" # + FastAPI
|
|
63
|
+
pip install "agentforge-ml[all]" # everything
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## 60-second tour
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from agentforge import Agent
|
|
70
|
+
from agentforge.tools import Calculator, WebSearch, PythonREPL
|
|
71
|
+
|
|
72
|
+
agent = Agent.from_defaults(
|
|
73
|
+
model_id="Qwen/Qwen2.5-3B-Instruct",
|
|
74
|
+
tools=[Calculator(), PythonREPL(), WebSearch()],
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
result = agent.run("What is 47 * 1337, then take its square root?")
|
|
78
|
+
print(result.final_answer)
|
|
79
|
+
for step in result.steps:
|
|
80
|
+
print(f" [{step.tool}] {step.action_input!r} -> {step.observation!r}")
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### With RAG
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from agentforge import Agent
|
|
87
|
+
from agentforge.tools import RAGTool
|
|
88
|
+
from ragforge import Pipeline
|
|
89
|
+
|
|
90
|
+
rag = Pipeline.from_defaults(model_id="Qwen/Qwen2.5-3B-Instruct")
|
|
91
|
+
rag.ingest(["docs/"])
|
|
92
|
+
|
|
93
|
+
agent = Agent.from_defaults(
|
|
94
|
+
model_id="Qwen/Qwen2.5-3B-Instruct",
|
|
95
|
+
tools=[RAGTool(rag)],
|
|
96
|
+
)
|
|
97
|
+
print(agent.run("What is our company refund policy?").final_answer)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### CLI
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
af ask "What is 17 squared?" --tools calculator
|
|
104
|
+
af ask "Latest CVE for log4j?" --tools web_search
|
|
105
|
+
af eval examples/eval_set.jsonl --tools calculator,python_repl
|
|
106
|
+
af serve --tools calculator,python_repl --port 8080
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## ReAct loop, in a picture
|
|
110
|
+
|
|
111
|
+
```text
|
|
112
|
+
question -> [LLM] Thought + Action -> [Tool] Observation
|
|
113
|
+
^ |
|
|
114
|
+
|_______________________________________|
|
|
115
|
+
up to max_steps
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
If the LLM emits `Final Answer:` the loop exits. Otherwise it loops until
|
|
119
|
+
`max_steps`. The parser is forgiving: it tolerates whitespace and case differences, and
|
|
120
|
+
falls back to the last completed step on truncation.
|
|
121
|
+
|
|
122
|
+
## Eval harness
|
|
123
|
+
|
|
124
|
+
Built-in, pure Python, no judge model required:
|
|
125
|
+
|
|
126
|
+
| Metric | What it measures |
|
|
127
|
+
|---|---|
|
|
128
|
+
| **`task_completion`** | Did the agent produce a `Final Answer:`? |
|
|
129
|
+
| **`final_answer_match`** | Does the answer contain the ground-truth string (case-folded substring)? |
|
|
130
|
+
| **`tool_accuracy`** | Of the steps, what fraction used the expected tool? |
|
|
131
|
+
| **`step_efficiency`** | `ground_truth_steps / actual_steps`, clipped to [0, 1] |
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
af eval examples/eval_set.jsonl --tools all
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
```text
|
|
138
|
+
+--------------------+--------+
|
|
139
|
+
| metric | mean |
|
|
140
|
+
+--------------------+--------+
|
|
141
|
+
| task_completion | 0.95 |
|
|
142
|
+
| final_answer_match | 0.81 |
|
|
143
|
+
| tool_accuracy | 0.88 |
|
|
144
|
+
| step_efficiency | 0.72 |
|
|
145
|
+
+--------------------+--------+
|
|
146
|
+
n=80 · p50=2.4s · p95=8.1s
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## Architecture
|
|
150
|
+
|
|
151
|
+
```
|
|
152
|
+
agentforge/
|
|
153
|
+
├── core/ # ReAct loop + parser + prompts
|
|
154
|
+
├── tools/ # registry, calculator, python repl, web search, sql, rag
|
|
155
|
+
├── memory/ # conversation, persistent sqlite
|
|
156
|
+
├── llm/ # HuggingFace causal LM wrapper
|
|
157
|
+
├── eval/ # 4 metrics + orchestrator
|
|
158
|
+
├── serve/ # FastAPI app
|
|
159
|
+
└── cli.py # af / agentforge
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Every stage is a small module behind a small interface (`LLM`, `Tool`,
|
|
163
|
+
`Memory`) — swap any of them in two lines.
|
|
164
|
+
|
|
165
|
+
## Roadmap
|
|
166
|
+
|
|
167
|
+
- [x] ReAct loop with structured parsing
|
|
168
|
+
- [x] Tool protocol + registry
|
|
169
|
+
- [x] 5 built-in tools (calculator, python, web, sql, rag)
|
|
170
|
+
- [x] Persistent SQLite memory
|
|
171
|
+
- [x] Eval: task completion, final-answer match, tool accuracy, step efficiency
|
|
172
|
+
- [x] FastAPI server + Typer CLI
|
|
173
|
+
- [x] turboquant-ml integration (NF4 / GPTQ / AWQ models)
|
|
174
|
+
- [ ] Plan-and-execute pattern alongside ReAct
|
|
175
|
+
- [ ] Streaming step output in `/ask`
|
|
176
|
+
- [ ] Tool-use chat templates (Qwen tool format, Llama-3 tool format)
|
|
177
|
+
- [ ] Multi-agent coordination
|
|
178
|
+
|
|
179
|
+
## License
|
|
180
|
+
|
|
181
|
+
[MIT](LICENSE).
|
|
File without changes
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Contributing
|
|
2
|
+
|
|
3
|
+
```bash
|
|
4
|
+
git clone https://github.com/Ademo93/agentforge
|
|
5
|
+
cd agentforge
|
|
6
|
+
python -m venv .venv
|
|
7
|
+
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
|
8
|
+
pip install -e ".[dev,serve,tools,eval]"
|
|
9
|
+
pytest
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Style
|
|
13
|
+
|
|
14
|
+
- Ruff for lint and format: `ruff check . && ruff format .`
|
|
15
|
+
- Defer heavy imports inside functions (`transformers`, `sentence-transformers`,
|
|
16
|
+
`fastapi`, `duckduckgo_search`) so unit tests stay fast.
|
|
17
|
+
- Each stage (core, tools, memory, llm, eval, serve) is its own subpackage with
|
|
18
|
+
a tiny public surface. Keep that boundary.
|
|
19
|
+
|
|
20
|
+
## Adding a tool
|
|
21
|
+
|
|
22
|
+
1. Drop `src/agentforge/tools/your_tool.py` with a class exposing `name`,
|
|
23
|
+
`description`, `run(input_str) -> str`.
|
|
24
|
+
2. Re-export it from `tools/__init__.py`.
|
|
25
|
+
3. Write a unit test that calls `tool.run(...)` with a known input.
|
|
26
|
+
4. Add a line to the table in `docs/tools.md`.
|
|
27
|
+
|
|
28
|
+
If the tool has side effects (network, disk, DB), the docstring **must** state
|
|
29
|
+
its constraints (rate limit, sandbox, read-only).
|
|
30
|
+
|
|
31
|
+
## Adding a metric
|
|
32
|
+
|
|
33
|
+
1. Add to `src/agentforge/eval/metrics.py` with the signature
|
|
34
|
+
`metric(result: AgentResult, sample: dict) -> float`.
|
|
35
|
+
2. Register it in `_REGISTRY`.
|
|
36
|
+
3. Add a unit test in `tests/test_eval.py`.
|
|
37
|
+
4. Mention it in `docs/evaluation.md`.
|