agentforge-ml 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. agentforge_ml-0.1.0/.github/workflows/ci.yml +55 -0
  2. agentforge_ml-0.1.0/.github/workflows/docs.yml +46 -0
  3. agentforge_ml-0.1.0/.gitignore +57 -0
  4. agentforge_ml-0.1.0/CHANGELOG.md +20 -0
  5. agentforge_ml-0.1.0/LICENSE +21 -0
  6. agentforge_ml-0.1.0/PKG-INFO +242 -0
  7. agentforge_ml-0.1.0/README.md +181 -0
  8. agentforge_ml-0.1.0/benchmarks/results/.gitkeep +0 -0
  9. agentforge_ml-0.1.0/docs/CONTRIBUTING.md +37 -0
  10. agentforge_ml-0.1.0/docs/evaluation.md +67 -0
  11. agentforge_ml-0.1.0/docs/index.md +58 -0
  12. agentforge_ml-0.1.0/docs/memory.md +46 -0
  13. agentforge_ml-0.1.0/docs/react.md +65 -0
  14. agentforge_ml-0.1.0/docs/tools.md +92 -0
  15. agentforge_ml-0.1.0/examples/basic_agent.py +43 -0
  16. agentforge_ml-0.1.0/examples/eval_agent.py +66 -0
  17. agentforge_ml-0.1.0/examples/eval_set.jsonl +5 -0
  18. agentforge_ml-0.1.0/examples/rag_agent.py +56 -0
  19. agentforge_ml-0.1.0/mkdocs.yml +46 -0
  20. agentforge_ml-0.1.0/pyproject.toml +95 -0
  21. agentforge_ml-0.1.0/src/agentforge/__init__.py +12 -0
  22. agentforge_ml-0.1.0/src/agentforge/cli.py +175 -0
  23. agentforge_ml-0.1.0/src/agentforge/core/__init__.py +15 -0
  24. agentforge_ml-0.1.0/src/agentforge/core/agent.py +186 -0
  25. agentforge_ml-0.1.0/src/agentforge/core/parser.py +85 -0
  26. agentforge_ml-0.1.0/src/agentforge/core/prompts.py +59 -0
  27. agentforge_ml-0.1.0/src/agentforge/eval/__init__.py +19 -0
  28. agentforge_ml-0.1.0/src/agentforge/eval/metrics.py +105 -0
  29. agentforge_ml-0.1.0/src/agentforge/eval/report.py +55 -0
  30. agentforge_ml-0.1.0/src/agentforge/llm/__init__.py +7 -0
  31. agentforge_ml-0.1.0/src/agentforge/llm/base.py +16 -0
  32. agentforge_ml-0.1.0/src/agentforge/llm/hf.py +83 -0
  33. agentforge_ml-0.1.0/src/agentforge/llm/quantized.py +39 -0
  34. agentforge_ml-0.1.0/src/agentforge/memory/__init__.py +7 -0
  35. agentforge_ml-0.1.0/src/agentforge/memory/base.py +23 -0
  36. agentforge_ml-0.1.0/src/agentforge/memory/conversation.py +30 -0
  37. agentforge_ml-0.1.0/src/agentforge/memory/persistent.py +80 -0
  38. agentforge_ml-0.1.0/src/agentforge/serve/__init__.py +5 -0
  39. agentforge_ml-0.1.0/src/agentforge/serve/app.py +83 -0
  40. agentforge_ml-0.1.0/src/agentforge/tools/__init__.py +18 -0
  41. agentforge_ml-0.1.0/src/agentforge/tools/base.py +55 -0
  42. agentforge_ml-0.1.0/src/agentforge/tools/calculator.py +115 -0
  43. agentforge_ml-0.1.0/src/agentforge/tools/python_repl.py +143 -0
  44. agentforge_ml-0.1.0/src/agentforge/tools/rag.py +54 -0
  45. agentforge_ml-0.1.0/src/agentforge/tools/sql.py +64 -0
  46. agentforge_ml-0.1.0/src/agentforge/tools/web_search.py +48 -0
  47. agentforge_ml-0.1.0/src/agentforge/utils.py +35 -0
  48. agentforge_ml-0.1.0/tests/__init__.py +0 -0
  49. agentforge_ml-0.1.0/tests/conftest.py +37 -0
  50. agentforge_ml-0.1.0/tests/test_agent.py +59 -0
  51. agentforge_ml-0.1.0/tests/test_cli.py +22 -0
  52. agentforge_ml-0.1.0/tests/test_eval.py +89 -0
  53. agentforge_ml-0.1.0/tests/test_memory.py +55 -0
  54. agentforge_ml-0.1.0/tests/test_parser.py +46 -0
  55. agentforge_ml-0.1.0/tests/test_serve.py +47 -0
  56. agentforge_ml-0.1.0/tests/test_tools.py +101 -0
@@ -0,0 +1,55 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.11"
17
+ cache: pip
18
+ - run: |
19
+ python -m pip install --upgrade pip
20
+ pip install ruff
21
+ - run: ruff check src tests
22
+ - run: ruff format --check src tests
23
+
24
+ test:
25
+ runs-on: ubuntu-latest
26
+ strategy:
27
+ fail-fast: false
28
+ matrix:
29
+ python-version: ["3.10", "3.11", "3.12"]
30
+ steps:
31
+ - uses: actions/checkout@v4
32
+ - uses: actions/setup-python@v5
33
+ with:
34
+ python-version: ${{ matrix.python-version }}
35
+ cache: pip
36
+ - run: |
37
+ python -m pip install --upgrade pip
38
+ pip install -e ".[dev,serve,tools,eval]"
39
+ - run: pytest -m "not slow and not gpu and not network" --cov=agentforge --cov-report=xml
40
+
41
+ build:
42
+ runs-on: ubuntu-latest
43
+ needs: [lint, test]
44
+ steps:
45
+ - uses: actions/checkout@v4
46
+ - uses: actions/setup-python@v5
47
+ with:
48
+ python-version: "3.11"
49
+ - run: |
50
+ python -m pip install --upgrade pip build
51
+ python -m build
52
+ - uses: actions/upload-artifact@v4
53
+ with:
54
+ name: dist
55
+ path: dist/
@@ -0,0 +1,46 @@
1
+ name: docs
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ paths:
7
+ - docs/**
8
+ - mkdocs.yml
9
+ - .github/workflows/docs.yml
10
+ workflow_dispatch:
11
+
12
+ permissions:
13
+ contents: read
14
+ pages: write
15
+ id-token: write
16
+
17
+ concurrency:
18
+ group: pages
19
+ cancel-in-progress: false
20
+
21
+ jobs:
22
+ build:
23
+ runs-on: ubuntu-latest
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+ - uses: actions/setup-python@v5
27
+ with:
28
+ python-version: "3.11"
29
+ cache: pip
30
+ - run: |
31
+ python -m pip install --upgrade pip
32
+ pip install mkdocs-material pymdown-extensions
33
+ - run: mkdocs build --strict
34
+ - uses: actions/upload-pages-artifact@v3
35
+ with:
36
+ path: site
37
+
38
+ deploy:
39
+ needs: build
40
+ runs-on: ubuntu-latest
41
+ environment:
42
+ name: github-pages
43
+ url: ${{ steps.deployment.outputs.page_url }}
44
+ steps:
45
+ - id: deployment
46
+ uses: actions/deploy-pages@v4
@@ -0,0 +1,57 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+
6
+ .Python
7
+ build/
8
+ dist/
9
+ *.egg-info/
10
+ *.egg
11
+
12
+ .venv/
13
+ venv/
14
+ env/
15
+
16
+ .vscode/
17
+ .idea/
18
+ *.swp
19
+ .DS_Store
20
+
21
+ .pytest_cache/
22
+ .mypy_cache/
23
+ .ruff_cache/
24
+ .coverage
25
+ htmlcov/
26
+
27
+ .ipynb_checkpoints/
28
+
29
+ # Caches
30
+ .cache/
31
+ hf_cache/
32
+ qdrant_storage/
33
+ .agentforge/
34
+
35
+ # Eval results
36
+ benchmarks/results/*.json
37
+ benchmarks/results/*.csv
38
+ !benchmarks/results/.gitkeep
39
+
40
+ # MkDocs
41
+ site/
42
+
43
+ *.log
44
+ logs/
45
+
46
+ .env
47
+ .env.local
48
+
49
+ *.safetensors
50
+ *.bin
51
+ *.gguf
52
+ *.pt
53
+
54
+ # DBs
55
+ *.db
56
+ *.sqlite
57
+ *.sqlite3
@@ -0,0 +1,20 @@
1
+ # Changelog
2
+
3
+ ## [Unreleased]
4
+
5
+ ## [0.1.0] — 2026-06-19 — first release
6
+
7
+ ### Added
8
+ - `Agent` ReAct loop with structured Thought / Action / Observation parser.
9
+ - `ToolRegistry` + 5 built-in tools: `calculator` (AST whitelist),
10
+ `python_repl` (sandboxed exec), `web_search` (DuckDuckGo), `sql` (read-only
11
+ sqlite), `rag` (wraps a `ragforge` Pipeline).
12
+ - LLM backends: `HFLLM` and `QuantizedHFLLM` (via `turboquant-ml` for NF4/GPTQ/AWQ).
13
+ - Memory backends: `ConversationMemory` (in-process FIFO) and
14
+ `PersistentMemory` (SQLite, session-scoped).
15
+ - Eval harness: `task_completion`, `final_answer_match`, `tool_accuracy`,
16
+ `step_efficiency` with a CLI orchestrator.
17
+ - FastAPI server with `/health`, `/tools`, `/ask`.
18
+ - Typer CLI: `af ask / eval / serve / tools`.
19
+ - pytest suite (offline, scripted LLM fixture).
20
+ - CI on Python 3.10-3.12, MkDocs Material docs site.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 AgentForge Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,242 @@
1
+ Metadata-Version: 2.4
2
+ Name: agentforge-ml
3
+ Version: 0.1.0
4
+ Summary: AgentForge — ReAct agents on open-weight LLMs with tools (RAG, REPL, web, SQL, calculator) and an eval harness. Pairs with ragforge-ml and turboquant-ml.
5
+ Project-URL: Homepage, https://github.com/Ademo93/agentforge
6
+ Project-URL: Repository, https://github.com/Ademo93/agentforge
7
+ Project-URL: Issues, https://github.com/Ademo93/agentforge/issues
8
+ Project-URL: Documentation, https://Ademo93.github.io/agentforge/
9
+ Author: AgentForge Contributors
10
+ License: MIT
11
+ License-File: LICENSE
12
+ Keywords: agents,evaluation,function-calling,llm,open-source-llm,rag,react,tool-use
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Science/Research
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: numpy>=1.24
24
+ Requires-Dist: pydantic>=2.7
25
+ Requires-Dist: pyyaml>=6.0
26
+ Requires-Dist: rich>=13.7
27
+ Requires-Dist: torch>=2.2
28
+ Requires-Dist: tqdm>=4.66
29
+ Requires-Dist: transformers>=4.40
30
+ Requires-Dist: typer>=0.12
31
+ Provides-Extra: all
32
+ Requires-Dist: accelerate>=0.30; extra == 'all'
33
+ Requires-Dist: duckduckgo-search>=6.0; extra == 'all'
34
+ Requires-Dist: fastapi>=0.111; extra == 'all'
35
+ Requires-Dist: pandas>=2.2; extra == 'all'
36
+ Requires-Dist: ragforge-ml>=0.1; extra == 'all'
37
+ Requires-Dist: sympy>=1.12; extra == 'all'
38
+ Requires-Dist: turboquant-ml>=0.1; extra == 'all'
39
+ Requires-Dist: uvicorn>=0.30; extra == 'all'
40
+ Provides-Extra: dev
41
+ Requires-Dist: httpx>=0.27; extra == 'dev'
42
+ Requires-Dist: mypy>=1.10; extra == 'dev'
43
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
44
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
45
+ Requires-Dist: pytest>=8.0; extra == 'dev'
46
+ Requires-Dist: ruff>=0.5; extra == 'dev'
47
+ Provides-Extra: eval
48
+ Requires-Dist: pandas>=2.2; extra == 'eval'
49
+ Provides-Extra: quantized
50
+ Requires-Dist: accelerate>=0.30; extra == 'quantized'
51
+ Requires-Dist: turboquant-ml>=0.1; extra == 'quantized'
52
+ Provides-Extra: rag
53
+ Requires-Dist: ragforge-ml>=0.1; extra == 'rag'
54
+ Provides-Extra: serve
55
+ Requires-Dist: fastapi>=0.111; extra == 'serve'
56
+ Requires-Dist: uvicorn>=0.30; extra == 'serve'
57
+ Provides-Extra: tools
58
+ Requires-Dist: duckduckgo-search>=6.0; extra == 'tools'
59
+ Requires-Dist: sympy>=1.12; extra == 'tools'
60
+ Description-Content-Type: text/markdown
61
+
62
+ <h1 align="center">AgentForge</h1>
63
+
64
+ <p align="center">
65
+ <strong>ReAct agents on open-weight LLMs — tools, memory, and an eval harness.</strong>
66
+ <br>
67
+ Pairs with <a href="https://github.com/Ademo93/ragforge">ragforge-ml</a> for retrieval and
68
+ <a href="https://github.com/Ademo93/turboquant">turboquant-ml</a> for quantized model serving.
69
+ </p>
70
+
71
+ <p align="center">
72
+ <a href="https://pypi.org/project/agentforge-ml/"><img alt="PyPI" src="https://img.shields.io/badge/pypi-agentforge--ml-blue"></a>
73
+ <a href="#"><img alt="Python" src="https://img.shields.io/badge/python-3.10%2B-blue"></a>
74
+ <a href="#"><img alt="License" src="https://img.shields.io/badge/license-MIT-green"></a>
75
+ <a href="https://Ademo93.github.io/agentforge/"><img alt="Docs" src="https://img.shields.io/badge/docs-mkdocs--material-blue"></a>
76
+ </p>
77
+
78
+ ---
79
+
80
+ ## Why AgentForge?
81
+
82
+ Most "agent framework" projects use proprietary models (GPT-4, Claude) behind a
83
+ DSL of `Runnable.invoke()` chains nobody can debug. AgentForge is the opposite:
84
+ **ReAct loops on open-weight LLMs (Llama, Qwen, Mistral), with a small registry
85
+ of well-bounded tools, and an evaluation harness so you can measure whether
86
+ your agent is actually doing what you asked.**
87
+
88
+ Three opinions:
89
+
90
+ 1. **Open models first.** Defaults work on `Qwen/Qwen2.5-3B-Instruct` and any
91
+ chat-template HF model. No API key required. Plug in
92
+ [turboquant-ml](https://github.com/Ademo93/turboquant) to serve the model
93
+ quantized.
94
+ 2. **ReAct, not magic.** The loop is a 60-line function (`agent.py:run`) that
95
+ alternates Thought / Action / Observation steps. Easy to read, easy to debug.
96
+ 3. **Tools have hard boundaries.** Python REPL runs in an AST-whitelisted
97
+ sandbox; SQL is read-only; web search is rate-limited; RAG retrieval is
98
+ delegated to [ragforge-ml](https://github.com/Ademo93/ragforge).
99
+
100
+ ## Features
101
+
102
+ | Stage | Default |
103
+ |---|---|
104
+ | **LLM** | Any HuggingFace chat-template model. Optional `bnb-nf4` via `turboquant-ml`. |
105
+ | **Loop** | ReAct with `max_steps`, structured Thought/Action/Observation parser |
106
+ | **Tools** | `calculator`, `python` (sandboxed), `web_search` (DuckDuckGo), `sql` (read-only sqlite), `rag` (RAGforge) |
107
+ | **Memory** | In-memory conversation, persistent SQLite store |
108
+ | **Eval** | `task_completion`, `tool_accuracy`, `step_efficiency`, `final_answer_match` |
109
+ | **Serve** | FastAPI `/ask`, `/tools`, `/health` |
110
+ | **CLI** | `agentforge ask / eval / tools / serve` |
111
+
112
+ ## Installation
113
+
114
+ The PyPI distribution is `agentforge-ml` (the unsuffixed `agentforge` name was
115
+ taken by an unrelated project). Python import and CLI are just `agentforge` /
116
+ `af`:
117
+
118
+ ```bash
119
+ pip install agentforge-ml # core
120
+ pip install "agentforge-ml[tools]" # + sympy + duckduckgo-search
121
+ pip install "agentforge-ml[rag]" # + ragforge-ml integration
122
+ pip install "agentforge-ml[quantized]" # + turboquant-ml NF4 path
123
+ pip install "agentforge-ml[serve]" # + FastAPI
124
+ pip install "agentforge-ml[all]" # everything
125
+ ```
126
+
127
+ ## 60-second tour
128
+
129
+ ```python
130
+ from agentforge import Agent
131
+ from agentforge.tools import Calculator, WebSearch, PythonREPL
132
+
133
+ agent = Agent.from_defaults(
134
+ model_id="Qwen/Qwen2.5-3B-Instruct",
135
+ tools=[Calculator(), PythonREPL(), WebSearch()],
136
+ )
137
+
138
+ result = agent.run("What is 47 * 1337, then take its square root?")
139
+ print(result.final_answer)
140
+ for step in result.steps:
141
+ print(f" [{step.tool}] {step.action_input!r} -> {step.observation!r}")
142
+ ```
143
+
144
+ ### With RAG
145
+
146
+ ```python
147
+ from agentforge import Agent
148
+ from agentforge.tools import RAGTool
149
+ from ragforge import Pipeline
150
+
151
+ rag = Pipeline.from_defaults(model_id="Qwen/Qwen2.5-3B-Instruct")
152
+ rag.ingest(["docs/"])
153
+
154
+ agent = Agent.from_defaults(
155
+ model_id="Qwen/Qwen2.5-3B-Instruct",
156
+ tools=[RAGTool(rag)],
157
+ )
158
+ print(agent.run("What is our company refund policy?").final_answer)
159
+ ```
160
+
161
+ ### CLI
162
+
163
+ ```bash
164
+ af ask "What is 17 squared?" --tools calculator
165
+ af ask "Latest CVE for log4j?" --tools web_search
166
+ af eval examples/eval_set.jsonl --tools calculator,python_repl
167
+ af serve --tools calculator,python_repl --port 8080
168
+ ```
169
+
170
+ ## ReAct loop, in a picture
171
+
172
+ ```text
173
+ question -> [LLM] Thought + Action -> [Tool] Observation
174
+ ^ |
175
+ |_______________________________________|
176
+ up to max_steps
177
+ ```
178
+
179
+ If the LLM emits `Final Answer:` the loop exits. Otherwise it loops until
180
+ `max_steps`. The parser is forgiving: it tolerates whitespace and case
181
+ differences, and falls back to the last completed step if output is truncated.
182
+
183
+ ## Eval harness
184
+
185
+ Built-in, pure Python, no judge model required:
186
+
187
+ | Metric | What it measures |
188
+ |---|---|
189
+ | **`task_completion`** | Did the agent produce a `Final Answer:`? |
190
+ | **`final_answer_match`** | Does the answer contain the ground-truth string (case-folded substring)? |
191
+ | **`tool_accuracy`** | Of the steps, what fraction used the expected tool? |
192
+ | **`step_efficiency`** | `ground_truth_steps / actual_steps`, clipped to [0, 1] |
193
+
194
+ ```bash
195
+ af eval examples/eval_set.jsonl --tools all
196
+ ```
197
+
198
+ ```text
199
+ +--------------------+--------+
200
+ | metric | mean |
201
+ +--------------------+--------+
202
+ | task_completion | 0.95 |
203
+ | final_answer_match | 0.81 |
204
+ | tool_accuracy | 0.88 |
205
+ | step_efficiency | 0.72 |
206
+ +--------------------+--------+
207
+ n=80 · p50=2.4s · p95=8.1s
208
+ ```
209
+
210
+ ## Architecture
211
+
212
+ ```
213
+ agentforge/
214
+ ├── core/ # ReAct loop + parser + prompts
215
+ ├── tools/ # registry, calculator, python repl, web search, sql, rag
216
+ ├── memory/ # conversation, persistent sqlite
217
+ ├── llm/ # HuggingFace causal LM wrapper
218
+ ├── eval/ # 4 metrics + orchestrator
219
+ ├── serve/ # FastAPI app
220
+ └── cli.py # af / agentforge
221
+ ```
222
+
223
+ Every stage is a small module behind a small interface (`LLM`, `Tool`,
224
+ `Memory`) — swap any of them in two lines.
225
+
226
+ ## Roadmap
227
+
228
+ - [x] ReAct loop with structured parsing
229
+ - [x] Tool protocol + registry
230
+ - [x] 5 built-in tools (calculator, python, web, sql, rag)
231
+ - [x] Persistent SQLite memory
232
+ - [x] Eval: task completion, final-answer match, tool accuracy, step efficiency
233
+ - [x] FastAPI server + Typer CLI
234
+ - [x] turboquant-ml integration (NF4 / GPTQ / AWQ models)
235
+ - [ ] Plan-and-execute pattern alongside ReAct
236
+ - [ ] Streaming step output in `/ask`
237
+ - [ ] Tool-use chat templates (Qwen tool format, Llama-3 tool format)
238
+ - [ ] Multi-agent coordination
239
+
240
+ ## License
241
+
242
+ [MIT](LICENSE).
@@ -0,0 +1,181 @@
1
+ <h1 align="center">AgentForge</h1>
2
+
3
+ <p align="center">
4
+ <strong>ReAct agents on open-weight LLMs — tools, memory, and an eval harness.</strong>
5
+ <br>
6
+ Pairs with <a href="https://github.com/Ademo93/ragforge">ragforge-ml</a> for retrieval and
7
+ <a href="https://github.com/Ademo93/turboquant">turboquant-ml</a> for quantized model serving.
8
+ </p>
9
+
10
+ <p align="center">
11
+ <a href="https://pypi.org/project/agentforge-ml/"><img alt="PyPI" src="https://img.shields.io/badge/pypi-agentforge--ml-blue"></a>
12
+ <a href="#"><img alt="Python" src="https://img.shields.io/badge/python-3.10%2B-blue"></a>
13
+ <a href="#"><img alt="License" src="https://img.shields.io/badge/license-MIT-green"></a>
14
+ <a href="https://Ademo93.github.io/agentforge/"><img alt="Docs" src="https://img.shields.io/badge/docs-mkdocs--material-blue"></a>
15
+ </p>
16
+
17
+ ---
18
+
19
+ ## Why AgentForge?
20
+
21
+ Most "agent framework" projects use proprietary models (GPT-4, Claude) behind a
22
+ DSL of `Runnable.invoke()` chains nobody can debug. AgentForge is the opposite:
23
+ **ReAct loops on open-weight LLMs (Llama, Qwen, Mistral), with a small registry
24
+ of well-bounded tools, and an evaluation harness so you can measure whether
25
+ your agent is actually doing what you asked.**
26
+
27
+ Three opinions:
28
+
29
+ 1. **Open models first.** Defaults work on `Qwen/Qwen2.5-3B-Instruct` and any
30
+ chat-template HF model. No API key required. Plug in
31
+ [turboquant-ml](https://github.com/Ademo93/turboquant) to serve the model
32
+ quantized.
33
+ 2. **ReAct, not magic.** The loop is a 60-line function (`agent.py:run`) that
34
+ alternates Thought / Action / Observation steps. Easy to read, easy to debug.
35
+ 3. **Tools have hard boundaries.** Python REPL runs in an AST-whitelisted
36
+ sandbox; SQL is read-only; web search is rate-limited; RAG retrieval is
37
+ delegated to [ragforge-ml](https://github.com/Ademo93/ragforge).
38
+
39
+ ## Features
40
+
41
+ | Stage | Default |
42
+ |---|---|
43
+ | **LLM** | Any HuggingFace chat-template model. Optional `bnb-nf4` via `turboquant-ml`. |
44
+ | **Loop** | ReAct with `max_steps`, structured Thought/Action/Observation parser |
45
+ | **Tools** | `calculator`, `python` (sandboxed), `web_search` (DuckDuckGo), `sql` (read-only sqlite), `rag` (RAGforge) |
46
+ | **Memory** | In-memory conversation, persistent SQLite store |
47
+ | **Eval** | `task_completion`, `tool_accuracy`, `step_efficiency`, `final_answer_match` |
48
+ | **Serve** | FastAPI `/ask`, `/tools`, `/health` |
49
+ | **CLI** | `agentforge ask / eval / tools / serve` |
50
+
51
+ ## Installation
52
+
53
+ The PyPI distribution is `agentforge-ml` (the unsuffixed `agentforge` name was
54
+ taken by an unrelated project). The Python import name is still `agentforge`,
55
+ and the CLI is available as both `agentforge` and `af`:
56
+
57
+ ```bash
58
+ pip install agentforge-ml # core
59
+ pip install "agentforge-ml[tools]" # + sympy + duckduckgo-search
60
+ pip install "agentforge-ml[rag]" # + ragforge-ml integration
61
+ pip install "agentforge-ml[quantized]" # + turboquant-ml NF4 path
62
+ pip install "agentforge-ml[serve]" # + FastAPI
63
+ pip install "agentforge-ml[all]" # everything
64
+ ```
65
+
66
+ ## 60-second tour
67
+
68
+ ```python
69
+ from agentforge import Agent
70
+ from agentforge.tools import Calculator, WebSearch, PythonREPL
71
+
72
+ agent = Agent.from_defaults(
73
+ model_id="Qwen/Qwen2.5-3B-Instruct",
74
+ tools=[Calculator(), PythonREPL(), WebSearch()],
75
+ )
76
+
77
+ result = agent.run("What is 47 * 1337, then take its square root?")
78
+ print(result.final_answer)
79
+ for step in result.steps:
80
+ print(f" [{step.tool}] {step.action_input!r} -> {step.observation!r}")
81
+ ```
82
+
83
+ ### With RAG
84
+
85
+ ```python
86
+ from agentforge import Agent
87
+ from agentforge.tools import RAGTool
88
+ from ragforge import Pipeline
89
+
90
+ rag = Pipeline.from_defaults(model_id="Qwen/Qwen2.5-3B-Instruct")
91
+ rag.ingest(["docs/"])
92
+
93
+ agent = Agent.from_defaults(
94
+ model_id="Qwen/Qwen2.5-3B-Instruct",
95
+ tools=[RAGTool(rag)],
96
+ )
97
+ print(agent.run("What is our company refund policy?").final_answer)
98
+ ```
99
+
100
+ ### CLI
101
+
102
+ ```bash
103
+ af ask "What is 17 squared?" --tools calculator
104
+ af ask "Latest CVE for log4j?" --tools web_search
105
+ af eval examples/eval_set.jsonl --tools calculator,python_repl
106
+ af serve --tools calculator,python_repl --port 8080
107
+ ```
108
+
109
+ ## ReAct loop, in a picture
110
+
111
+ ```text
112
+ question -> [LLM] Thought + Action -> [Tool] Observation
113
+ ^ |
114
+ |_______________________________________|
115
+ up to max_steps
116
+ ```
117
+
118
+ If the LLM emits `Final Answer:` the loop exits. Otherwise it loops until
119
+ `max_steps`. The parser is forgiving: it tolerates whitespace and case
120
+ differences, and falls back to the last completed step if output is truncated.
121
+
122
+ ## Eval harness
123
+
124
+ Built-in, pure Python, no judge model required:
125
+
126
+ | Metric | What it measures |
127
+ |---|---|
128
+ | **`task_completion`** | Did the agent produce a `Final Answer:`? |
129
+ | **`final_answer_match`** | Does the answer contain the ground-truth string (case-folded substring)? |
130
+ | **`tool_accuracy`** | Of the steps, what fraction used the expected tool? |
131
+ | **`step_efficiency`** | `ground_truth_steps / actual_steps`, clipped to [0, 1] |
132
+
133
+ ```bash
134
+ af eval examples/eval_set.jsonl --tools all
135
+ ```
136
+
137
+ ```text
138
+ +--------------------+--------+
139
+ | metric | mean |
140
+ +--------------------+--------+
141
+ | task_completion | 0.95 |
142
+ | final_answer_match | 0.81 |
143
+ | tool_accuracy | 0.88 |
144
+ | step_efficiency | 0.72 |
145
+ +--------------------+--------+
146
+ n=80 · p50=2.4s · p95=8.1s
147
+ ```
148
+
149
+ ## Architecture
150
+
151
+ ```text
152
+ agentforge/
153
+ ├── core/ # ReAct loop + parser + prompts
154
+ ├── tools/ # registry, calculator, python repl, web search, sql, rag
155
+ ├── memory/ # conversation, persistent sqlite
156
+ ├── llm/ # HuggingFace causal LM wrapper
157
+ ├── eval/ # 4 metrics + orchestrator
158
+ ├── serve/ # FastAPI app
159
+ └── cli.py # af / agentforge
160
+ ```
161
+
162
+ Every stage is a small module behind a small interface (`LLM`, `Tool`,
163
+ `Memory`) — swap any of them in two lines.
164
+
165
+ ## Roadmap
166
+
167
+ - [x] ReAct loop with structured parsing
168
+ - [x] Tool protocol + registry
169
+ - [x] 5 built-in tools (calculator, python, web, sql, rag)
170
+ - [x] Persistent SQLite memory
171
+ - [x] Eval: task completion, final-answer match, tool accuracy, step efficiency
172
+ - [x] FastAPI server + Typer CLI
173
+ - [x] turboquant-ml integration (NF4 / GPTQ / AWQ models)
174
+ - [ ] Plan-and-execute pattern alongside ReAct
175
+ - [ ] Streaming step output in `/ask`
176
+ - [ ] Tool-use chat templates (Qwen tool format, Llama-3 tool format)
177
+ - [ ] Multi-agent coordination
178
+
179
+ ## License
180
+
181
+ [MIT](LICENSE).
File without changes
@@ -0,0 +1,37 @@
1
+ # Contributing
2
+
3
+ ```bash
4
+ git clone https://github.com/Ademo93/agentforge
5
+ cd agentforge
6
+ python -m venv .venv
7
+ source .venv/bin/activate # Windows: .venv\Scripts\activate
8
+ pip install -e ".[dev,serve,tools,eval]"
9
+ pytest
10
+ ```
11
+
12
+ ## Style
13
+
14
+ - Ruff for lint and format: `ruff check . && ruff format .`
15
+ - Defer heavy imports inside functions (`transformers`, `sentence-transformers`,
16
+ `fastapi`, `duckduckgo_search`) so unit tests stay fast.
17
+ - Each stage (core, tools, memory, llm, eval, serve) is its own subpackage with
18
+ a tiny public surface. Keep that boundary.
19
+
20
+ ## Adding a tool
21
+
22
+ 1. Add `src/agentforge/tools/your_tool.py` containing a class that exposes `name`,
23
+ `description`, and `run(input_str) -> str`.
24
+ 2. Re-export it from `tools/__init__.py`.
25
+ 3. Write a unit test that calls `tool.run(...)` with a known input.
26
+ 4. Add a line to the table in `docs/tools.md`.
27
+
28
+ If the tool has side effects (network, disk, DB), the docstring **must** state
29
+ its constraints (rate limit, sandbox, read-only).
30
+
31
+ ## Adding a metric
32
+
33
+ 1. Add a function to `src/agentforge/eval/metrics.py` with the signature
34
+ `metric(result: AgentResult, sample: dict) -> float`.
35
+ 2. Register it in `_REGISTRY`.
36
+ 3. Add a unit test in `tests/test_eval.py`.
37
+ 4. Mention it in `docs/evaluation.md`.