forum-engine 1.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. forum_engine-1.4.0/LICENSE +78 -0
  2. forum_engine-1.4.0/PKG-INFO +281 -0
  3. forum_engine-1.4.0/README.md +184 -0
  4. forum_engine-1.4.0/pyproject.toml +44 -0
  5. forum_engine-1.4.0/setup.cfg +4 -0
  6. forum_engine-1.4.0/src/forum/__init__.py +3 -0
  7. forum_engine-1.4.0/src/forum/actor.py +43 -0
  8. forum_engine-1.4.0/src/forum/api_executor.py +86 -0
  9. forum_engine-1.4.0/src/forum/budget.py +18 -0
  10. forum_engine-1.4.0/src/forum/chat_executor.py +103 -0
  11. forum_engine-1.4.0/src/forum/cli.py +264 -0
  12. forum_engine-1.4.0/src/forum/context.py +22 -0
  13. forum_engine-1.4.0/src/forum/control.py +121 -0
  14. forum_engine-1.4.0/src/forum/daemon.py +186 -0
  15. forum_engine-1.4.0/src/forum/dispatch.py +65 -0
  16. forum_engine-1.4.0/src/forum/engine.py +300 -0
  17. forum_engine-1.4.0/src/forum/executor.py +75 -0
  18. forum_engine-1.4.0/src/forum/hashing.py +17 -0
  19. forum_engine-1.4.0/src/forum/http_surface.py +180 -0
  20. forum_engine-1.4.0/src/forum/intent.py +64 -0
  21. forum_engine-1.4.0/src/forum/ledger.py +225 -0
  22. forum_engine-1.4.0/src/forum/llm.py +25 -0
  23. forum_engine-1.4.0/src/forum/manifests/default-roster.toml +208 -0
  24. forum_engine-1.4.0/src/forum/mcp_surface.py +171 -0
  25. forum_engine-1.4.0/src/forum/message.py +41 -0
  26. forum_engine-1.4.0/src/forum/plan.py +41 -0
  27. forum_engine-1.4.0/src/forum/policy.py +18 -0
  28. forum_engine-1.4.0/src/forum/report.py +82 -0
  29. forum_engine-1.4.0/src/forum/roster.py +82 -0
  30. forum_engine-1.4.0/src/forum/routing.py +50 -0
  31. forum_engine-1.4.0/src/forum/storage.py +144 -0
  32. forum_engine-1.4.0/src/forum/supervisor.py +29 -0
  33. forum_engine-1.4.0/src/forum_engine.egg-info/PKG-INFO +281 -0
  34. forum_engine-1.4.0/src/forum_engine.egg-info/SOURCES.txt +72 -0
  35. forum_engine-1.4.0/src/forum_engine.egg-info/dependency_links.txt +1 -0
  36. forum_engine-1.4.0/src/forum_engine.egg-info/entry_points.txt +2 -0
  37. forum_engine-1.4.0/src/forum_engine.egg-info/requires.txt +6 -0
  38. forum_engine-1.4.0/src/forum_engine.egg-info/top_level.txt +1 -0
  39. forum_engine-1.4.0/tests/test_actor.py +51 -0
  40. forum_engine-1.4.0/tests/test_api_executor.py +65 -0
  41. forum_engine-1.4.0/tests/test_budget.py +111 -0
  42. forum_engine-1.4.0/tests/test_chat_executor.py +72 -0
  43. forum_engine-1.4.0/tests/test_classifier.py +31 -0
  44. forum_engine-1.4.0/tests/test_cli.py +134 -0
  45. forum_engine-1.4.0/tests/test_context.py +76 -0
  46. forum_engine-1.4.0/tests/test_coordinator.py +57 -0
  47. forum_engine-1.4.0/tests/test_daemon.py +242 -0
  48. forum_engine-1.4.0/tests/test_dispatch.py +94 -0
  49. forum_engine-1.4.0/tests/test_engine.py +111 -0
  50. forum_engine-1.4.0/tests/test_escalation.py +122 -0
  51. forum_engine-1.4.0/tests/test_executor.py +10 -0
  52. forum_engine-1.4.0/tests/test_hashing.py +12 -0
  53. forum_engine-1.4.0/tests/test_http_surface.py +169 -0
  54. forum_engine-1.4.0/tests/test_intent.py +165 -0
  55. forum_engine-1.4.0/tests/test_ledger_chain.py +63 -0
  56. forum_engine-1.4.0/tests/test_ledger_get.py +31 -0
  57. forum_engine-1.4.0/tests/test_ledger_replay.py +58 -0
  58. forum_engine-1.4.0/tests/test_llm.py +26 -0
  59. forum_engine-1.4.0/tests/test_mcp_surface.py +154 -0
  60. forum_engine-1.4.0/tests/test_message.py +21 -0
  61. forum_engine-1.4.0/tests/test_plan.py +35 -0
  62. forum_engine-1.4.0/tests/test_policy.py +17 -0
  63. forum_engine-1.4.0/tests/test_real_model.py +50 -0
  64. forum_engine-1.4.0/tests/test_report.py +131 -0
  65. forum_engine-1.4.0/tests/test_roster.py +57 -0
  66. forum_engine-1.4.0/tests/test_roster_default.py +56 -0
  67. forum_engine-1.4.0/tests/test_routing.py +44 -0
  68. forum_engine-1.4.0/tests/test_routing_ladder.py +77 -0
  69. forum_engine-1.4.0/tests/test_storage.py +135 -0
  70. forum_engine-1.4.0/tests/test_subprocess_executor.py +26 -0
  71. forum_engine-1.4.0/tests/test_supervisor.py +30 -0
  72. forum_engine-1.4.0/tests/test_synthesizer.py +21 -0
  73. forum_engine-1.4.0/tests/test_validator.py +32 -0
  74. forum_engine-1.4.0/tests/test_witnessing.py +111 -0
@@ -0,0 +1,78 @@
1
+ Forum Fair-Source License, Version 1.0
2
+
3
+ Copyright (c) 2026 Zain Dana Harper. All rights reserved.
4
+
5
+ This license governs use of the accompanying software ("the Software"). By using,
6
+ copying, modifying, or distributing the Software, you accept these terms. The
7
+ Software is source-available, not open source: the source is published so you can
8
+ read it, run it, and build on it, while commercial use that competes with the
9
+ project is reserved so the project can fund its own continued development.
10
+
11
+ 1. Definitions
12
+
13
+ "Licensor" means Zain Dana Harper, the copyright holder.
14
+
15
+ "You" means the individual or entity exercising rights under this license.
16
+
17
+ "Competing Use" means making the Software, or a modified version of it,
18
+ available to a third party as a commercial product or service that
19
+ substitutes for, or offers substantially the same functionality as, the
20
+ Software or any product or service the Licensor offers using the Software.
21
+
22
+ 2. Grant
23
+
24
+ Subject to your compliance with this license, the Licensor grants you a
25
+ worldwide, royalty-free, non-exclusive, non-transferable license to read,
26
+ run, copy, modify, create derivative works of, and redistribute the Software
27
+ for any Permitted Purpose.
28
+
29
+ 3. Permitted Purpose
30
+
31
+ A Permitted Purpose is any purpose other than a Competing Use. Permitted
32
+ Purposes include, without limitation: internal use within your organization;
33
+ personal use; evaluation; non-commercial education and research; and use in
34
+ providing professional services to a party that is itself using the Software
35
+ under this license.
36
+
37
+ 4. Reserved Commercial Use
38
+
39
+ A Competing Use is reserved to the Licensor and requires a separate
40
+ commercial license. This reservation is what funds the project's continued
41
+ development. To obtain a commercial license, contact the Licensor (see
42
+ Contact below).
43
+
44
+ 5. Conditions
45
+
46
+ You must retain, in all copies and derivative works you distribute, this
47
+ license, the copyright notice, and all attribution notices. You may add your
48
+ own notices to changes you make, so long as the origin of the Software is not
49
+ misrepresented.
50
+
51
+ 6. Trademarks
52
+
53
+ This license does not grant any right to use the Licensor's names, logos, or
54
+ trademarks.
55
+
56
+ 7. Disclaimer of Warranty
57
+
58
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
59
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
60
+ FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT.
61
+
62
+ 8. Limitation of Liability
63
+
64
+ IN NO EVENT SHALL THE LICENSOR BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER
65
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM,
66
+ OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
67
+ SOFTWARE.
68
+
69
+ 9. Termination
70
+
71
+ If you breach this license, your rights under it terminate automatically. They
72
+ may be reinstated by the Licensor in writing.
73
+
74
+ 10. Contact
75
+
76
+ For commercial licensing or to report a security issue, open a private
77
+ security advisory at https://github.com/HarperZ9/forum/security or reach the
78
+ Licensor via https://github.com/HarperZ9.
@@ -0,0 +1,281 @@
1
+ Metadata-Version: 2.4
2
+ Name: forum-engine
3
+ Version: 1.4.0
4
+ Summary: An orchestration engine for AI agents that records every step in a ledger you can verify
5
+ Author: Zain Dana Harper
6
+ License: Forum Fair-Source License, Version 1.0
7
+
8
+ Copyright (c) 2026 Zain Dana Harper. All rights reserved.
9
+
10
+ This license governs use of the accompanying software ("the Software"). By using,
11
+ copying, modifying, or distributing the Software, you accept these terms. The
12
+ Software is source-available, not open source: the source is published so you can
13
+ read it, run it, and build on it, while commercial use that competes with the
14
+ project is reserved so the project can fund its own continued development.
15
+
16
+ 1. Definitions
17
+
18
+ "Licensor" means Zain Dana Harper, the copyright holder.
19
+
20
+ "You" means the individual or entity exercising rights under this license.
21
+
22
+ "Competing Use" means making the Software, or a modified version of it,
23
+ available to a third party as a commercial product or service that
24
+ substitutes for, or offers substantially the same functionality as, the
25
+ Software or any product or service the Licensor offers using the Software.
26
+
27
+ 2. Grant
28
+
29
+ Subject to your compliance with this license, the Licensor grants you a
30
+ worldwide, royalty-free, non-exclusive, non-transferable license to read,
31
+ run, copy, modify, create derivative works of, and redistribute the Software
32
+ for any Permitted Purpose.
33
+
34
+ 3. Permitted Purpose
35
+
36
+ A Permitted Purpose is any purpose other than a Competing Use. Permitted
37
+ Purposes include, without limitation: internal use within your organization;
38
+ personal use; evaluation; non-commercial education and research; and use in
39
+ providing professional services to a party that is itself using the Software
40
+ under this license.
41
+
42
+ 4. Reserved Commercial Use
43
+
44
+ A Competing Use is reserved to the Licensor and requires a separate
45
+ commercial license. This reservation is what funds the project's continued
46
+ development. To obtain a commercial license, contact the Licensor (see
47
+ Contact below).
48
+
49
+ 5. Conditions
50
+
51
+ You must retain, in all copies and derivative works you distribute, this
52
+ license, the copyright notice, and all attribution notices. You may add your
53
+ own notices to changes you make, so long as the origin of the Software is not
54
+ misrepresented.
55
+
56
+ 6. Trademarks
57
+
58
+ This license does not grant any right to use the Licensor's names, logos, or
59
+ trademarks.
60
+
61
+ 7. Disclaimer of Warranty
62
+
63
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
64
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
65
+ FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT.
66
+
67
+ 8. Limitation of Liability
68
+
69
+ IN NO EVENT SHALL THE LICENSOR BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER
70
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM,
71
+ OUT OF, OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
72
+ SOFTWARE.
73
+
74
+ 9. Termination
75
+
76
+ If you breach this license, your rights under it terminate automatically. They
77
+ may be reinstated by the Licensor in writing.
78
+
79
+ 10. Contact
80
+
81
+ For commercial licensing or to report a security issue, open a private
82
+ security advisory at https://github.com/HarperZ9/forum/security or reach the
83
+ Licensor via https://github.com/HarperZ9.
84
+
85
+ Project-URL: Homepage, https://github.com/HarperZ9/forum
86
+ Project-URL: Repository, https://github.com/HarperZ9/forum
87
+ Keywords: agents,orchestration,ledger,accountability,multi-agent,audit,zero-dependency
88
+ Requires-Python: >=3.11
89
+ Description-Content-Type: text/markdown
90
+ License-File: LICENSE
91
+ Provides-Extra: dev
92
+ Requires-Dist: pytest>=8; extra == "dev"
93
+ Requires-Dist: pytest-cov>=5; extra == "dev"
94
+ Requires-Dist: ruff>=0.6; extra == "dev"
95
+ Requires-Dist: mypy>=1.10; extra == "dev"
96
+ Dynamic: license-file
97
+
98
+ # Forum
99
+
100
+ [![CI](https://github.com/HarperZ9/forum/actions/workflows/ci.yml/badge.svg)](https://github.com/HarperZ9/forum/actions/workflows/ci.yml)
101
+ ![license: fair-source](https://img.shields.io/badge/license-fair--source-blue.svg)
102
+ ![python: 3.11+](https://img.shields.io/badge/python-3.11%2B-blue.svg)
103
+ ![deps: none](https://img.shields.io/badge/deps-none-success.svg)
104
+
105
+ Every few months there's a new framework for orchestrating AI agents. You wire one
106
+ up, hand it a task, and it works. Then you try to run it for real, and you hit the
107
+ question that actually matters: what happened on that run, and can you prove it?
108
+ Usually all you've got is a pile of model output and a log you're supposed to trust.
109
+
110
+ Forum starts from that question. It's an orchestration engine for fleets of agents,
111
+ and the idea underneath it is simple. The record of what happened isn't a side effect
112
+ of the work. It is the work. Every routing decision, every task, every result goes
113
+ into a ledger you can verify, replay, and trace. Think of how a bank reconciles its
114
+ books instead of trusting the teller's memory.
115
+
116
+ Here's why it's built this way. A language model has no memory of its own. Each call
117
+ starts from nothing. If you want to build something dependable on top of that, you
118
+ have to give a forgetful mind two things it can't supply for itself: a record that
119
+ outlives the conversation, and a way to check that record instead of trusting it. You
120
+ also need reach, the ability to act across a lot of agents at once. That's the real
121
+ project. The small zero-dependency pieces in this repo aren't the goal. They're the
122
+ bricks.
123
+
124
+ Everything here is built and runs: the foundation (the ledger, the router, the
125
+ planner), the runtime that executes a plan across agents and witnesses every step,
126
+ real executors (a task can run any command as a subprocess, including a model CLI, or call a
127
+ model over the API), the control loop that turns a plain request into a plan and a
128
+ single verified answer, a durable ledger that survives a restart, an always-on daemon
129
+ over HTTP and MCP, and a `forum` command to drive it all. Every routing decision,
130
+ plan, task, result, and verdict goes into a ledger you can verify, replay, and trace.
131
+ The examples below show it, and the small zero-dependency pieces are still the bricks.
132
+
133
+ ## Watch it work
134
+
135
+ ```bash
136
+ git clone https://github.com/HarperZ9/forum
137
+ cd forum
138
+ python examples/demo.py # no install, nothing to download
139
+ ```
140
+
141
+ The demo routes a few requests, plans a small dependency graph, records every step,
142
+ and then does the interesting part. It quietly corrupts a stored result and checks
143
+ whether the ledger notices.
144
+
145
+ ```
146
+ 1. Routing (deterministic Tier-0; decides a lane or escalates)
147
+ 'build the database schema and the auth endpoint' -> backend
148
+ 'build the react component and css for the page' -> frontend
149
+ 'write the readme docs and the guide' -> docs
150
+ 'summon a unicorn' -> escalate -> needs an LLM classifier (confidence 0.00)
151
+
152
+ 2. Planning (DAG -> parallel waves, capped by policy max_parallel=2)
153
+ wave 0: ['T1']
154
+ wave 1: ['T2']
155
+ wave 2: ['T3', 'T4']
156
+
157
+ 4. Accountability: verify, tamper-detect, replay
158
+ verify() (chain) : True
159
+ verify(deep=True) : True
160
+ causal chain of last : request -> plan -> task -> result
161
+
162
+ ...now tamper with a stored payload body (seq 2)
163
+ verify() (chain only) : True <- chain hashes still link
164
+ verify(deep=True) : False <- body tamper caught
165
+ ```
166
+
167
+ Look at those last two lines. The chain of hashes still links, so a quick check
168
+ passes. But the contents of one record no longer match what was promised, and the
169
+ deeper check says so. You don't have to trust the record. You can check it.
170
+
171
+ To see the engine run a whole plan instead of just the ledger, there's a second
172
+ example:
173
+
174
+ ```bash
175
+ python examples/run.py
176
+ ```
177
+
178
+ It routes a request, runs a three-step plan across agents (with a stub standing in for
179
+ a real model), and verifies the entire run from the ledger at the end.
180
+
181
+ ## From the command line
182
+
183
+ Installed, Forum gives you a `forum` command:
184
+
185
+ ```bash
186
+ forum route "build the auth endpoint and the database schema" # which lane, no model needed
187
+ forum submit "ship a login API" --cmd "ollama run llama3" # plan, run, answer with a local model, no account
188
+ forum serve --chat-url http://localhost:11434/v1/chat/completions --model llama3 # the HTTP daemon
189
+ forum mcp --cmd "ollama run llama3" # the MCP stdio server
190
+ forum ledger verify # check the record
191
+ forum ledger show --limit 20 # the last 20 entries
192
+ ```
193
+
194
+ `submit`, `serve`, and `mcp` reach a model, and Forum is model-agnostic about which.
195
+ `--cmd "<any command>"` runs any model (a local CLI needs no account), `--chat-url`
196
+ talks to any OpenAI-compatible server (local or cloud), and `--api` is one specific
197
+ provider (Anthropic). Routing and the ledger commands need no model at all. See
198
+ [RUNNING.md](RUNNING.md).
199
+
200
+ ## How the ledger works
201
+
202
+ A log tells you what a program says it did. A ledger lets you prove it. Two old ideas
203
+ do most of the work.
204
+
205
+ The first is a hash chain. Every entry carries a fingerprint of the one before it.
206
+ Edit a past entry, drop one, or shuffle the order, and the fingerprints stop lining
207
+ up. `verify()` walks the chain and tells you where.
208
+
209
+ The second is content addressing. The bulky parts, the prompts and the outputs, are
210
+ stored under a fingerprint of their own bytes rather than inline. That keeps the chain
211
+ small, and it has a useful side effect: you can redact a sensitive body down to its
212
+ fingerprint and the chain still checks out. When the bodies are there,
213
+ `verify(deep=True)` re-hashes each one to make sure it still matches. That's what
214
+ catches the swapped result in the demo.
215
+
216
+ Everything else falls out of those two. `replay(until=...)` rebuilds the exact state
217
+ at any past point, which works because the core is pure and entries never change.
218
+ `causal_chain(seq)` follows the parent links to answer the question every postmortem
219
+ comes back to: why did this happen? And `checkpoint()` folds the whole history into
220
+ one Merkle root. The leaves and the internal nodes are tagged differently, and odd
221
+ nodes get carried up rather than duplicated, so it avoids both the second-preimage
222
+ attack and the duplicate-node forgery (CVE-2012-2459) that naive Merkle code runs into.
223
+
224
+ None of this is worth much if the record dies with the process. By default the
225
+ ledger lives in memory, which is right for a test or a single run. Point it at a
226
+ `FileStorage` instead and every entry is appended to a file and fsynced before the
227
+ next one, so the ledger survives a restart and still verifies, replays, and
228
+ checkpoints exactly. If a crash cuts the final write short, that half-written line
229
+ is dropped on reload and the rest of the record stands. Tampering does not get a
230
+ quieter treatment: a reordered file still loads, and `verify()` still says no.
231
+
232
+ ## What's here
233
+
234
+ - `forum.ledger`: the record. Hash chain, content-addressed bodies, `verify` / `verify(deep=True)`, `replay`, `causal_chain`, Merkle `checkpoint`.
235
+ - `forum.storage`: where the record lives. An in-memory store for tests and short runs, and a durable `FileStorage` (append-only JSONL) so a ledger survives a restart and stays verifiable.
236
+ - `forum.routing`: a router that reads a request, picks a lane, and only falls back to a model when the keywords genuinely can't decide.
237
+ - `forum.plan`: a task graph compiled into parallel waves, with cycles and missing dependencies caught up front.
238
+ - `forum.roster`: the cast of specialists, written as plain data in a TOML file and validated on load. Ships with a built-in default roster of 24 plain capability lanes (`load_default()`), so a fresh install has a real roster out of the box.
239
+ - `forum.policy`: the rules of the room. Which work can run, and how much at once.
240
+ - `forum.executor` / `forum.chat_executor` / `forum.api_executor`: how work actually runs, model-agnostic. A stub for tests, a `SubprocessExecutor` that runs any command (a local model CLI needs no account), a `ChatExecutor` for any OpenAI-compatible server (local or cloud), and an `ApiExecutor` for the Anthropic API. A failing task is witnessed, not fatal; each result records which model produced it, and a failed task can escalate up a ladder of stronger executors, witnessed.
241
+ - `forum.control` and `Orchestrator.submit`: the control loop. A Coordinator turns a plain request into a plan, a Classifier picks an agent when keywords can't, a Validator judges each result, and a Synthesizer writes one answer. Every step is witnessed.
242
+ - `forum.context` and `forum.budget`: the run contract. A `ContextProvider` seam so a run plans on organized context from a brain (the index flagship), witnessed as the exact context that shaped it; and a `RunBudget` that bounds a run and witnesses where it stopped.
243
+ - `forum.daemon` / `forum.http_surface`: an always-on HTTP service (stdlib asyncio, no framework) over one long-lived, durable ledger. Submit a request, read a witnessed answer, and verify or replay the record over HTTP.
244
+ - `forum.mcp_surface`: the same tools over MCP (JSON-RPC on stdio), the lone optional edge. It is a thin adapter over the HTTP surface, so the two can never drift.
245
+ - `forum.intent`: did the run answer the request? After synthesis, a deterministic, reproducible measure of how much of the request's vocabulary the final answer covers is witnessed as its own entry, so a completed run carries an auditable drift signal. It is a lexical floor that flags a run for review, not a verdict that blocks it.
246
+ - `forum.report`: reading the record. `summarize(ledger)` aggregates a witnessed run into counts, model calls, the checkpoint, and the verify result, reading only what was witnessed; `compare(a, b)` (and `forum bench A B`) is the delta between two runs, so you can prove a change helped instead of asserting it.
247
+
248
+ Pure standard library. No third-party runtime dependencies. The tests run the
249
+ primitives directly, tamper detection and the Merkle property included.
250
+
251
+ ## Roadmap
252
+
253
+ - **Done, the foundation.** Ledger, router, roster, planner, policy. Tested and runnable.
254
+ - **Done, the runtime.** An asyncio dispatcher that runs a plan's waves with bounded concurrency, a mailbox actor and a restart supervisor, and an Orchestrator that ties routing, planning, and witnessed dispatch into one call. The engine runs end to end against a stub executor today.
255
+ - **Done, real executors.** A `SubprocessExecutor` that runs any command (so any CLI, including a model CLI), and an `ApiExecutor` that drives a model over the Anthropic API, both behind the one executor seam. A failing task is witnessed, not fatal.
256
+ - **Done, the control loop.** A Coordinator that turns a plain request into a plan, a Classifier, a Validator that judges each result (a failed task is witnessed, not blessed), and a Synthesizer that writes one answer. `Orchestrator.submit` runs the whole loop, witnessed.
257
+ - **Done, durable storage.** A file-backed `FileStorage` (append-only JSONL) so a ledger outlives the process: it recovers exactly on restart, tolerates a crash-torn final write, and stays tamper-evident.
258
+ - **Done, the default roster.** 24 domain-neutral capability lanes (engineering, graphics, support, research) shipped in the box and loaded with `roster.load_default()`. Plain capability names, every lane keyword-routable.
259
+ - **Done, the daemon (HTTP).** A stdlib-asyncio HTTP service over one durable ledger: route, plan, submit, and verify or replay the record over HTTP. Every request witnessed into the same record.
260
+ - **Done, the MCP surface.** The same tools over MCP (JSON-RPC on stdio), a thin adapter over the HTTP surface so the two cannot drift. The lone optional edge.
261
+ - **Done, the CLI.** A `forum` command: route, submit, serve, mcp, and ledger verify / show / replay / get. Pick a model with `--api`, `--chat-url`, or `--cmd`.
262
+ - **Done, hardened and proven.** Each verdict chains to the result it judged, the routing ladder reaches the Classifier on escalation (`assign` / `submit_one`), and a gated test proves the whole loop against a real model. See [RUNNING.md](RUNNING.md).
263
+ - **1.0.** Durable, verifiable, daemonized, installable, documented. The functional engine is complete.
264
+ - **1.1, the run contract.** A ContextProvider seam (plan on a brain's organized context, witnessed) and a RunBudget that bounds a run. Research-informed.
265
+ - **1.2, witnessed escalation.** Model identity in the ledger and validator-driven escalation up a ladder of stronger executors, on a verifiable signal, not model confidence. Research-informed.
266
+ - **1.3, reading the record.** A run summary aggregated purely from the witnessed ledger (`forum ledger summary`), and a ledger A/B (`forum bench`) so an improvement is measured from the record, not claimed.
267
+ - **1.4, did the run answer?** A witnessed intent check: how much of the request the final answer covers, recorded and surfaced in the summary and A/B. A reproducible lexical floor; a grounded model intent-judge is the next rung.
268
+ - **Beyond.** Typed DAG edges, a grounded model intent-judge, the verification seam, and a ledger-reading dashboard.
269
+
270
+ ## Docs
271
+
272
+ - [ARCHITECTURE.md](ARCHITECTURE.md): the layers, the ledger, and the surfaces.
273
+ - [RUNNING.md](RUNNING.md): run it against a real model, over the API or a model CLI.
274
+ - [SECURITY.md](SECURITY.md): the trust model, the no-shell guarantee, and sandboxing.
275
+ - [RELEASING.md](RELEASING.md): how a release is built and published.
276
+
277
+ ## License
278
+
279
+ Forum is fair-source: the code is open to read, run, and build on, with commercial
280
+ use reserved so the project can fund its own development. Copyright stays with the
281
+ author. See [LICENSE](LICENSE) for the exact terms.
@@ -0,0 +1,184 @@
1
+ # Forum
2
+
3
+ [![CI](https://github.com/HarperZ9/forum/actions/workflows/ci.yml/badge.svg)](https://github.com/HarperZ9/forum/actions/workflows/ci.yml)
4
+ ![license: fair-source](https://img.shields.io/badge/license-fair--source-blue.svg)
5
+ ![python: 3.11+](https://img.shields.io/badge/python-3.11%2B-blue.svg)
6
+ ![deps: none](https://img.shields.io/badge/deps-none-success.svg)
7
+
8
+ Every few months there's a new framework for orchestrating AI agents. You wire one
9
+ up, hand it a task, and it works. Then you try to run it for real, and you hit the
10
+ question that actually matters: what happened on that run, and can you prove it?
11
+ Usually all you've got is a pile of model output and a log you're supposed to trust.
12
+
13
+ Forum starts from that question. It's an orchestration engine for fleets of agents,
14
+ and the idea underneath it is simple. The record of what happened isn't a side effect
15
+ of the work. It is the work. Every routing decision, every task, every result goes
16
+ into a ledger you can verify, replay, and trace. Think of how a bank reconciles its
17
+ books instead of trusting the teller's memory.
18
+
19
+ Here's why it's built this way. A language model has no memory of its own. Each call
20
+ starts from nothing. If you want to build something dependable on top of that, you
21
+ have to give a forgetful mind two things it can't supply for itself: a record that
22
+ outlives the conversation, and a way to check that record instead of trusting it. You
23
+ also need reach, the ability to act across a lot of agents at once. That's the real
24
+ project. The small zero-dependency pieces in this repo aren't the goal. They're the
25
+ bricks.
26
+
27
+ Everything here is built and runs: the foundation (the ledger, the router, the
28
+ planner), the runtime that executes a plan across agents and witnesses every step,
29
+ real executors (a task can run any command as a subprocess, including a model CLI, or call a
30
+ model over the API), the control loop that turns a plain request into a plan and a
31
+ single verified answer, a durable ledger that survives a restart, an always-on daemon
32
+ over HTTP and MCP, and a `forum` command to drive it all. Every routing decision,
33
+ plan, task, result, and verdict goes into a ledger you can verify, replay, and trace.
34
+ The examples below show it, and the small zero-dependency pieces are still the bricks.
35
+
36
+ ## Watch it work
37
+
38
+ ```bash
39
+ git clone https://github.com/HarperZ9/forum
40
+ cd forum
41
+ python examples/demo.py # no install, nothing to download
42
+ ```
43
+
44
+ The demo routes a few requests, plans a small dependency graph, records every step,
45
+ and then does the interesting part. It quietly corrupts a stored result and checks
46
+ whether the ledger notices.
47
+
48
+ ```
49
+ 1. Routing (deterministic Tier-0; decides a lane or escalates)
50
+ 'build the database schema and the auth endpoint' -> backend
51
+ 'build the react component and css for the page' -> frontend
52
+ 'write the readme docs and the guide' -> docs
53
+ 'summon a unicorn' -> escalate -> needs an LLM classifier (confidence 0.00)
54
+
55
+ 2. Planning (DAG -> parallel waves, capped by policy max_parallel=2)
56
+ wave 0: ['T1']
57
+ wave 1: ['T2']
58
+ wave 2: ['T3', 'T4']
59
+
60
+ 4. Accountability: verify, tamper-detect, replay
61
+ verify() (chain) : True
62
+ verify(deep=True) : True
63
+ causal chain of last : request -> plan -> task -> result
64
+
65
+ ...now tamper with a stored payload body (seq 2)
66
+ verify() (chain only) : True <- chain hashes still link
67
+ verify(deep=True) : False <- body tamper caught
68
+ ```
69
+
70
+ Look at those last two lines. The chain of hashes still links, so a quick check
71
+ passes. But the contents of one record no longer match what was promised, and the
72
+ deeper check says so. You don't have to trust the record. You can check it.
73
+
74
+ To see the engine run a whole plan instead of just the ledger, there's a second
75
+ example:
76
+
77
+ ```bash
78
+ python examples/run.py
79
+ ```
80
+
81
+ It routes a request, runs a three-step plan across agents (with a stub standing in for
82
+ a real model), and verifies the entire run from the ledger at the end.
83
+
84
+ ## From the command line
85
+
86
+ Installed, Forum gives you a `forum` command:
87
+
88
+ ```bash
89
+ forum route "build the auth endpoint and the database schema" # which lane, no model needed
90
+ forum submit "ship a login API" --cmd "ollama run llama3" # plan, run, answer with a local model, no account
91
+ forum serve --chat-url http://localhost:11434/v1/chat/completions --model llama3 # the HTTP daemon
92
+ forum mcp --cmd "ollama run llama3" # the MCP stdio server
93
+ forum ledger verify # check the record
94
+ forum ledger show --limit 20 # the last 20 entries
95
+ ```
96
+
97
+ `submit`, `serve`, and `mcp` reach a model, and Forum is model-agnostic about which.
98
+ `--cmd "<any command>"` runs any model (a local CLI needs no account), `--chat-url`
99
+ talks to any OpenAI-compatible server (local or cloud), and `--api` is one specific
100
+ provider (Anthropic). Routing and the ledger commands need no model at all. See
101
+ [RUNNING.md](RUNNING.md).
102
+
103
+ ## How the ledger works
104
+
105
+ A log tells you what a program says it did. A ledger lets you prove it. Two old ideas
106
+ do most of the work.
107
+
108
+ The first is a hash chain. Every entry carries a fingerprint of the one before it.
109
+ Edit a past entry, drop one, or shuffle the order, and the fingerprints stop lining
110
+ up. `verify()` walks the chain and tells you where.
111
+
112
+ The second is content addressing. The bulky parts, the prompts and the outputs, are
113
+ stored under a fingerprint of their own bytes rather than inline. That keeps the chain
114
+ small, and it has a useful side effect: you can redact a sensitive body down to its
115
+ fingerprint and the chain still checks out. When the bodies are there,
116
+ `verify(deep=True)` re-hashes each one to make sure it still matches. That's what
117
+ catches the swapped result in the demo.
118
+
119
+ Everything else falls out of those two. `replay(until=...)` rebuilds the exact state
120
+ at any past point, which works because the core is pure and entries never change.
121
+ `causal_chain(seq)` follows the parent links to answer the question every postmortem
122
+ comes back to: why did this happen? And `checkpoint()` folds the whole history into
123
+ one Merkle root. The leaves and the internal nodes are tagged differently, and odd
124
+ nodes get carried up rather than duplicated, so it avoids the second-preimage
125
+ and duplicate-node collisions (CVE-2012-2459) that naive Merkle code runs into.
126
+
127
+ None of this is worth much if the record dies with the process. By default the
128
+ ledger lives in memory, which is right for a test or a single run. Point it at a
129
+ `FileStorage` instead and every entry is appended to a file and fsynced before the
130
+ next one, so the ledger survives a restart and still verifies, replays, and
131
+ checkpoints exactly. If a crash cuts the final write short, that half-written line
132
+ is dropped on reload and the rest of the record stands. Tampering does not get a
133
+ quieter treatment: a reordered file still loads, and `verify()` still says no.
134
+
135
+ ## What's here
136
+
137
+ - `forum.ledger`: the record. Hash chain, content-addressed bodies, `verify` / `verify(deep=True)`, `replay`, `causal_chain`, Merkle `checkpoint`.
138
+ - `forum.storage`: where the record lives. An in-memory store for tests and short runs, and a durable `FileStorage` (append-only JSONL) so a ledger survives a restart and stays verifiable.
139
+ - `forum.routing`: a router that reads a request, picks a lane, and only falls back to a model when the keywords genuinely can't decide.
140
+ - `forum.plan`: a task graph compiled into parallel waves, with cycles and missing dependencies caught up front.
141
+ - `forum.roster`: the cast of specialists, written as plain data in a TOML file and validated on load. Ships with a built-in default roster of 24 plain capability lanes (`load_default()`), so a fresh install has a real roster out of the box.
142
+ - `forum.policy`: the rules of the room. Which work can run, and how much at once.
143
+ - `forum.executor` / `forum.chat_executor` / `forum.api_executor`: how work actually runs, model-agnostic. A stub for tests, a `SubprocessExecutor` that runs any command (a local model CLI needs no account), a `ChatExecutor` for any OpenAI-compatible server (local or cloud), and an `ApiExecutor` for the Anthropic API. A failing task is witnessed, not fatal; each result records which model produced it, and a failed task can escalate up a ladder of stronger executors, witnessed.
144
+ - `forum.control` and `Orchestrator.submit`: the control loop. A Coordinator turns a plain request into a plan, a Classifier picks an agent when keywords can't, a Validator judges each result, and a Synthesizer writes one answer. Every step is witnessed.
145
+ - `forum.context` and `forum.budget`: the run contract. A `ContextProvider` seam so a run plans on organized context from a brain (the index flagship), witnessed as the exact context that shaped it; and a `RunBudget` that bounds a run and witnesses where it stopped.
146
+ - `forum.daemon` / `forum.http_surface`: an always-on HTTP service (stdlib asyncio, no framework) over one long-lived, durable ledger. Submit a request, read a witnessed answer, and verify or replay the record over HTTP.
147
+ - `forum.mcp_surface`: the same tools over MCP (JSON-RPC on stdio), the lone optional edge. It is a thin adapter over the HTTP surface, so the two can never drift.
148
+ - `forum.intent`: did the run answer the request? After synthesis, a deterministic, reproducible measure of how much of the request's vocabulary the final answer covers is witnessed as its own entry, so a completed run carries an auditable drift signal. It is a lexical floor that flags a run for review, not a verdict that blocks it.
149
+ - `forum.report`: reading the record. `summarize(ledger)` aggregates a witnessed run into counts, model calls, the checkpoint, and the verify result, reading only what was witnessed; `compare(a, b)` (and `forum bench A B`) is the delta between two runs, so you can prove a change helped instead of asserting it.
150
+
151
+ Pure standard library. No third-party runtime dependencies. The tests run the
152
+ primitives directly, tamper detection and the Merkle property included.
153
+
154
+ ## Roadmap
155
+
156
+ - **Done, the foundation.** Ledger, router, roster, planner, policy. Tested and runnable.
157
+ - **Done, the runtime.** An asyncio dispatcher that runs a plan's waves with bounded concurrency, a mailbox actor and a restart supervisor, and an Orchestrator that ties routing, planning, and witnessed dispatch into one call. The engine runs end to end against a stub executor today.
158
+ - **Done, real executors.** A `SubprocessExecutor` that runs any command (so any CLI, including a model CLI), and an `ApiExecutor` that drives a model over the Anthropic API, both behind the one executor seam. A failing task is witnessed, not fatal.
159
+ - **Done, the control loop.** A Coordinator that turns a plain request into a plan, a Classifier, a Validator that judges each result (a failed task is witnessed, not blessed), and a Synthesizer that writes one answer. `Orchestrator.submit` runs the whole loop, witnessed.
160
+ - **Done, durable storage.** A file-backed `FileStorage` (append-only JSONL) so a ledger outlives the process: it recovers exactly on restart, tolerates a crash-torn final write, and stays tamper-evident.
161
+ - **Done, the default roster.** 24 domain-neutral capability lanes (engineering, graphics, support, research) shipped in the box and loaded with `roster.load_default()`. Plain capability names, every lane keyword-routable.
162
+ - **Done, the daemon (HTTP).** A stdlib-asyncio HTTP service over one durable ledger: route, plan, submit, and verify or replay the record over HTTP. Every request witnessed into the same record.
163
+ - **Done, the MCP surface.** The same tools over MCP (JSON-RPC on stdio), a thin adapter over the HTTP surface so the two cannot drift. The lone optional edge.
164
+ - **Done, the CLI.** A `forum` command: route, submit, serve, mcp, and ledger verify / show / replay / get. Pick a model with `--api`, `--chat-url`, or `--cmd`.
165
+ - **Done, hardened and proven.** Each verdict chains to the result it judged, the routing ladder reaches the Classifier on escalation (`assign` / `submit_one`), and a gated test proves the whole loop against a real model. See [RUNNING.md](RUNNING.md).
166
+ - **1.0.** Durable, verifiable, daemonized, installable, documented. The functional engine is complete.
167
+ - **1.1, the run contract.** A ContextProvider seam (plan on a brain's organized context, witnessed) and a RunBudget that bounds a run. Research-informed.
168
+ - **1.2, witnessed escalation.** Model identity in the ledger and validator-driven escalation up a ladder of stronger executors, on a verifiable signal not model confidence. Research-informed.
169
+ - **1.3, reading the record.** A run summary aggregated purely from the witnessed ledger (`forum ledger summary`), and a ledger A/B (`forum bench`) so an improvement is measured from the record, not claimed.
170
+ - **1.4, did the run answer?** A witnessed intent check: how much of the request the final answer covers, recorded and surfaced in the summary and A/B. A reproducible lexical floor; a grounded model intent-judge is the next rung.
171
+ - **Beyond.** Typed DAG edges, a grounded model intent-judge, the verification seam, and a ledger-reading dashboard.
172
+
173
+ ## Docs
174
+
175
+ - [ARCHITECTURE.md](ARCHITECTURE.md): the layers, the ledger, and the surfaces.
176
+ - [RUNNING.md](RUNNING.md): run it against a real model, over the API or a model CLI.
177
+ - [SECURITY.md](SECURITY.md): the trust model, the no-shell guarantee, and sandboxing.
178
+ - [RELEASING.md](RELEASING.md): how a release is built and published.
179
+
180
+ ## License
181
+
182
+ Forum is fair-source: the code is open to read, run, and build on, with commercial
183
+ use reserved so the project can fund its own development. Copyright stays with the
184
+ author. See [LICENSE](LICENSE) for the exact terms.
@@ -0,0 +1,44 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "forum-engine"
7
+ version = "1.4.0"
8
+ description = "An orchestration engine for AI agents that records every step in a ledger you can verify"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ license = { file = "LICENSE" }
12
+ authors = [{ name = "Zain Dana Harper" }]
13
+ keywords = ["agents", "orchestration", "ledger", "accountability", "multi-agent", "audit", "zero-dependency"]
14
+ dependencies = []
15
+
16
+ [project.optional-dependencies]
17
+ dev = ["pytest>=8", "pytest-cov>=5", "ruff>=0.6", "mypy>=1.10"]
18
+
19
+ [project.scripts]
20
+ forum = "forum.cli:main"
21
+
22
+ [project.urls]
23
+ Homepage = "https://github.com/HarperZ9/forum"
24
+ Repository = "https://github.com/HarperZ9/forum"
25
+
26
+ [tool.setuptools.packages.find]
27
+ where = ["src"]
28
+
29
+ [tool.setuptools.package-data]
30
+ forum = ["manifests/*.toml"]
31
+
32
+ [tool.pytest.ini_options]
33
+ pythonpath = ["src"]
34
+ testpaths = ["tests"]
35
+
36
+ [tool.ruff]
37
+ target-version = "py311"
38
+
39
+ [tool.ruff.lint]
40
+ select = ["E4", "E7", "E9", "F", "I"]
41
+
42
+ [tool.mypy]
43
+ python_version = "3.11"
44
+ files = ["src/forum"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ """Forum: accountable multi-agent orchestration engine (pure core)."""
2
+
3
+ __version__ = "1.4.0"
@@ -0,0 +1,43 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ from typing import Any
5
+
6
# Sentinel placed on an actor's inbox to ask its receive loop to exit.
# Compared by identity, so no user message can be mistaken for it.
_STOP = object()


class Actor:
    """A minimal mailbox actor: an async receive loop over a queue.

    Subclasses override :meth:`on_message`. Messages are handled one at a
    time, in the order they were put on ``inbox``. If a handler raises an
    ``Exception`` the loop ends and the exception is kept on ``error`` so
    the crash can be observed rather than lost inside the task.
    """

    def __init__(self, name: str) -> None:
        self.name = name
        # Unbounded FIFO mailbox drained by the receive loop.
        self.inbox: asyncio.Queue[Any] = asyncio.Queue()
        # The task running ``_loop``; None until ``start`` is called.
        self._task: asyncio.Task | None = None
        # The exception that ended the loop, if a handler raised one.
        # ``start`` does not clear it.
        self.error: BaseException | None = None

    async def on_message(self, message: Any) -> None:
        """Handle one message. Subclasses must override this.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError

    async def _loop(self) -> None:
        """Drain the inbox until the stop sentinel arrives or a handler fails."""
        while True:
            message = await self.inbox.get()
            if message is _STOP:
                break
            try:
                await self.on_message(message)
            except Exception as exc:  # let-it-crash, but observable
                self.error = exc
                break

    def start(self) -> "Actor":
        """Start the receive loop as a task and return ``self``.

        Calling ``start`` while a loop is already running is a no-op: a
        second task would compete for the same inbox and the first could no
        longer be awaited by ``stop``. After the loop has finished (stopped
        or crashed), ``start`` launches a fresh one over the same inbox.

        Uses ``asyncio.create_task``, so it needs a running event loop.
        """
        if self._task is None or self._task.done():
            self._task = asyncio.create_task(self._loop())
        return self

    async def send(self, message: Any) -> None:
        """Put ``message`` on the inbox for the receive loop to handle."""
        await self.inbox.put(message)

    async def stop(self) -> None:
        """Ask the loop to exit after the messages already queued, and wait.

        Does nothing if the actor was never started. If the loop has already
        finished, no sentinel is enqueued: a leftover sentinel would make a
        later restarted loop exit as soon as it reached it, dropping every
        message sent after it.
        """
        task = self._task
        if task is None:
            return
        if not task.done():
            await self.inbox.put(_STOP)
        await task