asktheboard 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- asktheboard-0.2.0/LICENSE +21 -0
- asktheboard-0.2.0/PKG-INFO +256 -0
- asktheboard-0.2.0/README.md +231 -0
- asktheboard-0.2.0/asktheboard/__init__.py +67 -0
- asktheboard-0.2.0/asktheboard/adr.py +83 -0
- asktheboard-0.2.0/asktheboard/cli.py +198 -0
- asktheboard-0.2.0/asktheboard/convene.py +141 -0
- asktheboard-0.2.0/asktheboard/decision_types.py +87 -0
- asktheboard-0.2.0/asktheboard/http_client.py +67 -0
- asktheboard-0.2.0/asktheboard/ledger.py +73 -0
- asktheboard-0.2.0/asktheboard/llm.py +35 -0
- asktheboard-0.2.0/asktheboard/model.py +265 -0
- asktheboard-0.2.0/asktheboard/roster.py +127 -0
- asktheboard-0.2.0/asktheboard.egg-info/PKG-INFO +256 -0
- asktheboard-0.2.0/asktheboard.egg-info/SOURCES.txt +26 -0
- asktheboard-0.2.0/asktheboard.egg-info/dependency_links.txt +1 -0
- asktheboard-0.2.0/asktheboard.egg-info/entry_points.txt +2 -0
- asktheboard-0.2.0/asktheboard.egg-info/requires.txt +3 -0
- asktheboard-0.2.0/asktheboard.egg-info/top_level.txt +1 -0
- asktheboard-0.2.0/pyproject.toml +42 -0
- asktheboard-0.2.0/setup.cfg +4 -0
- asktheboard-0.2.0/tests/test_adr.py +49 -0
- asktheboard-0.2.0/tests/test_convene.py +115 -0
- asktheboard-0.2.0/tests/test_decision_types.py +35 -0
- asktheboard-0.2.0/tests/test_http_client.py +71 -0
- asktheboard-0.2.0/tests/test_ledger.py +60 -0
- asktheboard-0.2.0/tests/test_model.py +120 -0
- asktheboard-0.2.0/tests/test_roster.py +91 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Dan Ilushin
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: asktheboard
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: A board of expert personas whose every decision is a pre-registered, time-anchored, reality-graded bet. BYOK; the board that keeps score, before the fact.
|
|
5
|
+
Author-email: Dan Ilushin <dilushin@chu6a.dev>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/danilushin/asktheboard
|
|
8
|
+
Project-URL: Repository, https://github.com/danilushin/asktheboard
|
|
9
|
+
Project-URL: Issues, https://github.com/danilushin/asktheboard/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/danilushin/asktheboard/blob/main/CHANGELOG.md
|
|
11
|
+
Keywords: llm,decision,calibration,brier,adr,byok,board
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
|
|
26
|
+
# ask-the-board
|
|
27
|
+
|
|
28
|
+
[CI](https://github.com/danilushin/asktheboard/actions/workflows/ci.yml)
|
|
29
|
+
[PyPI](https://pypi.org/project/asktheboard/)
|
|
30
|
+

|
|
31
|
+
[License: MIT](LICENSE)
|
|
32
|
+
|
|
33
|
+
**A board of expert personas whose every decision is a pre-registered,
|
|
34
|
+
time-anchored, reality-graded bet.** Not a chatbot that agrees with you -- a board
|
|
35
|
+
that keeps score, *before* the fact.
|
|
36
|
+
|
|
37
|
+
> Status: Phase-0 core + live convening. The **foresight engine** (data model +
|
|
38
|
+
> grading + committable ADR) and the **BYOK LLM fan-out** that *produces* a
|
|
39
|
+
> board-minute (`asktheboard.convene`, behind the `asktheboard.llm` Protocol) are
|
|
40
|
+
> both in. No provider is bundled -- you plug in your own key.
|
|
41
|
+
|
|
42
|
+
## Why this exists
|
|
43
|
+
|
|
44
|
+
Anyone can clone a "panel of AI personas" in a weekend, and a dozen have. The
|
|
45
|
+
debate mechanic is a commodity. What it leaves out is the thing that makes advice
|
|
46
|
+
worth trusting: a record of having been right *before the outcome was knowable*.
|
|
47
|
+
That record is **hard to fake** -- you can buy model outputs, but you can't
|
|
48
|
+
back-date a timestamp. It only accrues the slow way: by calling decisions in
|
|
49
|
+
advance and letting reality grade them, one resolution date at a time.
|
|
50
|
+
|
|
51
|
+
So ask-the-board records, for every decision:
|
|
52
|
+
|
|
53
|
+
1. your **stated prior** (what you believed going in),
|
|
54
|
+
2. the **per-seat dissent vector** -- each seat's stance + its own probability,
|
|
55
|
+
3. a **dated, falsifiable prediction**, anchored *before* the outcome is knowable,
|
|
56
|
+
4. on the resolution date, reality's **realized outcome**, auto-reconciled into a
|
|
57
|
+
**Brier/calibration score per seat**.
|
|
58
|
+
|
|
59
|
+
The board-minute is a **git-committable ADR**. Your git history is the external
|
|
60
|
+
attestation of the anchor timestamp. The accumulating, reality-graded record is
|
|
61
|
+
the durable asset.
|
|
62
|
+
|
|
63
|
+
## See it keep score (60s, no API key)
|
|
64
|
+
|
|
65
|
+
`create -> resolve -> score` is pure data -- no LLM, no key, no network. The
|
|
66
|
+
[`examples/`](examples/) folder holds a **real** resolved board-minute: the
|
|
67
|
+
affirming seat called it right, the dissenting `skeptic` got it wrong, and the
|
|
68
|
+
scoreboard ranks them by Brier score (lower is better).
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
# pip-installed (no repo)? paste the sample spec below. Cloned the repo?
|
|
72
|
+
# skip the heredoc and use --spec tests/sample_minute.json instead.
|
|
73
|
+
cat > sample_minute.json <<'JSON'
|
|
74
|
+
{
|
|
75
|
+
"id": "2026-01-postgres-vs-vectordb",
|
|
76
|
+
"question": "Adopt Postgres + pgvector, or a dedicated vector DB?",
|
|
77
|
+
"prior": "Leaning toward a dedicated vector DB for the embeddings workload.",
|
|
78
|
+
"decision": "Stay on Postgres + pgvector for now.",
|
|
79
|
+
"prediction": {
|
|
80
|
+
"statement": "We will NOT migrate off Postgres for vectors within 3 months.",
|
|
81
|
+
"resolution_date": "2026-04-01",
|
|
82
|
+
"board_probability": 0.75
|
|
83
|
+
},
|
|
84
|
+
"seats": [
|
|
85
|
+
{"seat": "karpathy", "stance": "affirm", "probability": 0.8, "rationale": "Boring tech; pgvector is enough at this scale."},
|
|
86
|
+
{"seat": "skeptic", "stance": "dissent", "probability": 0.35, "rationale": "Recall/latency will bite once the corpus 10x's."}
|
|
87
|
+
],
|
|
88
|
+
"created_at": "2026-01-05T10:30:00"
|
|
89
|
+
}
|
|
90
|
+
JSON
|
|
91
|
+
|
|
92
|
+
python -m asktheboard.cli create --spec sample_minute.json
|
|
93
|
+
python -m asktheboard.cli resolve --id 2026-01-postgres-vs-vectordb --outcome true
|
|
94
|
+
python -m asktheboard.cli score
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
seat n mean_brier wins losses
|
|
99
|
+
----------------------------------------------
|
|
100
|
+
karpathy 1 0.040 0 0
|
|
101
|
+
skeptic 1 0.423 0 1
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Full walkthrough + committed artifacts: [`examples/README.md`](examples/README.md).
|
|
105
|
+
|
|
106
|
+
## BYOK (bring your own API key)
|
|
107
|
+
|
|
108
|
+
The engine ships no provider and makes no calls of its own. You supply your own
|
|
109
|
+
LLM key; you pay your own inference. The open-source core therefore costs nothing
|
|
110
|
+
to run at any scale -- the cost lives with the user, not a host. (A managed,
|
|
111
|
+
capped hosted tier -- for people who would rather not manage keys -- is the
|
|
112
|
+
separate, paid product.)
|
|
113
|
+
|
|
114
|
+
## Integrity guarantees (enforced in code)
|
|
115
|
+
|
|
116
|
+
- A prediction **cannot be pre-registered to resolve in the past** (no backfilling
|
|
117
|
+
an "old" call onto a known outcome).
|
|
118
|
+
- A minute **cannot be graded before its resolution date** -- the outcome must not
|
|
119
|
+
be knowable yet. That is what makes it *foresight*.
|
|
120
|
+
- The anchor timestamp and the prediction are **frozen** once created; grading
|
|
121
|
+
never moves them.
|
|
122
|
+
|
|
123
|
+
See `tests/test_model.py` -- these are the load-bearing tests.
|
|
124
|
+
|
|
125
|
+
## Quick start
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
python -m pytest # run the suite
|
|
129
|
+
|
|
130
|
+
# pre-register a decision (the board-minute spec is JSON)
|
|
131
|
+
python -m asktheboard.cli create --spec tests/sample_minute.json
|
|
132
|
+
|
|
133
|
+
# ... months later, on/after the resolution date, grade it against reality
|
|
134
|
+
python -m asktheboard.cli resolve --id 2026-01-postgres-vs-vectordb --outcome false
|
|
135
|
+
|
|
136
|
+
# per-seat calibration scoreboard, best-calibrated first
|
|
137
|
+
python -m asktheboard.cli score
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
`create` writes both `<id>.json` (the record) and `<id>.md` (the committable ADR)
|
|
141
|
+
into `board-minutes/`.
|
|
142
|
+
|
|
143
|
+
## Convene a board (BYOK)
|
|
144
|
+
|
|
145
|
+
`create` pre-registers a minute you wrote by hand. `convene` runs the **live LLM
|
|
146
|
+
fan-out**: every seat answers through *your* key, and the board's consensus
|
|
147
|
+
probability is the mean of the seats' calls. It ships no provider -- bring an
|
|
148
|
+
OpenAI-compatible endpoint (`HTTPLLMClient` is stdlib-only, zero dependencies).
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
from asktheboard import convene, Seat, HTTPLLMClient
|
|
152
|
+
|
|
153
|
+
minute = convene(
|
|
154
|
+
id="pgvector-scale",
|
|
155
|
+
question="Will pgvector hold our scale, or do we need a dedicated vector DB?",
|
|
156
|
+
prior="leaning postgres to avoid a new service",
|
|
157
|
+
decision="stay on postgres + pgvector",
|
|
158
|
+
statement="pgvector serves p95<150ms at 50M embeddings without a dedicated DB",
|
|
159
|
+
seats=[Seat("karpathy", "ML researcher"), Seat("skeptic", "find the failure mode")],
|
|
160
|
+
client=HTTPLLMClient(model="gpt-4o-mini"), # reads OPENAI_API_KEY
|
|
161
|
+
decision_type="library", # -> 90-day resolution horizon
|
|
162
|
+
)
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Or from the CLI (key in `OPENAI_API_KEY`):
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
python -m asktheboard.cli convene --spec convene.json --model gpt-4o-mini
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
Any OpenAI-compatible API works -- point `--base-url` (or `HTTPLLMClient(base_url=...)`)
|
|
172
|
+
at OpenRouter, Together, or a local server. The engine still makes no calls of its
|
|
173
|
+
own; it only ever speaks through the client you pass.
|
|
174
|
+
|
|
175
|
+
### Bundled roster -- seat a board by name
|
|
176
|
+
|
|
177
|
+
You can always hand-write `Seat(name, persona)`. But a sensible default board ships
|
|
178
|
+
in the box: a curated set of *role archetypes* (the architect, the skeptic, the
|
|
179
|
+
operator -- functions, not impersonations of real people) and a few named panels,
|
|
180
|
+
so seating one is a single lookup.
|
|
181
|
+
|
|
182
|
+
```python
|
|
183
|
+
from asktheboard import convene, panel, seats, HTTPLLMClient
|
|
184
|
+
|
|
185
|
+
minute = convene(
|
|
186
|
+
id="pgvector-scale",
|
|
187
|
+
question="Will pgvector hold our scale, or do we need a dedicated vector DB?",
|
|
188
|
+
prior="leaning postgres",
|
|
189
|
+
decision="stay on postgres + pgvector",
|
|
190
|
+
statement="pgvector serves p95<150ms at 50M embeddings without a dedicated DB",
|
|
191
|
+
seats=panel("tech"), # architect + skeptic + pragmatist
|
|
192
|
+
# seats=seats(["architect", "operator", "skeptic"]), # or pick your own
|
|
193
|
+
client=HTTPLLMClient(model="gpt-4o-mini"),
|
|
194
|
+
decision_type="library",
|
|
195
|
+
)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
From the CLI, pass `--panel` or `--seats` instead of putting seats in the spec:
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
python -m asktheboard.cli roster # list seats + panels
|
|
202
|
+
python -m asktheboard.cli convene --spec d.json --model gpt-4o-mini --panel tech
|
|
203
|
+
python -m asktheboard.cli convene --spec d.json --model gpt-4o-mini --seats architect,skeptic
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
| seat | voice |
|
|
207
|
+
|---|---|
|
|
208
|
+
| `architect` | shape, maintenance cost, what breaks at scale, build-vs-buy |
|
|
209
|
+
| `skeptic` | forced dissent -- the most likely failure first, then the deeper objection |
|
|
210
|
+
| `pragmatist` | simplest thing that ships; YAGNI; opportunity cost |
|
|
211
|
+
| `researcher` | what the data actually says; base rate before anecdote |
|
|
212
|
+
| `operator` | run cost, failure budget, who gets paged at 3am |
|
|
213
|
+
| `strategist` | base rates, second-order effects, one-way vs reversible doors |
|
|
214
|
+
|
|
215
|
+
Panels: `tech` (architect/skeptic/pragmatist), `decision` (strategist/skeptic/researcher),
|
|
216
|
+
`ops` (operator/architect/skeptic), `default` (architect/skeptic/pragmatist/strategist).
|
|
217
|
+
`skeptic` sits on every panel by design -- a board with no dissent keeps no honest score.
|
|
218
|
+
|
|
219
|
+
### Decision types -> default horizons
|
|
220
|
+
|
|
221
|
+
A minute is only foresight if it has a date by which reality can grade it.
|
|
222
|
+
`decision_type` picks a sensible default horizon so the common case is one lookup
|
|
223
|
+
(and a 5-year horizon on a library swap stands out as dishonest):
|
|
224
|
+
|
|
225
|
+
| type | horizon | when |
|
|
226
|
+
|---|---|---|
|
|
227
|
+
| `library` | 90d | adopt/swap/drop a dependency |
|
|
228
|
+
| `migration` | 180d | move a datastore, platform, or pipeline |
|
|
229
|
+
| `architecture` | 365d | a structural design bet you live with |
|
|
230
|
+
|
|
231
|
+
Short-latency first on purpose: a fresh board earns a track record on fast `library`
|
|
232
|
+
calls before anyone trusts its slow `architecture` bets. Pass an explicit
|
|
233
|
+
`resolution_date=` to override.
|
|
234
|
+
|
|
235
|
+
## A contrarian win
|
|
236
|
+
|
|
237
|
+
When a seat **dissents** from the board and turns out more right than the
|
|
238
|
+
consensus, that is a *contrarian win* -- the gold the public scoreboard is built
|
|
239
|
+
from. The board changed (or should have changed) its mind, and reality later
|
|
240
|
+
stamped the dissenter vindicated.
|
|
241
|
+
|
|
242
|
+
## Stability
|
|
243
|
+
|
|
244
|
+
The public API is **`0.x` / unstable**. The `LLMClient` / `HTTPLLMClient` surface
|
|
245
|
+
and the board-minute JSON schema may change before `1.0` -- pin a version if you
|
|
246
|
+
depend on them.
|
|
247
|
+
|
|
248
|
+
## Built with
|
|
249
|
+
|
|
250
|
+
Built by [Dan Ilushin](https://github.com/danilushin) with Claude (Anthropic) in
|
|
251
|
+
the loop. Contributions welcome -- see [CONTRIBUTING.md](CONTRIBUTING.md)
|
|
252
|
+
(DCO sign-off) and [SECURITY.md](SECURITY.md).
|
|
253
|
+
|
|
254
|
+
## License
|
|
255
|
+
|
|
256
|
+
MIT. (c) 2026 Dan Ilushin.
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
# ask-the-board
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/danilushin/asktheboard/actions/workflows/ci.yml)
|
|
4
|
+
[PyPI](https://pypi.org/project/asktheboard/)
|
|
5
|
+

|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
|
|
8
|
+
**A board of expert personas whose every decision is a pre-registered,
|
|
9
|
+
time-anchored, reality-graded bet.** Not a chatbot that agrees with you -- a board
|
|
10
|
+
that keeps score, *before* the fact.
|
|
11
|
+
|
|
12
|
+
> Status: Phase-0 core + live convening. The **foresight engine** (data model +
|
|
13
|
+
> grading + committable ADR) and the **BYOK LLM fan-out** that *produces* a
|
|
14
|
+
> board-minute (`asktheboard.convene`, behind the `asktheboard.llm` Protocol) are
|
|
15
|
+
> both in. No provider is bundled -- you plug in your own key.
|
|
16
|
+
|
|
17
|
+
## Why this exists
|
|
18
|
+
|
|
19
|
+
Anyone can clone a "panel of AI personas" in a weekend, and a dozen have. The
|
|
20
|
+
debate mechanic is a commodity. What it leaves out is the thing that makes advice
|
|
21
|
+
worth trusting: a record of having been right *before the outcome was knowable*.
|
|
22
|
+
That record is **hard to fake** -- you can buy model outputs, but you can't
|
|
23
|
+
back-date a timestamp. It only accrues the slow way: by calling decisions in
|
|
24
|
+
advance and letting reality grade them, one resolution date at a time.
|
|
25
|
+
|
|
26
|
+
So ask-the-board records, for every decision:
|
|
27
|
+
|
|
28
|
+
1. your **stated prior** (what you believed going in),
|
|
29
|
+
2. the **per-seat dissent vector** -- each seat's stance + its own probability,
|
|
30
|
+
3. a **dated, falsifiable prediction**, anchored *before* the outcome is knowable,
|
|
31
|
+
4. on the resolution date, reality's **realized outcome**, auto-reconciled into a
|
|
32
|
+
**Brier/calibration score per seat**.
|
|
33
|
+
|
|
34
|
+
The board-minute is a **git-committable ADR**. Your git history is the external
|
|
35
|
+
attestation of the anchor timestamp. The accumulating, reality-graded record is
|
|
36
|
+
the durable asset.
|
|
37
|
+
|
|
38
|
+
## See it keep score (60s, no API key)
|
|
39
|
+
|
|
40
|
+
`create -> resolve -> score` is pure data -- no LLM, no key, no network. The
|
|
41
|
+
[`examples/`](examples/) folder holds a **real** resolved board-minute: the
|
|
42
|
+
affirming seat called it right, the dissenting `skeptic` got it wrong, and the
|
|
43
|
+
scoreboard ranks them by Brier score (lower is better).
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
# pip-installed (no repo)? paste the sample spec below. Cloned the repo?
|
|
47
|
+
# skip the heredoc and use --spec tests/sample_minute.json instead.
|
|
48
|
+
cat > sample_minute.json <<'JSON'
|
|
49
|
+
{
|
|
50
|
+
"id": "2026-01-postgres-vs-vectordb",
|
|
51
|
+
"question": "Adopt Postgres + pgvector, or a dedicated vector DB?",
|
|
52
|
+
"prior": "Leaning toward a dedicated vector DB for the embeddings workload.",
|
|
53
|
+
"decision": "Stay on Postgres + pgvector for now.",
|
|
54
|
+
"prediction": {
|
|
55
|
+
"statement": "We will NOT migrate off Postgres for vectors within 3 months.",
|
|
56
|
+
"resolution_date": "2026-04-01",
|
|
57
|
+
"board_probability": 0.75
|
|
58
|
+
},
|
|
59
|
+
"seats": [
|
|
60
|
+
{"seat": "karpathy", "stance": "affirm", "probability": 0.8, "rationale": "Boring tech; pgvector is enough at this scale."},
|
|
61
|
+
{"seat": "skeptic", "stance": "dissent", "probability": 0.35, "rationale": "Recall/latency will bite once the corpus 10x's."}
|
|
62
|
+
],
|
|
63
|
+
"created_at": "2026-01-05T10:30:00"
|
|
64
|
+
}
|
|
65
|
+
JSON
|
|
66
|
+
|
|
67
|
+
python -m asktheboard.cli create --spec sample_minute.json
|
|
68
|
+
python -m asktheboard.cli resolve --id 2026-01-postgres-vs-vectordb --outcome true
|
|
69
|
+
python -m asktheboard.cli score
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
seat n mean_brier wins losses
|
|
74
|
+
----------------------------------------------
|
|
75
|
+
karpathy 1 0.040 0 0
|
|
76
|
+
skeptic 1 0.423 0 1
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Full walkthrough + committed artifacts: [`examples/README.md`](examples/README.md).
|
|
80
|
+
|
|
81
|
+
## BYOK (bring your own API key)
|
|
82
|
+
|
|
83
|
+
The engine ships no provider and makes no calls of its own. You supply your own
|
|
84
|
+
LLM key; you pay your own inference. The open-source core therefore costs nothing
|
|
85
|
+
to run at any scale -- the cost lives with the user, not a host. (A managed,
|
|
86
|
+
capped hosted tier -- for people who would rather not manage keys -- is the
|
|
87
|
+
separate, paid product.)
|
|
88
|
+
|
|
89
|
+
## Integrity guarantees (enforced in code)
|
|
90
|
+
|
|
91
|
+
- A prediction **cannot be pre-registered to resolve in the past** (no backfilling
|
|
92
|
+
an "old" call onto a known outcome).
|
|
93
|
+
- A minute **cannot be graded before its resolution date** -- the outcome must not
|
|
94
|
+
be knowable yet. That is what makes it *foresight*.
|
|
95
|
+
- The anchor timestamp and the prediction are **frozen** once created; grading
|
|
96
|
+
never moves them.
|
|
97
|
+
|
|
98
|
+
See `tests/test_model.py` -- these are the load-bearing tests.
|
|
99
|
+
|
|
100
|
+
## Quick start
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
python -m pytest # run the suite
|
|
104
|
+
|
|
105
|
+
# pre-register a decision (the board-minute spec is JSON)
|
|
106
|
+
python -m asktheboard.cli create --spec tests/sample_minute.json
|
|
107
|
+
|
|
108
|
+
# ... months later, on/after the resolution date, grade it against reality
|
|
109
|
+
python -m asktheboard.cli resolve --id 2026-01-postgres-vs-vectordb --outcome false
|
|
110
|
+
|
|
111
|
+
# per-seat calibration scoreboard, best-calibrated first
|
|
112
|
+
python -m asktheboard.cli score
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
`create` writes both `<id>.json` (the record) and `<id>.md` (the committable ADR)
|
|
116
|
+
into `board-minutes/`.
|
|
117
|
+
|
|
118
|
+
## Convene a board (BYOK)
|
|
119
|
+
|
|
120
|
+
`create` pre-registers a minute you wrote by hand. `convene` runs the **live LLM
|
|
121
|
+
fan-out**: every seat answers through *your* key, and the board's consensus
|
|
122
|
+
probability is the mean of the seats' calls. It ships no provider -- bring an
|
|
123
|
+
OpenAI-compatible endpoint (`HTTPLLMClient` is stdlib-only, zero dependencies).
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from asktheboard import convene, Seat, HTTPLLMClient
|
|
127
|
+
|
|
128
|
+
minute = convene(
|
|
129
|
+
id="pgvector-scale",
|
|
130
|
+
question="Will pgvector hold our scale, or do we need a dedicated vector DB?",
|
|
131
|
+
prior="leaning postgres to avoid a new service",
|
|
132
|
+
decision="stay on postgres + pgvector",
|
|
133
|
+
statement="pgvector serves p95<150ms at 50M embeddings without a dedicated DB",
|
|
134
|
+
seats=[Seat("karpathy", "ML researcher"), Seat("skeptic", "find the failure mode")],
|
|
135
|
+
client=HTTPLLMClient(model="gpt-4o-mini"), # reads OPENAI_API_KEY
|
|
136
|
+
decision_type="library", # -> 90-day resolution horizon
|
|
137
|
+
)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Or from the CLI (key in `OPENAI_API_KEY`):
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
python -m asktheboard.cli convene --spec convene.json --model gpt-4o-mini
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Any OpenAI-compatible API works -- point `--base-url` (or `HTTPLLMClient(base_url=...)`)
|
|
147
|
+
at OpenRouter, Together, or a local server. The engine still makes no calls of its
|
|
148
|
+
own; it only ever speaks through the client you pass.
|
|
149
|
+
|
|
150
|
+
### Bundled roster -- seat a board by name
|
|
151
|
+
|
|
152
|
+
You can always hand-write `Seat(name, persona)`. But a sensible default board ships
|
|
153
|
+
in the box: a curated set of *role archetypes* (the architect, the skeptic, the
|
|
154
|
+
operator -- functions, not impersonations of real people) and a few named panels,
|
|
155
|
+
so seating one is a single lookup.
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from asktheboard import convene, panel, seats, HTTPLLMClient
|
|
159
|
+
|
|
160
|
+
minute = convene(
|
|
161
|
+
id="pgvector-scale",
|
|
162
|
+
question="Will pgvector hold our scale, or do we need a dedicated vector DB?",
|
|
163
|
+
prior="leaning postgres",
|
|
164
|
+
decision="stay on postgres + pgvector",
|
|
165
|
+
statement="pgvector serves p95<150ms at 50M embeddings without a dedicated DB",
|
|
166
|
+
seats=panel("tech"), # architect + skeptic + pragmatist
|
|
167
|
+
# seats=seats(["architect", "operator", "skeptic"]), # or pick your own
|
|
168
|
+
client=HTTPLLMClient(model="gpt-4o-mini"),
|
|
169
|
+
decision_type="library",
|
|
170
|
+
)
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
From the CLI, pass `--panel` or `--seats` instead of putting seats in the spec:
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
python -m asktheboard.cli roster # list seats + panels
|
|
177
|
+
python -m asktheboard.cli convene --spec d.json --model gpt-4o-mini --panel tech
|
|
178
|
+
python -m asktheboard.cli convene --spec d.json --model gpt-4o-mini --seats architect,skeptic
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
| seat | voice |
|
|
182
|
+
|---|---|
|
|
183
|
+
| `architect` | shape, maintenance cost, what breaks at scale, build-vs-buy |
|
|
184
|
+
| `skeptic` | forced dissent -- the most likely failure first, then the deeper objection |
|
|
185
|
+
| `pragmatist` | simplest thing that ships; YAGNI; opportunity cost |
|
|
186
|
+
| `researcher` | what the data actually says; base rate before anecdote |
|
|
187
|
+
| `operator` | run cost, failure budget, who gets paged at 3am |
|
|
188
|
+
| `strategist` | base rates, second-order effects, one-way vs reversible doors |
|
|
189
|
+
|
|
190
|
+
Panels: `tech` (architect/skeptic/pragmatist), `decision` (strategist/skeptic/researcher),
|
|
191
|
+
`ops` (operator/architect/skeptic), `default` (architect/skeptic/pragmatist/strategist).
|
|
192
|
+
`skeptic` sits on every panel by design -- a board with no dissent keeps no honest score.
|
|
193
|
+
|
|
194
|
+
### Decision types -> default horizons
|
|
195
|
+
|
|
196
|
+
A minute is only foresight if it has a date by which reality can grade it.
|
|
197
|
+
`decision_type` picks a sensible default horizon so the common case is one lookup
|
|
198
|
+
(and a 5-year horizon on a library swap stands out as dishonest):
|
|
199
|
+
|
|
200
|
+
| type | horizon | when |
|
|
201
|
+
|---|---|---|
|
|
202
|
+
| `library` | 90d | adopt/swap/drop a dependency |
|
|
203
|
+
| `migration` | 180d | move a datastore, platform, or pipeline |
|
|
204
|
+
| `architecture` | 365d | a structural design bet you live with |
|
|
205
|
+
|
|
206
|
+
Short-latency first on purpose: a fresh board earns a track record on fast `library`
|
|
207
|
+
calls before anyone trusts its slow `architecture` bets. Pass an explicit
|
|
208
|
+
`resolution_date=` to override.
|
|
209
|
+
|
|
210
|
+
## A contrarian win
|
|
211
|
+
|
|
212
|
+
When a seat **dissents** from the board and turns out more right than the
|
|
213
|
+
consensus, that is a *contrarian win* -- the gold the public scoreboard is built
|
|
214
|
+
from. The board changed (or should have changed) its mind, and reality later
|
|
215
|
+
stamped the dissenter vindicated.
|
|
216
|
+
|
|
217
|
+
## Stability
|
|
218
|
+
|
|
219
|
+
The public API is **`0.x` / unstable**. The `LLMClient` / `HTTPLLMClient` surface
|
|
220
|
+
and the board-minute JSON schema may change before `1.0` -- pin a version if you
|
|
221
|
+
depend on them.
|
|
222
|
+
|
|
223
|
+
## Built with
|
|
224
|
+
|
|
225
|
+
Built by [Dan Ilushin](https://github.com/danilushin) with Claude (Anthropic) in
|
|
226
|
+
the loop. Contributions welcome -- see [CONTRIBUTING.md](CONTRIBUTING.md)
|
|
227
|
+
(DCO sign-off) and [SECURITY.md](SECURITY.md).
|
|
228
|
+
|
|
229
|
+
## License
|
|
230
|
+
|
|
231
|
+
MIT. (c) 2026 Dan Ilushin.
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""ask-the-board (OSS) -- a board of expert personas whose every decision is a
|
|
2
|
+
pre-registered, time-anchored, reality-graded bet.
|
|
3
|
+
|
|
4
|
+
BYOK (bring your own API key): you supply your own LLM key, you pay your own
|
|
5
|
+
inference, the engine costs nothing to run at any scale. The durable asset is the
|
|
6
|
+
accumulating, externally attested track record -- the board that keeps score,
|
|
7
|
+
before the fact.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from .model import (
|
|
11
|
+
BoardMinute,
|
|
12
|
+
IntegrityError,
|
|
13
|
+
Prediction,
|
|
14
|
+
Resolution,
|
|
15
|
+
SeatCall,
|
|
16
|
+
brier,
|
|
17
|
+
)
|
|
18
|
+
from .ledger import Ledger, SeatScore
|
|
19
|
+
from .llm import LLMClient, NoProviderConfigured, require_client
|
|
20
|
+
from .convene import ConveneError, Seat, convene
|
|
21
|
+
from .http_client import HTTPLLMClient
|
|
22
|
+
from .decision_types import (
|
|
23
|
+
CATALOG,
|
|
24
|
+
DecisionType,
|
|
25
|
+
UnknownDecisionType,
|
|
26
|
+
resolution_date_for,
|
|
27
|
+
)
|
|
28
|
+
from .roster import (
|
|
29
|
+
UnknownPanel,
|
|
30
|
+
UnknownSeat,
|
|
31
|
+
panel,
|
|
32
|
+
panel_names,
|
|
33
|
+
seat,
|
|
34
|
+
seat_slugs,
|
|
35
|
+
seats,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
__all__ = [
|
|
39
|
+
"BoardMinute",
|
|
40
|
+
"Prediction",
|
|
41
|
+
"Resolution",
|
|
42
|
+
"SeatCall",
|
|
43
|
+
"IntegrityError",
|
|
44
|
+
"brier",
|
|
45
|
+
"Ledger",
|
|
46
|
+
"SeatScore",
|
|
47
|
+
"LLMClient",
|
|
48
|
+
"NoProviderConfigured",
|
|
49
|
+
"require_client",
|
|
50
|
+
"Seat",
|
|
51
|
+
"convene",
|
|
52
|
+
"ConveneError",
|
|
53
|
+
"HTTPLLMClient",
|
|
54
|
+
"DecisionType",
|
|
55
|
+
"CATALOG",
|
|
56
|
+
"UnknownDecisionType",
|
|
57
|
+
"resolution_date_for",
|
|
58
|
+
"seat",
|
|
59
|
+
"seats",
|
|
60
|
+
"panel",
|
|
61
|
+
"seat_slugs",
|
|
62
|
+
"panel_names",
|
|
63
|
+
"UnknownSeat",
|
|
64
|
+
"UnknownPanel",
|
|
65
|
+
]
|
|
66
|
+
|
|
67
|
+
__version__ = "0.2.0"
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Render a BoardMinute as a git-committable Architecture Decision Record.
|
|
2
|
+
|
|
3
|
+
The board-minute IS the artifact: a markdown ADR a user commits to their own
|
|
4
|
+
repo. Git history then provides the external attestation of the anchor timestamp
|
|
5
|
+
-- the thing a funded competitor cannot retroactively manufacture.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .model import BoardMinute
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _pct(p: float) -> str:
|
|
12
|
+
return f"{round(p * 100)}%"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _table_cell(text: str) -> str:
    """Flatten free-form text so it fits inside one GFM table cell."""
    # A table row must stay on a single line, so every newline flavour
    # (not just "\n") is folded into a space.
    flat = text.strip().replace("\r\n", " ").replace("\r", " ").replace("\n", " ")
    # An unescaped pipe would be read as a column separator and split the cell.
    return flat.replace("|", "\\|")


def render_adr(minute: BoardMinute) -> str:
    """Render *minute* as a markdown Architecture Decision Record.

    The document is built section by section: title and status, stated
    prior, decision, the pre-registered prediction (as a blockquote), the
    per-seat table, and -- only when ``minute.resolution`` is set -- a
    resolution section.

    Args:
        minute: The board-minute to render. Only attribute access is used:
            ``id``, ``question``, ``prior``, ``decision``, ``created_at``,
            ``prediction``, ``seats``, ``resolution``, ``vindicated`` and the
            methods ``seat_briers()``, ``board_brier()`` and
            ``contrarian_winners()``.

    Returns:
        The ADR as a single string with trailing whitespace trimmed and
        exactly one terminating newline.
    """
    m = minute
    resolved = m.resolution is not None

    if resolved:
        verdict = "VINDICATED" if m.vindicated else "REFUTED"
        status = f"Resolved {m.resolution.resolved_at.date().isoformat()} -- {verdict}"
    else:
        status = f"Pre-registered (resolves {m.prediction.resolution_date.isoformat()})"

    lines: list[str] = [
        f"# ADR-{m.id}: {m.question}",
        "",
        f"- **Status:** {status}",
        f"- **Anchored:** {m.created_at.isoformat()}",
        f"- **Resolution date:** {m.prediction.resolution_date.isoformat()}",
        "",
        "## Context (stated prior)",
        "",
        m.prior.strip() or "_none recorded_",
        "",
        "## Decision",
        "",
        m.decision.strip() or "_none recorded_",
        "",
        "## Pre-registered prediction",
        "",
    ]

    # Prefix every line of the statement, not only the first: a blank line
    # inside a multi-line statement would otherwise terminate the blockquote.
    # An empty statement still yields a single "> " line, as before.
    for quoted in m.prediction.statement.strip().splitlines() or [""]:
        lines.append(f"> {quoted}")

    lines.extend(
        [
            "",
            f"- **Board confidence:** {_pct(m.prediction.board_probability)} that this resolves TRUE",
            f"- **Resolves:** {m.prediction.resolution_date.isoformat()}",
            "",
            "## Board seats (dissent vector)",
            "",
        ]
    )

    if m.seats:
        header = "| Seat | Stance | P(true) | "
        sep = "|---|---|---|"
        if resolved:
            # The Brier column only exists once there is an outcome to grade.
            header += "Brier | "
            sep += "---|"
        header += "Rationale |"
        sep += "---|"
        lines.append(header)
        lines.append(sep)
        # Called unconditionally, as in the original; the result is only
        # indexed when the minute is resolved.
        # NOTE(review): presumably maps seat name -> Brier score -- confirm
        # against BoardMinute.seat_briers.
        briers = m.seat_briers()
        for s in m.seats:
            row = f"| {s.seat} | {s.stance} | {_pct(s.probability)} | "
            if resolved:
                row += f"{briers[s.seat]:.3f} | "
            # Rationale is free-form text, so it is flattened and its pipes
            # escaped to keep the row intact.
            row += f"{_table_cell(s.rationale)} |"
            lines.append(row)
    else:
        lines.append("_no seats recorded_")
    lines.append("")

    if resolved:
        outcome = "TRUE" if m.resolution.realized_outcome else "FALSE"
        lines.append("## Resolution")
        lines.append("")
        lines.append(f"- **Realized outcome:** {outcome}")
        lines.append(f"- **Board Brier:** {m.board_brier():.3f} (lower is better)")
        winners = m.contrarian_winners()
        if winners:
            lines.append(f"- **Contrarian wins:** {', '.join(winners)} "
                         f"(dissented and beat the consensus)")
        note = m.resolution.note.strip()
        if note:
            lines.append(f"- **Note:** {note}")
        lines.append("")

    # Drop the trailing blank entries and end the file with a single newline.
    return "\n".join(lines).rstrip() + "\n"
|