probability-flow 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- probability_flow-0.1.0/.gitignore +8 -0
- probability_flow-0.1.0/LICENSE +21 -0
- probability_flow-0.1.0/PKG-INFO +304 -0
- probability_flow-0.1.0/README.md +270 -0
- probability_flow-0.1.0/probability_flow/__init__.py +41 -0
- probability_flow-0.1.0/probability_flow/aspic/__init__.py +21 -0
- probability_flow-0.1.0/probability_flow/aspic/argument.py +182 -0
- probability_flow-0.1.0/probability_flow/aspic/calibrate.py +178 -0
- probability_flow-0.1.0/probability_flow/aspic/compile.py +175 -0
- probability_flow-0.1.0/probability_flow/aspic/generate.py +397 -0
- probability_flow-0.1.0/probability_flow/aspic/handle.py +281 -0
- probability_flow-0.1.0/probability_flow/aspic/visualization.py +128 -0
- probability_flow-0.1.0/probability_flow/core/__init__.py +29 -0
- probability_flow-0.1.0/probability_flow/core/_logmath.py +40 -0
- probability_flow-0.1.0/probability_flow/core/bp/__init__.py +5 -0
- probability_flow-0.1.0/probability_flow/core/bp/engine.py +198 -0
- probability_flow-0.1.0/probability_flow/core/bp/message.py +30 -0
- probability_flow-0.1.0/probability_flow/core/cpd/__init__.py +13 -0
- probability_flow-0.1.0/probability_flow/core/cpd/base.py +84 -0
- probability_flow-0.1.0/probability_flow/core/cpd/independent_evidence.py +155 -0
- probability_flow-0.1.0/probability_flow/core/cpd/noisy_and.py +113 -0
- probability_flow-0.1.0/probability_flow/core/cpd/noisy_or.py +109 -0
- probability_flow-0.1.0/probability_flow/core/cpd/tabular.py +111 -0
- probability_flow-0.1.0/probability_flow/core/exact.py +67 -0
- probability_flow-0.1.0/probability_flow/core/network.py +111 -0
- probability_flow-0.1.0/probability_flow/core/node.py +125 -0
- probability_flow-0.1.0/probability_flow/metrics/__init__.py +64 -0
- probability_flow-0.1.0/probability_flow/metrics/_util.py +42 -0
- probability_flow-0.1.0/probability_flow/metrics/difficulty.py +87 -0
- probability_flow-0.1.0/probability_flow/metrics/dseparation.py +83 -0
- probability_flow-0.1.0/probability_flow/metrics/loopiness.py +82 -0
- probability_flow-0.1.0/probability_flow/metrics/manipulability.py +207 -0
- probability_flow-0.1.0/probability_flow/metrics/structure.py +49 -0
- probability_flow-0.1.0/probability_flow/py.typed +0 -0
- probability_flow-0.1.0/probability_flow/visualization/__init__.py +11 -0
- probability_flow-0.1.0/probability_flow/visualization/image.py +402 -0
- probability_flow-0.1.0/probability_flow/visualization/style.py +58 -0
- probability_flow-0.1.0/pyproject.toml +72 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Alex Roman
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: probability-flow
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A from-scratch, modular discrete Bayesian-network library.
|
|
5
|
+
Project-URL: Homepage, https://github.com/scalable-oversight-benchmarks/probability-flow
|
|
6
|
+
Project-URL: Repository, https://github.com/scalable-oversight-benchmarks/probability-flow
|
|
7
|
+
Project-URL: Issues, https://github.com/scalable-oversight-benchmarks/probability-flow/issues
|
|
8
|
+
Author-email: Alex Roman <dr.alexroman@gmail.com>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: argumentation,aspic,bayesian-network,belief-propagation,inference,probabilistic-graphical-models
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
22
|
+
Classifier: Typing :: Typed
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Requires-Dist: numpy
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: matplotlib; extra == 'dev'
|
|
27
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
28
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
29
|
+
Provides-Extra: jax
|
|
30
|
+
Requires-Dist: jax; extra == 'jax'
|
|
31
|
+
Provides-Extra: viz
|
|
32
|
+
Requires-Dist: matplotlib; extra == 'viz'
|
|
33
|
+
Description-Content-Type: text/markdown
|
|
34
|
+
|
|
35
|
+
# probability-flow
|
|
36
|
+
|
|
37
|
+
A small discrete Bayesian-network library written from scratch in NumPy, with no
|
|
38
|
+
external inference dependency. The goal is full control over and understanding of
|
|
39
|
+
the inference stack rather than a black box: every distribution and every solver
|
|
40
|
+
is plain, readable code that can be checked against a brute-force reference.
|
|
41
|
+
|
|
42
|
+
On top of that core it carries an **argumentation layer (ASPIC)** that compiles
|
|
43
|
+
typed argument graphs into networks, a domain-free **metrics** seam that measures
|
|
44
|
+
a compiled network, and a **generator** that samples random argument graphs with
|
|
45
|
+
controllable difficulty — the substrate for a benchmark of debate scenarios with
|
|
46
|
+
known ground truth. Two further capabilities are opt-in extras: matplotlib
|
|
47
|
+
**renderers** (`[viz]`) and JAX **posterior calibration and sensitivities**
|
|
48
|
+
(`[jax]`).
|
|
49
|
+
|
|
50
|
+
The whole stack is layered and decoupled: `core/` knows nothing about arguments,
|
|
51
|
+
`metrics/` knows nothing about the domain, and the optional extras pull in nothing
|
|
52
|
+
unless you ask for them.
|
|
53
|
+
|
|
54
|
+
## Installing
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
pip install probability-flow # core: NumPy only
|
|
58
|
+
pip install 'probability-flow[viz]' # + matplotlib renderers
|
|
59
|
+
pip install 'probability-flow[jax]' # + JAX calibration / sensitivities
|
|
60
|
+
pip install 'probability-flow[viz,jax]' # both
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Requires Python 3.11 or newer. The import name is `probability_flow`; the base install
|
|
64
|
+
depends only on **NumPy** — matplotlib and JAX are optional extras, pulled in only
|
|
65
|
+
by `[viz]` and `[jax]`. Calling a renderer or the calibration module without its
|
|
66
|
+
extra raises an `ImportError` telling you which one to install. `[dev]` adds pytest
|
|
67
|
+
and ruff, plus matplotlib.
|
|
68
|
+
|
|
69
|
+
From a checkout, the package also runs in place with no install step: the
|
|
70
|
+
importable package is the inner `probability_flow/` directory, so working from the
|
|
71
|
+
repository root (which pytest puts on the path) makes `import probability_flow`
|
|
72
|
+
resolve. Run the tests from the repository root with `pytest`. The example notebooks add
|
|
73
|
+
the path themselves, so they run as-is. CI runs ruff and the test suite on Python
|
|
74
|
+
3.11–3.13 on every push.
|
|
75
|
+
|
|
76
|
+
## Quickstart
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
from probability_flow import Node, ExactSolver, LoopySolver
|
|
80
|
+
|
|
81
|
+
guilty = Node("Guilty") # the target; prior 0.5 by default
|
|
82
|
+
guilty.add_input(Node("Seen at scene"), lr=9) # supporting evidence
|
|
83
|
+
guilty.add_input(Node("Alibi"), lr=0.25) # evidence against (lr < 1)
|
|
84
|
+
|
|
85
|
+
bn = guilty.compile()
|
|
86
|
+
ExactSolver(bn).prob(guilty, 1) # 0.5731, by enumerating the joint
|
|
87
|
+
LoopySolver(bn).prob(guilty, 1) # 0.5731, by message passing
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Build, compile, solve
|
|
91
|
+
|
|
92
|
+
Inference happens in three stages.
|
|
93
|
+
|
|
94
|
+
1. **Build.** You assemble a mutable graph of `Node` objects. `add_input` wires
|
|
95
|
+
one node as an input of another and returns the input, so upstream structure
|
|
96
|
+
can be built inline. It takes a single node or a list, and a per-edge weight
|
|
97
|
+
that is forwarded to the node's distribution.
|
|
98
|
+
2. **Compile.** `target.compile()` walks the graph from the target, checks it
|
|
99
|
+
(acyclic, consistent shapes, one distribution per node), assigns a
|
|
100
|
+
topological order, and freezes everything into an immutable
|
|
101
|
+
`BayesianNetwork`. Authoring is ergonomic; the compiled form is vectorized.
|
|
102
|
+
3. **Solve.** A solver answers `marginal(node)` and `prob(node, state)` queries
|
|
103
|
+
on the compiled network, optionally conditioned on `evidence`.
|
|
104
|
+
|
|
105
|
+
An edge `Y -> X` declares only that X's distribution is `P(X | Y)`. It is a
|
|
106
|
+
conditioning relationship, not a claim about causation. A causal model points its
|
|
107
|
+
edges with causation (cause to effect); an argument graph points them against it
|
|
108
|
+
(evidence to claim, since the claim is what produces the evidence). The library
|
|
109
|
+
does not care which: it only ever works with `P(node | inputs)`.
|
|
110
|
+
|
|
111
|
+
## Distributions
|
|
112
|
+
|
|
113
|
+
Each node carries a conditional probability distribution (CPD) describing
|
|
114
|
+
`P(node | its inputs)`. The library distinguishes the combination *rule* (the
|
|
115
|
+
combiner) from the *object* that implements it (the CPD).
|
|
116
|
+
|
|
117
|
+
- **IndependentEvidenceCPD** (the default). Treats inputs as uncorrelated
|
|
118
|
+
sources of evidence: `logit P(node=1) = logit(prior) + sum of log(lr)` over the
|
|
119
|
+
active inputs. Adding weights of evidence is Bayes' rule for independent
|
|
120
|
+
likelihood ratios. Set per edge with `add_input(x, lr=...)`.
|
|
121
|
+
- **NoisyOrCPD**. "Any one cause can fire the effect":
|
|
122
|
+
`P(node=0) = (1 - leak) * product of (1 - activation)` over present causes.
|
|
123
|
+
Declared with `node.noisy_or(leak=...)` and `add_input(cause, activation=...)`.
|
|
124
|
+
- **NoisyAndCPD**. A native conjunction: the effect needs every condition
|
|
125
|
+
present, each succeeding with its own `activation`, with `leak` acting as a
|
|
126
|
+
global inhibitor. Declared with `node.noisy_and(leak=...)`.
|
|
127
|
+
- **TabularCPD**. An arbitrary dense table, the fallback and the reference behind
|
|
128
|
+
every other distribution. Set with `node.set_cpd(TabularCPD(...))`. Its `repr`
|
|
129
|
+
prints the full table as a readable bordered grid.
|
|
130
|
+
|
|
131
|
+
`IndependentEvidence`, `NoisyOr`, and `NoisyAnd` all belong to the same family:
|
|
132
|
+
each input contributes independently, and they differ only in how the
|
|
133
|
+
contributions combine.
|
|
134
|
+
|
|
135
|
+
## Inference
|
|
136
|
+
|
|
137
|
+
- **ExactSolver**. Brute-force enumeration of the joint distribution. Obviously
|
|
138
|
+
correct, exponential in the number of nodes, and intended for small graphs and
|
|
139
|
+
as the ground truth that everything else is tested against. Supports evidence,
|
|
140
|
+
so colliders and explaining-away can be exercised.
|
|
141
|
+
- **LoopySolver**. Loopy belief propagation. It treats each distribution as a
|
|
142
|
+
factor and passes messages on the factor graph. It is exact on loop-free graphs
|
|
143
|
+
(it converges to the true marginals) and approximate on graphs with loops,
|
|
144
|
+
where it iterates to a fixed point. It has the same query surface as the exact
|
|
145
|
+
solver, plus a bulk `marginals()`, optional `damping` to settle oscillation,
|
|
146
|
+
and per-evidence caching.
|
|
147
|
+
|
|
148
|
+
## Why it stays small
|
|
149
|
+
|
|
150
|
+
A node with k binary inputs has a table of size 2^k. The design keeps that cost
|
|
151
|
+
off the production path:
|
|
152
|
+
|
|
153
|
+
- `as_tabular()` materializes a distribution's full table. It is used only by the
|
|
154
|
+
exact solver and the tests, on deliberately small graphs, never during normal
|
|
155
|
+
inference.
|
|
156
|
+
- `LoopySolver` talks to a distribution only through two message methods
|
|
157
|
+
(`message_to_output` and `message_to_input`). The engine never inspects a
|
|
158
|
+
distribution's type.
|
|
159
|
+
|
|
160
|
+
This means a structured distribution computes its messages in time linear in the
|
|
161
|
+
number of inputs, without ever building the 2^k table, and the engine does not
|
|
162
|
+
change. These linear-time messages are implemented: exact for the gates
|
|
163
|
+
(`NoisyOr` / `NoisyAnd`), and a hybrid for the default `IndependentEvidence` —
|
|
164
|
+
exact below a fan-in threshold, a capped-bucket approximation above it (within
|
|
165
|
+
1e-3 of the exact solver). An arbitrary
|
|
166
|
+
`TabularCPD` still falls back to the dense default, which is correct and
|
|
167
|
+
unavoidable. See `docs/fast_messages.md`.
|
|
168
|
+
|
|
169
|
+
## The ASPIC argument layer
|
|
170
|
+
|
|
171
|
+
`probability_flow.aspic` builds a typed argument out of premises and conclusions
|
|
172
|
+
joined by support / rebut / undermine / undercut / strict edges, then **compiles
|
|
173
|
+
it to an ordinary `BayesianNetwork`**. All argumentation vocabulary lives here;
|
|
174
|
+
`core/` stays free of it (ASPIC is the first of several planned domain wrappers —
|
|
175
|
+
legal, medical, AI-safety — over the same core). Every argumentative edge is a
|
|
176
|
+
method on its downstream node, so the whole graph traverses from the root target.
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from probability_flow.aspic import Premise, Conclusion
|
|
180
|
+
|
|
181
|
+
guilty = Conclusion("Guilty", prior=0.5)
|
|
182
|
+
seen = guilty.support(Premise("Seen at scene", 0.5), lr=9) # supporting argument
|
|
183
|
+
guilty.rebut(Premise("Alibi", 0.5), lr=0.25) # attacking argument (lr < 1)
|
|
184
|
+
guilty.undercut(seen, by=Premise("Unreliable witness", 0.5)) # attack the support *edge*
|
|
185
|
+
|
|
186
|
+
bn = guilty.compile() # an ordinary BayesianNetwork to solve as above
|
|
187
|
+
arg = guilty.assemble() # a serializable Argument handle
|
|
188
|
+
arg.posterior(guilty) # 0.4615
|
|
189
|
+
arg.save("guilty.json") # to_json / from_json / save / load — round-trips to identical posteriors
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
`assemble()` returns an `Argument` handle that owns the cached compile, the
|
|
193
|
+
posterior queries, JSON serialization (typed at the argument level), and the
|
|
194
|
+
metric methods below. See `docs/aspic.md`.
|
|
195
|
+
|
|
196
|
+
## Metrics
|
|
197
|
+
|
|
198
|
+
`probability_flow.metrics` measures a compiled network and **depends only on
|
|
199
|
+
`core`** — no argumentation vocabulary, so every wrapper reuses it. Every function
|
|
200
|
+
is pure and accepts either a compiled `BayesianNetwork` or a target node (compiled
|
|
201
|
+
on the spot), and results carry your own `Node` objects, never ids.
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
from probability_flow import metrics
|
|
205
|
+
|
|
206
|
+
metrics.d_separated_groups(bn, guilty) # independent evidence branches
|
|
207
|
+
metrics.upstream_size(guilty) # size of the evidence base (auto-compiles)
|
|
208
|
+
metrics.circuit_rank(bn) # distance from a tree (0 == polytree)
|
|
209
|
+
metrics.posterior_range(bn, guilty) # manipulability: posterior range over prunings
|
|
210
|
+
bn.max_depth(guilty) # trivial structural ones are also BN methods
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
The seam covers d-separation grouping, structure (depth, size), loopiness (circuit rank, polytree test),
|
|
214
|
+
difficulty (posterior gap, log-odds deltas, concentration Gini), and
|
|
215
|
+
manipulability (posterior range). See `docs/metrics.md`.
|
|
216
|
+
|
|
217
|
+
## Generating argument graphs
|
|
218
|
+
|
|
219
|
+
`probability_flow.aspic.generate` rejection-samples ASPIC arguments that meet a
|
|
220
|
+
difficulty target — random support/attack chains, optional undercutters, axiomatic
|
|
221
|
+
leaves, strict edges, and parent sharing, screened against the real metrics layer.
|
|
222
|
+
This is the substrate for a benchmark of debate scenarios with known ground truth.
|
|
223
|
+
|
|
224
|
+
```python
|
|
225
|
+
from probability_flow.aspic import generate, StructuralParams, DifficultyTargets
|
|
226
|
+
|
|
227
|
+
arg = generate(
|
|
228
|
+
seed=0,
|
|
229
|
+
structural=StructuralParams(n_support=2, n_attack=1),
|
|
230
|
+
targets=DifficultyTargets(target_posterior=0.7), # reached by calibrating the root prior
|
|
231
|
+
)
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
The structural shape is set directly; difficulty targets are screened and the
|
|
235
|
+
target posterior is hit by calibrating the root prior. See `docs/generation.md`.
|
|
236
|
+
|
|
237
|
+
## Visualization (optional, `[viz]`)
|
|
238
|
+
|
|
239
|
+
With the `[viz]` extra installed, a compiled network and an argument both render to
|
|
240
|
+
a matplotlib figure with an in-house layered layout (likelihood-ratio edges
|
|
241
|
+
coloured red→blue by their LR):
|
|
242
|
+
|
|
243
|
+
```python
|
|
244
|
+
bn.render() # or render(bn) from probability_flow.visualization
|
|
245
|
+
guilty.assemble().render() # the argument view
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
matplotlib is imported lazily, only when you draw, so importing `probability_flow`
|
|
249
|
+
never pulls it in; without the extra these calls raise an `ImportError` pointing at
|
|
250
|
+
`pip install 'probability-flow[viz]'`.
|
|
251
|
+
|
|
252
|
+
## Calibration and sensitivities (optional, `[jax]`)
|
|
253
|
+
|
|
254
|
+
With the `[jax]` extra, `probability_flow.aspic.calibrate` differentiates the root
|
|
255
|
+
posterior through the compiled argument: `sensitivities(arg)` gives a
|
|
256
|
+
per-parameter importance signal (via `jax.grad`), and `calibrate_posterior(arg,
|
|
257
|
+
target)` solves for parameters that drive the root posterior to a chosen value.
|
|
258
|
+
JAX is imported lazily inside these calls, so without the extra they raise an
|
|
259
|
+
`ImportError` pointing at `pip install 'probability-flow[jax]'`.
|
|
260
|
+
|
|
261
|
+
## Layout
|
|
262
|
+
|
|
263
|
+
```
|
|
264
|
+
probability_flow/ the importable package
|
|
265
|
+
core/
|
|
266
|
+
node.py authoring layer (Node, add_input, compile)
|
|
267
|
+
network.py the compiled, immutable network
|
|
268
|
+
exact.py ExactSolver
|
|
269
|
+
cpd/ the distributions (base contract + the four CPDs)
|
|
270
|
+
bp/ loopy belief propagation (LoopySolver)
|
|
271
|
+
_logmath.py log-space numerics
|
|
272
|
+
aspic/ the ASPIC argument layer: authoring + compilation,
|
|
273
|
+
serialization, a random argument generator, and optional
|
|
274
|
+
JAX posterior calibration
|
|
275
|
+
metrics/ graph metrics (d-separation, depth/size, loopiness,
|
|
276
|
+
difficulty, manipulability)
|
|
277
|
+
visualization/ matplotlib renderers (arg.render() / bn.render()), [viz]
|
|
278
|
+
docs/ design notes (SPEC, DECISIONS, ROADMAP, and more)
|
|
279
|
+
tests/ checked against the exact solver
|
|
280
|
+
demos/ walkthrough notebooks
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
## Status and next steps
|
|
284
|
+
|
|
285
|
+
Working today: the build/compile flow, all four distributions, both solvers,
|
|
286
|
+
linear-time structured messages, evidence, the ASPIC argument-compilation layer,
|
|
287
|
+
argument serialization, the metrics seam (d-separation grouping, depth/size,
|
|
288
|
+
loopiness, difficulty, manipulability), a random argument generator with structural
|
|
289
|
+
and difficulty controls, optional matplotlib renderers, and optional JAX-based
|
|
290
|
+
posterior calibration and parameter sensitivities. Planned (see `docs/ROADMAP.md`):
|
|
291
|
+
a core-network serializer, the loopy-BP "topology zoo" robustness harness, and the
|
|
292
|
+
exact manipulability range.
|
|
293
|
+
|
|
294
|
+
## Learning more
|
|
295
|
+
|
|
296
|
+
The notebooks in `demos/` walk through the library end to end:
|
|
297
|
+
`walkthrough.ipynb` for the core, `noisy_gates_walkthrough.ipynb` for the gate
|
|
298
|
+
distributions, `loopy_bp_walkthrough.ipynb` for belief propagation,
|
|
299
|
+
`visualization_demo.ipynb` for the renderers, `metrics_walkthrough.ipynb` for
|
|
300
|
+
the metrics seam, and `generation_demo.ipynb` for the argument generator. The
|
|
301
|
+
`docs/` directory holds the design rationale, the settled decisions, and the
|
|
302
|
+
roadmap.
|
|
303
|
+
|
|
304
|
+
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
# probability-flow
|
|
2
|
+
|
|
3
|
+
A small discrete Bayesian-network library written from scratch in NumPy, with no
|
|
4
|
+
external inference dependency. The goal is full control over and understanding of
|
|
5
|
+
the inference stack rather than a black box: every distribution and every solver
|
|
6
|
+
is plain, readable code that can be checked against a brute-force reference.
|
|
7
|
+
|
|
8
|
+
On top of that core it carries an **argumentation layer (ASPIC)** that compiles
|
|
9
|
+
typed argument graphs into networks, a domain-free **metrics** seam that measures
|
|
10
|
+
a compiled network, and a **generator** that samples random argument graphs with
|
|
11
|
+
controllable difficulty — the substrate for a benchmark of debate scenarios with
|
|
12
|
+
known ground truth. Two further capabilities are opt-in extras: matplotlib
|
|
13
|
+
**renderers** (`[viz]`) and JAX **posterior calibration and sensitivities**
|
|
14
|
+
(`[jax]`).
|
|
15
|
+
|
|
16
|
+
The whole stack is layered and decoupled: `core/` knows nothing about arguments,
|
|
17
|
+
`metrics/` knows nothing about the domain, and the optional extras pull in nothing
|
|
18
|
+
unless you ask for them.
|
|
19
|
+
|
|
20
|
+
## Installing
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
pip install probability-flow # core: NumPy only
|
|
24
|
+
pip install 'probability-flow[viz]' # + matplotlib renderers
|
|
25
|
+
pip install 'probability-flow[jax]' # + JAX calibration / sensitivities
|
|
26
|
+
pip install 'probability-flow[viz,jax]' # both
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Requires Python 3.11 or newer. The import name is `probability_flow`; the base install
|
|
30
|
+
depends only on **NumPy** — matplotlib and JAX are optional extras, pulled in only
|
|
31
|
+
by `[viz]` and `[jax]`. Calling a renderer or the calibration module without its
|
|
32
|
+
extra raises an `ImportError` telling you which one to install. `[dev]` adds pytest
|
|
33
|
+
and ruff, plus matplotlib.
|
|
34
|
+
|
|
35
|
+
From a checkout, the package also runs in place with no install step: the
|
|
36
|
+
importable package is the inner `probability_flow/` directory, so working from the
|
|
37
|
+
repository root (which pytest puts on the path) makes `import probability_flow`
|
|
38
|
+
resolve. Run the tests from the repository root with `pytest`. The example notebooks add
|
|
39
|
+
the path themselves, so they run as-is. CI runs ruff and the test suite on Python
|
|
40
|
+
3.11–3.13 on every push.
|
|
41
|
+
|
|
42
|
+
## Quickstart
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from probability_flow import Node, ExactSolver, LoopySolver
|
|
46
|
+
|
|
47
|
+
guilty = Node("Guilty") # the target; prior 0.5 by default
|
|
48
|
+
guilty.add_input(Node("Seen at scene"), lr=9) # supporting evidence
|
|
49
|
+
guilty.add_input(Node("Alibi"), lr=0.25) # evidence against (lr < 1)
|
|
50
|
+
|
|
51
|
+
bn = guilty.compile()
|
|
52
|
+
ExactSolver(bn).prob(guilty, 1) # 0.5731, by enumerating the joint
|
|
53
|
+
LoopySolver(bn).prob(guilty, 1) # 0.5731, by message passing
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Build, compile, solve
|
|
57
|
+
|
|
58
|
+
Inference happens in three stages.
|
|
59
|
+
|
|
60
|
+
1. **Build.** You assemble a mutable graph of `Node` objects. `add_input` wires
|
|
61
|
+
one node as an input of another and returns the input, so upstream structure
|
|
62
|
+
can be built inline. It takes a single node or a list, and a per-edge weight
|
|
63
|
+
that is forwarded to the node's distribution.
|
|
64
|
+
2. **Compile.** `target.compile()` walks the graph from the target, checks it
|
|
65
|
+
(acyclic, consistent shapes, one distribution per node), assigns a
|
|
66
|
+
topological order, and freezes everything into an immutable
|
|
67
|
+
`BayesianNetwork`. Authoring is ergonomic; the compiled form is vectorized.
|
|
68
|
+
3. **Solve.** A solver answers `marginal(node)` and `prob(node, state)` queries
|
|
69
|
+
on the compiled network, optionally conditioned on `evidence`.
|
|
70
|
+
|
|
71
|
+
An edge `Y -> X` declares only that X's distribution is `P(X | Y)`. It is a
|
|
72
|
+
conditioning relationship, not a claim about causation. A causal model points its
|
|
73
|
+
edges with causation (cause to effect); an argument graph points them against it
|
|
74
|
+
(evidence to claim, since the claim is what produces the evidence). The library
|
|
75
|
+
does not care which: it only ever works with `P(node | inputs)`.
|
|
76
|
+
|
|
77
|
+
## Distributions
|
|
78
|
+
|
|
79
|
+
Each node carries a conditional probability distribution (CPD) describing
|
|
80
|
+
`P(node | its inputs)`. The library distinguishes the combination *rule* (the
|
|
81
|
+
combiner) from the *object* that implements it (the CPD).
|
|
82
|
+
|
|
83
|
+
- **IndependentEvidenceCPD** (the default). Treats inputs as uncorrelated
|
|
84
|
+
sources of evidence: `logit P(node=1) = logit(prior) + sum of log(lr)` over the
|
|
85
|
+
active inputs. Adding weights of evidence is Bayes' rule for independent
|
|
86
|
+
likelihood ratios. Set per edge with `add_input(x, lr=...)`.
|
|
87
|
+
- **NoisyOrCPD**. "Any one cause can fire the effect":
|
|
88
|
+
`P(node=0) = (1 - leak) * product of (1 - activation)` over present causes.
|
|
89
|
+
Declared with `node.noisy_or(leak=...)` and `add_input(cause, activation=...)`.
|
|
90
|
+
- **NoisyAndCPD**. A native conjunction: the effect needs every condition
|
|
91
|
+
present, each succeeding with its own `activation`, with `leak` acting as a
|
|
92
|
+
global inhibitor. Declared with `node.noisy_and(leak=...)`.
|
|
93
|
+
- **TabularCPD**. An arbitrary dense table, the fallback and the reference behind
|
|
94
|
+
every other distribution. Set with `node.set_cpd(TabularCPD(...))`. Its `repr`
|
|
95
|
+
prints the full table as a readable bordered grid.
|
|
96
|
+
|
|
97
|
+
`IndependentEvidence`, `NoisyOr`, and `NoisyAnd` all belong to the same family:
|
|
98
|
+
each input contributes independently, and they differ only in how the
|
|
99
|
+
contributions combine.
|
|
100
|
+
|
|
101
|
+
## Inference
|
|
102
|
+
|
|
103
|
+
- **ExactSolver**. Brute-force enumeration of the joint distribution. Obviously
|
|
104
|
+
correct, exponential in the number of nodes, and intended for small graphs and
|
|
105
|
+
as the ground truth that everything else is tested against. Supports evidence,
|
|
106
|
+
so colliders and explaining-away can be exercised.
|
|
107
|
+
- **LoopySolver**. Loopy belief propagation. It treats each distribution as a
|
|
108
|
+
factor and passes messages on the factor graph. It is exact on loop-free graphs
|
|
109
|
+
(it converges to the true marginals) and approximate on graphs with loops,
|
|
110
|
+
where it iterates to a fixed point. It has the same query surface as the exact
|
|
111
|
+
solver, plus a bulk `marginals()`, optional `damping` to settle oscillation,
|
|
112
|
+
and per-evidence caching.
|
|
113
|
+
|
|
114
|
+
## Why it stays small
|
|
115
|
+
|
|
116
|
+
A node with k binary inputs has a table of size 2^k. The design keeps that cost
|
|
117
|
+
off the production path:
|
|
118
|
+
|
|
119
|
+
- `as_tabular()` materializes a distribution's full table. It is used only by the
|
|
120
|
+
exact solver and the tests, on deliberately small graphs, never during normal
|
|
121
|
+
inference.
|
|
122
|
+
- `LoopySolver` talks to a distribution only through two message methods
|
|
123
|
+
(`message_to_output` and `message_to_input`). The engine never inspects a
|
|
124
|
+
distribution's type.
|
|
125
|
+
|
|
126
|
+
This means a structured distribution computes its messages in time linear in the
|
|
127
|
+
number of inputs, without ever building the 2^k table, and the engine does not
|
|
128
|
+
change. These linear-time messages are implemented: exact for the gates
|
|
129
|
+
(`NoisyOr` / `NoisyAnd`), and a hybrid for the default `IndependentEvidence` —
|
|
130
|
+
exact below a fan-in threshold, a capped-bucket approximation above it (within
|
|
131
|
+
1e-3 of the exact solver). An arbitrary
|
|
132
|
+
`TabularCPD` still falls back to the dense default, which is correct and
|
|
133
|
+
unavoidable. See `docs/fast_messages.md`.
|
|
134
|
+
|
|
135
|
+
## The ASPIC argument layer
|
|
136
|
+
|
|
137
|
+
`probability_flow.aspic` builds a typed argument out of premises and conclusions
|
|
138
|
+
joined by support / rebut / undermine / undercut / strict edges, then **compiles
|
|
139
|
+
it to an ordinary `BayesianNetwork`**. All argumentation vocabulary lives here;
|
|
140
|
+
`core/` stays free of it (ASPIC is the first of several planned domain wrappers —
|
|
141
|
+
legal, medical, AI-safety — over the same core). Every argumentative edge is a
|
|
142
|
+
method on its downstream node, so the whole graph traverses from the root target.
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
from probability_flow.aspic import Premise, Conclusion
|
|
146
|
+
|
|
147
|
+
guilty = Conclusion("Guilty", prior=0.5)
|
|
148
|
+
seen = guilty.support(Premise("Seen at scene", 0.5), lr=9) # supporting argument
|
|
149
|
+
guilty.rebut(Premise("Alibi", 0.5), lr=0.25) # attacking argument (lr < 1)
|
|
150
|
+
guilty.undercut(seen, by=Premise("Unreliable witness", 0.5)) # attack the support *edge*
|
|
151
|
+
|
|
152
|
+
bn = guilty.compile() # an ordinary BayesianNetwork to solve as above
|
|
153
|
+
arg = guilty.assemble() # a serializable Argument handle
|
|
154
|
+
arg.posterior(guilty) # 0.4615
|
|
155
|
+
arg.save("guilty.json") # to_json / from_json / save / load — round-trips to identical posteriors
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
`assemble()` returns an `Argument` handle that owns the cached compile, the
|
|
159
|
+
posterior queries, JSON serialization (typed at the argument level), and the
|
|
160
|
+
metric methods below. See `docs/aspic.md`.
|
|
161
|
+
|
|
162
|
+
## Metrics
|
|
163
|
+
|
|
164
|
+
`probability_flow.metrics` measures a compiled network and **depends only on
|
|
165
|
+
`core`** — no argumentation vocabulary, so every wrapper reuses it. Every function
|
|
166
|
+
is pure and accepts either a compiled `BayesianNetwork` or a target node (compiled
|
|
167
|
+
on the spot), and results carry your own `Node` objects, never ids.
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
from probability_flow import metrics
|
|
171
|
+
|
|
172
|
+
metrics.d_separated_groups(bn, guilty) # independent evidence branches
|
|
173
|
+
metrics.upstream_size(guilty) # size of the evidence base (auto-compiles)
|
|
174
|
+
metrics.circuit_rank(bn) # distance from a tree (0 == polytree)
|
|
175
|
+
metrics.posterior_range(bn, guilty) # manipulability: posterior range over prunings
|
|
176
|
+
bn.max_depth(guilty) # trivial structural metrics are also BN methods
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
The seam covers structure (depth, size), loopiness (circuit rank, polytree test),
|
|
180
|
+
difficulty (posterior gap, log-odds deltas, concentration Gini), and
|
|
181
|
+
manipulability (posterior range). See `docs/metrics.md`.
|
|
182
|
+
|
|
183
|
+
## Generating argument graphs
|
|
184
|
+
|
|
185
|
+
`probability_flow.aspic.generate` rejection-samples ASPIC arguments that meet a
|
|
186
|
+
difficulty target — random support/attack chains, optional undercutters, axiomatic
|
|
187
|
+
leaves, strict edges, and parent sharing, screened against the real metrics layer.
|
|
188
|
+
This is the substrate for a benchmark of debate scenarios with known ground truth.
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
from probability_flow.aspic import generate, StructuralParams, DifficultyTargets
|
|
192
|
+
|
|
193
|
+
arg = generate(
|
|
194
|
+
seed=0,
|
|
195
|
+
structural=StructuralParams(n_support=2, n_attack=1),
|
|
196
|
+
targets=DifficultyTargets(target_posterior=0.7), # reached by calibrating the root prior
|
|
197
|
+
)
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
The structural shape is set directly; difficulty targets are screened and the
|
|
201
|
+
target posterior is hit by calibrating the root prior. See `docs/generation.md`.
|
|
202
|
+
|
|
203
|
+
## Visualization (optional, `[viz]`)
|
|
204
|
+
|
|
205
|
+
With the `[viz]` extra installed, a compiled network and an argument both render to
|
|
206
|
+
a matplotlib figure with an in-house layered layout (likelihood-ratio edges
|
|
207
|
+
coloured red→blue by their LR):
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
bn.render() # or render(bn) from probability_flow.visualization
|
|
211
|
+
guilty.assemble().render() # the argument view
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
matplotlib is imported lazily, only when you draw, so importing `probability_flow`
|
|
215
|
+
never pulls it in; without the extra these calls raise an `ImportError` pointing at
|
|
216
|
+
`pip install 'probability-flow[viz]'`.
|
|
217
|
+
|
|
218
|
+
## Calibration and sensitivities (optional, `[jax]`)
|
|
219
|
+
|
|
220
|
+
With the `[jax]` extra, `probability_flow.aspic.calibrate` differentiates the root
|
|
221
|
+
posterior through the compiled argument: `sensitivities(arg)` gives a
|
|
222
|
+
per-parameter importance signal (via `jax.grad`), and `calibrate_posterior(arg,
|
|
223
|
+
target)` solves for parameters that drive the root posterior to a chosen value.
|
|
224
|
+
JAX is imported lazily inside these calls, so without the extra they raise an
|
|
225
|
+
`ImportError` pointing at `pip install 'probability-flow[jax]'`.
|
|
226
|
+
|
|
227
|
+
## Layout
|
|
228
|
+
|
|
229
|
+
```
|
|
230
|
+
probability_flow/ the importable package
|
|
231
|
+
core/
|
|
232
|
+
node.py authoring layer (Node, add_input, compile)
|
|
233
|
+
network.py the compiled, immutable network
|
|
234
|
+
exact.py ExactSolver
|
|
235
|
+
cpd/ the distributions (base contract + the four CPDs)
|
|
236
|
+
bp/ loopy belief propagation (LoopySolver)
|
|
237
|
+
_logmath.py log-space numerics
|
|
238
|
+
aspic/ the ASPIC argument layer: authoring + compilation,
|
|
239
|
+
serialization, a random argument generator, and optional
|
|
240
|
+
JAX posterior calibration
|
|
241
|
+
metrics/ graph metrics (d-separation, depth/size, loopiness,
|
|
242
|
+
difficulty, manipulability)
|
|
243
|
+
visualization/ matplotlib renderers (arg.render() / bn.render()), [viz]
|
|
244
|
+
docs/ design notes (SPEC, DECISIONS, ROADMAP, and more)
|
|
245
|
+
tests/ checked against the exact solver
|
|
246
|
+
demos/ walkthrough notebooks
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
## Status and next steps
|
|
250
|
+
|
|
251
|
+
Working today: the build/compile flow, all four distributions, both solvers with
|
|
252
|
+
linear-time structured messages, evidence, the ASPIC argument-compilation layer,
|
|
253
|
+
argument serialization, the metrics seam (d-separation grouping, depth/size,
|
|
254
|
+
loopiness, difficulty, manipulability), a random argument generator with structural
|
|
255
|
+
and difficulty controls, optional matplotlib renderers, and optional JAX-based
|
|
256
|
+
posterior calibration and parameter sensitivities. Planned (see `docs/ROADMAP.md`):
|
|
257
|
+
a core-network serializer, the loopy-BP "topology zoo" robustness harness, and the
|
|
258
|
+
exact manipulability range.
|
|
259
|
+
|
|
260
|
+
## Learning more
|
|
261
|
+
|
|
262
|
+
The notebooks in `demos/` walk through the library end to end:
|
|
263
|
+
`walkthrough.ipynb` for the core, `noisy_gates_walkthrough.ipynb` for the gate
|
|
264
|
+
distributions, `loopy_bp_walkthrough.ipynb` for belief propagation,
|
|
265
|
+
`visualization_demo.ipynb` for the renderers, `metrics_walkthrough.ipynb` for
|
|
266
|
+
the metrics seam, and `generation_demo.ipynb` for the argument generator. The
|
|
267
|
+
`docs/` directory holds the design rationale, the settled decisions, and the
|
|
268
|
+
roadmap.
|
|
269
|
+
```
|
|
270
|
+
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""probability_flow: a from-scratch, modular discrete Bayesian-network library.
|
|
2
|
+
|
|
3
|
+
The design is in `docs/SPEC.md`; settled choices in `docs/DECISIONS.md` and
|
|
4
|
+
milestones in `docs/ROADMAP.md`. The public API currently lives in
|
|
5
|
+
`probability_flow.core` and is re-exported here for convenience:
|
|
6
|
+
|
|
7
|
+
from probability_flow import Node, ExactSolver
|
|
8
|
+
"""
|
|
9
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
10
|
+
|
|
11
|
+
from .core import (
|
|
12
|
+
CPD,
|
|
13
|
+
BayesianNetwork,
|
|
14
|
+
CompiledCPD,
|
|
15
|
+
ExactSolver,
|
|
16
|
+
IndependentEvidenceCPD,
|
|
17
|
+
LoopySolver,
|
|
18
|
+
Node,
|
|
19
|
+
NoisyAndCPD,
|
|
20
|
+
NoisyOrCPD,
|
|
21
|
+
TabularCPD,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
__version__ = version("probability-flow")
|
|
26
|
+
except PackageNotFoundError: # running from a source checkout, not installed
|
|
27
|
+
__version__ = "0.0.0+unknown"
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
"__version__",
|
|
31
|
+
"Node",
|
|
32
|
+
"BayesianNetwork",
|
|
33
|
+
"CompiledCPD",
|
|
34
|
+
"ExactSolver",
|
|
35
|
+
"LoopySolver",
|
|
36
|
+
"CPD",
|
|
37
|
+
"TabularCPD",
|
|
38
|
+
"IndependentEvidenceCPD",
|
|
39
|
+
"NoisyOrCPD",
|
|
40
|
+
"NoisyAndCPD",
|
|
41
|
+
]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""probability_flow.aspic: the ASPIC argument-compilation layer.
|
|
2
|
+
|
|
3
|
+
The first of several planned domain wrappers (legal, medical, AI-safety) over the
|
|
4
|
+
pure-BN `core`. Build an argument out of premises, conclusions, and attacks, then
|
|
5
|
+
`compile()` the target to an ordinary `BayesianNetwork`. See `docs/aspic.md`.
|
|
6
|
+
|
|
7
|
+
from probability_flow.aspic import Premise, Axiom, Conclusion
|
|
8
|
+
"""
|
|
9
|
+
from .argument import ArgumentWarning, Axiom, Conclusion, Premise
|
|
10
|
+
from .generate import (
|
|
11
|
+
ArgumentGenerator,
|
|
12
|
+
DifficultyTargets,
|
|
13
|
+
StructuralParams,
|
|
14
|
+
generate,
|
|
15
|
+
)
|
|
16
|
+
from .handle import Argument
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"Premise", "Axiom", "Conclusion", "ArgumentWarning", "Argument",
|
|
20
|
+
"ArgumentGenerator", "StructuralParams", "DifficultyTargets", "generate",
|
|
21
|
+
]
|