probability-flow 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. probability_flow-0.1.0/.gitignore +8 -0
  2. probability_flow-0.1.0/LICENSE +21 -0
  3. probability_flow-0.1.0/PKG-INFO +304 -0
  4. probability_flow-0.1.0/README.md +270 -0
  5. probability_flow-0.1.0/probability_flow/__init__.py +41 -0
  6. probability_flow-0.1.0/probability_flow/aspic/__init__.py +21 -0
  7. probability_flow-0.1.0/probability_flow/aspic/argument.py +182 -0
  8. probability_flow-0.1.0/probability_flow/aspic/calibrate.py +178 -0
  9. probability_flow-0.1.0/probability_flow/aspic/compile.py +175 -0
  10. probability_flow-0.1.0/probability_flow/aspic/generate.py +397 -0
  11. probability_flow-0.1.0/probability_flow/aspic/handle.py +281 -0
  12. probability_flow-0.1.0/probability_flow/aspic/visualization.py +128 -0
  13. probability_flow-0.1.0/probability_flow/core/__init__.py +29 -0
  14. probability_flow-0.1.0/probability_flow/core/_logmath.py +40 -0
  15. probability_flow-0.1.0/probability_flow/core/bp/__init__.py +5 -0
  16. probability_flow-0.1.0/probability_flow/core/bp/engine.py +198 -0
  17. probability_flow-0.1.0/probability_flow/core/bp/message.py +30 -0
  18. probability_flow-0.1.0/probability_flow/core/cpd/__init__.py +13 -0
  19. probability_flow-0.1.0/probability_flow/core/cpd/base.py +84 -0
  20. probability_flow-0.1.0/probability_flow/core/cpd/independent_evidence.py +155 -0
  21. probability_flow-0.1.0/probability_flow/core/cpd/noisy_and.py +113 -0
  22. probability_flow-0.1.0/probability_flow/core/cpd/noisy_or.py +109 -0
  23. probability_flow-0.1.0/probability_flow/core/cpd/tabular.py +111 -0
  24. probability_flow-0.1.0/probability_flow/core/exact.py +67 -0
  25. probability_flow-0.1.0/probability_flow/core/network.py +111 -0
  26. probability_flow-0.1.0/probability_flow/core/node.py +125 -0
  27. probability_flow-0.1.0/probability_flow/metrics/__init__.py +64 -0
  28. probability_flow-0.1.0/probability_flow/metrics/_util.py +42 -0
  29. probability_flow-0.1.0/probability_flow/metrics/difficulty.py +87 -0
  30. probability_flow-0.1.0/probability_flow/metrics/dseparation.py +83 -0
  31. probability_flow-0.1.0/probability_flow/metrics/loopiness.py +82 -0
  32. probability_flow-0.1.0/probability_flow/metrics/manipulability.py +207 -0
  33. probability_flow-0.1.0/probability_flow/metrics/structure.py +49 -0
  34. probability_flow-0.1.0/probability_flow/py.typed +0 -0
  35. probability_flow-0.1.0/probability_flow/visualization/__init__.py +11 -0
  36. probability_flow-0.1.0/probability_flow/visualization/image.py +402 -0
  37. probability_flow-0.1.0/probability_flow/visualization/style.py +58 -0
  38. probability_flow-0.1.0/pyproject.toml +72 -0
@@ -0,0 +1,8 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ .pytest_cache/
4
+ .ruff_cache/
5
+ .venv/
6
+ *.egg-info/
7
+ .DS_Store
8
+ _previews/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Alex Roman
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,304 @@
1
+ Metadata-Version: 2.4
2
+ Name: probability-flow
3
+ Version: 0.1.0
4
+ Summary: A from-scratch, modular discrete Bayesian-network library.
5
+ Project-URL: Homepage, https://github.com/scalable-oversight-benchmarks/probability-flow
6
+ Project-URL: Repository, https://github.com/scalable-oversight-benchmarks/probability-flow
7
+ Project-URL: Issues, https://github.com/scalable-oversight-benchmarks/probability-flow/issues
8
+ Author-email: Alex Roman <dr.alexroman@gmail.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: argumentation,aspic,bayesian-network,belief-propagation,inference,probabilistic-graphical-models
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.11
24
+ Requires-Dist: numpy
25
+ Provides-Extra: dev
26
+ Requires-Dist: matplotlib; extra == 'dev'
27
+ Requires-Dist: pytest; extra == 'dev'
28
+ Requires-Dist: ruff; extra == 'dev'
29
+ Provides-Extra: jax
30
+ Requires-Dist: jax; extra == 'jax'
31
+ Provides-Extra: viz
32
+ Requires-Dist: matplotlib; extra == 'viz'
33
+ Description-Content-Type: text/markdown
34
+
35
+ # probability-flow
36
+
37
+ A small discrete Bayesian-network library written from scratch in NumPy, with no
38
+ external inference dependency. The goal is full control over and understanding of
39
+ the inference stack rather than a black box: every distribution and every solver
40
+ is plain, readable code that can be checked against a brute-force reference.
41
+
42
+ On top of that core it carries an **argumentation layer (ASPIC)** that compiles
43
+ typed argument graphs into networks, a domain-free **metrics** seam that measures
44
+ a compiled network, and a **generator** that samples random argument graphs with
45
+ controllable difficulty — the substrate for a benchmark of debate scenarios with
46
+ known ground truth. Two further capabilities are opt-in extras: matplotlib
47
+ **renderers** (`[viz]`) and JAX **posterior calibration and sensitivities**
48
+ (`[jax]`).
49
+
50
+ The whole stack is layered and decoupled: `core/` knows nothing about arguments,
51
+ `metrics/` knows nothing about the domain, and the optional extras pull in nothing
52
+ unless you ask for them.
53
+
54
+ ## Installing
55
+
56
+ ```
57
+ pip install probability-flow # core: NumPy only
58
+ pip install 'probability-flow[viz]' # + matplotlib renderers
59
+ pip install 'probability-flow[jax]' # + JAX calibration / sensitivities
60
+ pip install 'probability-flow[viz,jax]' # both
61
+ ```
62
+
63
+ Python 3.11 or newer. The import name is `probability_flow`; the base install
64
+ depends only on **NumPy** — matplotlib and JAX are optional extras, pulled in only
65
+ by `[viz]` and `[jax]`. Calling a renderer or the calibration module without its
66
+ extra raises an `ImportError` telling you which one to install. `[dev]` adds pytest,
67
+ ruff, and matplotlib.
68
+
69
+ From a checkout, the package also runs in place with no install step: the
70
+ importable package is the inner `probability_flow/` directory, so working from the
71
+ repository root (which pytest puts on the path) makes `import probability_flow`
72
+ resolve. Run the tests from this directory with `pytest`. The example notebooks add
73
+ the path themselves, so they run as-is. CI runs ruff and the test suite on Python
74
+ 3.11–3.13 on every push.
75
+
76
+ ## Quickstart
77
+
78
+ ```python
79
+ from probability_flow import Node, ExactSolver, LoopySolver
80
+
81
+ guilty = Node("Guilty") # the target; prior 0.5 by default
82
+ guilty.add_input(Node("Seen at scene"), lr=9) # supporting evidence
83
+ guilty.add_input(Node("Alibi"), lr=0.25) # evidence against (lr < 1)
84
+
85
+ bn = guilty.compile()
86
+ ExactSolver(bn).prob(guilty, 1) # 0.5731, by enumerating the joint
87
+ LoopySolver(bn).prob(guilty, 1) # 0.5731, by message passing
88
+ ```
89
+
90
+ ## Build, compile, solve
91
+
92
+ Inference happens in three stages.
93
+
94
+ 1. **Build.** You assemble a mutable graph of `Node` objects. `add_input` wires
95
+ one node as an input of another and returns the input, so upstream structure
96
+ can be built inline. It takes a single node or a list, and a per-edge weight
97
+ that is forwarded to the node's distribution.
98
+ 2. **Compile.** `target.compile()` walks the graph from the target, checks it
99
+ (acyclic, consistent shapes, one distribution per node), assigns a
100
+ topological order, and freezes everything into an immutable
101
+ `BayesianNetwork`. Authoring is ergonomic; the compiled form is vectorized.
102
+ 3. **Solve.** A solver answers `marginal(node)` and `prob(node, state)` queries
103
+ on the compiled network, optionally conditioned on `evidence`.
104
+
105
+ An edge `Y -> X` declares only that X's distribution is `P(X | Y)`. It is a
106
+ conditioning relationship, not a claim about causation. A causal model points its
107
+ edges with causation (cause to effect); an argument graph points them against it
108
+ (evidence to claim, since the claim is what produces the evidence). The library
109
+ does not care which: it only ever works with `P(node | inputs)`.
110
+
111
+ ## Distributions
112
+
113
+ Each node carries a conditional probability distribution (CPD) describing
114
+ `P(node | its inputs)`. The library distinguishes the combination *rule* (the
115
+ combiner) from the *object* that implements it (the CPD).
116
+
117
+ - **IndependentEvidenceCPD** (the default). Treats inputs as uncorrelated
118
+ sources of evidence: `logit P(node=1) = logit(prior) + sum of log(lr)` over the
119
+ active inputs. Adding weights of evidence is Bayes' rule for independent
120
+ likelihood ratios. Set per edge with `add_input(x, lr=...)`.
121
+ - **NoisyOrCPD**. "Any one cause can fire the effect":
122
+ `P(node=0) = (1 - leak) * product of (1 - activation)` over present causes.
123
+ Declared with `node.noisy_or(leak=...)` and `add_input(cause, activation=...)`.
124
+ - **NoisyAndCPD**. A native conjunction: the effect needs every condition
125
+ present, each succeeding with its own `activation`, with `leak` acting as a
126
+ global inhibitor. Declared with `node.noisy_and(leak=...)`.
127
+ - **TabularCPD**. An arbitrary dense table, the fallback and the reference behind
128
+ every other distribution. Set with `node.set_cpd(TabularCPD(...))`. Its `repr`
129
+ prints the full table as a readable bordered grid.
130
+
131
+ `IndependentEvidence`, `NoisyOr`, and `NoisyAnd` all belong to the same family:
132
+ each input contributes independently, and they differ only in how the
133
+ contributions combine.
134
+
135
+ ## Inference
136
+
137
+ - **ExactSolver**. Brute-force enumeration of the joint distribution. Obviously
138
+ correct, exponential in the number of nodes, and intended for small graphs and
139
+ as the ground truth that everything else is tested against. Supports evidence,
140
+ so colliders and explaining-away can be exercised.
141
+ - **LoopySolver**. Loopy belief propagation. It treats each distribution as a
142
+ factor and passes messages on the factor graph. It is exact on loop-free graphs
143
+ (it converges to the true marginals) and approximate on graphs with loops,
144
+ where it iterates to a fixed point. It has the same query surface as the exact
145
+ solver, plus a bulk `marginals()`, optional `damping` to settle oscillation,
146
+ and per-evidence caching.
147
+
148
+ ## Why it stays small
149
+
150
+ A node with k binary inputs has a table of size 2^k. The design keeps that cost
151
+ off the production path:
152
+
153
+ - `as_tabular()` materializes a distribution's full table. It is used only by the
154
+ exact solver and the tests, on deliberately small graphs, never during normal
155
+ inference.
156
+ - `LoopySolver` talks to a distribution only through two message methods
157
+ (`message_to_output` and `message_to_input`). The engine never inspects a
158
+ distribution's type.
159
+
160
+ This means a structured distribution computes its messages in time linear in the
161
+ number of inputs, without ever building the 2^k table, and the engine does not
162
+ change. These linear-time messages are implemented: exact for the gates
163
+ (`NoisyOr` / `NoisyAnd`), and a hybrid for the default `IndependentEvidence` —
164
+ exact below a fan-in threshold, a capped-bucket approximation above it (within
165
+ 1e-3 of the exact solver). An arbitrary
166
+ `TabularCPD` still falls back to the dense default, which is correct and
167
+ unavoidable. See `docs/fast_messages.md`.
168
+
169
+ ## The ASPIC argument layer
170
+
171
+ `probability_flow.aspic` builds a typed argument out of premises and conclusions
172
+ joined by support / rebut / undermine / undercut / strict edges, then **compiles
173
+ it to an ordinary `BayesianNetwork`**. All argumentation vocabulary lives here;
174
+ `core/` stays free of it (ASPIC is the first of several planned domain wrappers —
175
+ legal, medical, AI-safety — over the same core). Every argumentative edge is a
176
+ method on its downstream node, so the whole graph can be traversed from the root target.
177
+
178
+ ```python
179
+ from probability_flow.aspic import Premise, Conclusion
180
+
181
+ guilty = Conclusion("Guilty", prior=0.5)
182
+ seen = guilty.support(Premise("Seen at scene", 0.5), lr=9) # supporting argument
183
+ guilty.rebut(Premise("Alibi", 0.5), lr=0.25) # attacking argument (lr < 1)
184
+ guilty.undercut(seen, by=Premise("Unreliable witness", 0.5)) # attack the support *edge*
185
+
186
+ bn = guilty.compile() # an ordinary BayesianNetwork to solve as above
187
+ arg = guilty.assemble() # a serializable Argument handle
188
+ arg.posterior(guilty) # 0.4615
189
+ arg.save("guilty.json") # to_json / from_json / save / load — round-trips to identical posteriors
190
+ ```
191
+
192
+ `assemble()` returns an `Argument` handle that owns the cached compile, the
193
+ posterior queries, JSON serialization (typed at the argument level), and the
194
+ metric methods below. See `docs/aspic.md`.
195
+
196
+ ## Metrics
197
+
198
+ `probability_flow.metrics` measures a compiled network and **depends only on
199
+ `core`** — no argumentation vocabulary, so every wrapper reuses it. Every function
200
+ is pure and accepts either a compiled `BayesianNetwork` or a target node (compiled
201
+ on the spot), and results carry your own `Node` objects, never ids.
202
+
203
+ ```python
204
+ from probability_flow import metrics
205
+
206
+ metrics.d_separated_groups(bn, guilty) # independent evidence branches
207
+ metrics.upstream_size(guilty) # size of the evidence base (auto-compiles)
208
+ metrics.circuit_rank(bn) # distance from a tree (0 == polytree)
209
+ metrics.posterior_range(bn, guilty) # manipulability: posterior range over prunings
210
+ bn.max_depth(guilty) # trivial structural ones are also BN methods
211
+ ```
212
+
213
+ The seam covers structure (depth, size), loopiness (circuit rank, polytree test),
214
+ difficulty (posterior gap, log-odds deltas, concentration Gini), and
215
+ manipulability (posterior range). See `docs/metrics.md`.
216
+
217
+ ## Generating argument graphs
218
+
219
+ `probability_flow.aspic.generate` rejection-samples ASPIC arguments that meet a
220
+ difficulty target — random support/attack chains, optional undercutters, axiomatic
221
+ leaves, strict edges, and parent sharing, screened against the real metrics layer.
222
+ This is the substrate for a benchmark of debate scenarios with known ground truth.
223
+
224
+ ```python
225
+ from probability_flow.aspic import generate, StructuralParams, DifficultyTargets
226
+
227
+ arg = generate(
228
+ seed=0,
229
+ structural=StructuralParams(n_support=2, n_attack=1),
230
+ targets=DifficultyTargets(target_posterior=0.7), # reached by calibrating the root prior
231
+ )
232
+ ```
233
+
234
+ The structural shape is set directly; difficulty targets are screened and the
235
+ target posterior is hit by calibrating the root prior. See `docs/generation.md`.
236
+
237
+ ## Visualization (optional, `[viz]`)
238
+
239
+ With the `[viz]` extra installed, a compiled network and an argument both render to
240
+ a matplotlib figure with an in-house layered layout (likelihood-ratio edges
241
+ coloured red→blue by their LR):
242
+
243
+ ```python
244
+ bn.render() # or render(bn) from probability_flow.visualization
245
+ guilty.assemble().render() # the argument view
246
+ ```
247
+
248
+ matplotlib is imported lazily, only when you draw, so importing `probability_flow`
249
+ never pulls it in; without the extra these calls raise an `ImportError` pointing at
250
+ `pip install 'probability-flow[viz]'`.
251
+
252
+ ## Calibration and sensitivities (optional, `[jax]`)
253
+
254
+ With the `[jax]` extra, `probability_flow.aspic.calibrate` differentiates the root
255
+ posterior through the compiled argument: `sensitivities(arg)` gives a
256
+ per-parameter importance signal (via `jax.grad`), and `calibrate_posterior(arg,
257
+ target)` solves for parameters that drive the root posterior to a chosen value.
258
+ JAX is imported lazily inside these calls, so without the extra they raise an
259
+ `ImportError` pointing at `pip install 'probability-flow[jax]'`.
260
+
261
+ ## Layout
262
+
263
+ ```
264
+ probability_flow/ the importable package
265
+ core/
266
+ node.py authoring layer (Node, add_input, compile)
267
+ network.py the compiled, immutable network
268
+ exact.py ExactSolver
269
+ cpd/ the distributions (base contract + the four CPDs)
270
+ bp/ loopy belief propagation (LoopySolver)
271
+ _logmath.py log-space numerics
272
+ aspic/ the ASPIC argument layer: authoring + compilation,
273
+ serialization, a random argument generator, and optional
274
+ JAX posterior calibration
275
+ metrics/ graph metrics (d-separation, depth/size, loopiness,
276
+ difficulty, manipulability)
277
+ visualization/ matplotlib renderers (arg.render() / bn.render()), [viz]
278
+ docs/ design notes (SPEC, DECISIONS, ROADMAP, and more)
279
+ tests/ checked against the exact solver
280
+ demos/ walkthrough notebooks
281
+ ```
282
+
283
+ ## Status and next steps
284
+
285
+ Working today: the build/compile flow, all four distributions, both solvers with
286
+ linear-time structured messages, evidence, the ASPIC argument-compilation layer,
287
+ argument serialization, the metrics seam (d-separation grouping, depth/size,
288
+ loopiness, difficulty, manipulability), a random argument generator with structural
289
+ and difficulty controls, optional matplotlib renderers, and optional JAX-based
290
+ posterior calibration and parameter sensitivities. Planned (see `docs/ROADMAP.md`):
291
+ a core-network serializer, the loopy-BP "topology zoo" robustness harness, and the
292
+ exact manipulability range.
293
+
294
+ ## Learning more
295
+
296
+ The notebooks in `demos/` walk through the library end to end:
297
+ `walkthrough.ipynb` for the core, `noisy_gates_walkthrough.ipynb` for the gate
298
+ distributions, `loopy_bp_walkthrough.ipynb` for belief propagation,
299
+ `visualization_demo.ipynb` for the renderers, `metrics_walkthrough.ipynb` for
300
+ the metrics seam, and `generation_demo.ipynb` for the argument generator. The
301
+ `docs/` directory holds the design rationale, the settled decisions, and the
302
+ roadmap.
303
+
304
+
@@ -0,0 +1,270 @@
1
+ # probability-flow
2
+
3
+ A small discrete Bayesian-network library written from scratch in NumPy, with no
4
+ external inference dependency. The goal is full control over and understanding of
5
+ the inference stack rather than a black box: every distribution and every solver
6
+ is plain, readable code that can be checked against a brute-force reference.
7
+
8
+ On top of that core it carries an **argumentation layer (ASPIC)** that compiles
9
+ typed argument graphs into networks, a domain-free **metrics** seam that measures
10
+ a compiled network, and a **generator** that samples random argument graphs with
11
+ controllable difficulty — the substrate for a benchmark of debate scenarios with
12
+ known ground truth. Two further capabilities are opt-in extras: matplotlib
13
+ **renderers** (`[viz]`) and JAX **posterior calibration and sensitivities**
14
+ (`[jax]`).
15
+
16
+ The whole stack is layered and decoupled: `core/` knows nothing about arguments,
17
+ `metrics/` knows nothing about the domain, and the optional extras pull in nothing
18
+ unless you ask for them.
19
+
20
+ ## Installing
21
+
22
+ ```
23
+ pip install probability-flow # core: NumPy only
24
+ pip install 'probability-flow[viz]' # + matplotlib renderers
25
+ pip install 'probability-flow[jax]' # + JAX calibration / sensitivities
26
+ pip install 'probability-flow[viz,jax]' # both
27
+ ```
28
+
29
+ Python 3.11 or newer. The import name is `probability_flow`; the base install
30
+ depends only on **NumPy** — matplotlib and JAX are optional extras, pulled in only
31
+ by `[viz]` and `[jax]`. Calling a renderer or the calibration module without its
32
+ extra raises an `ImportError` telling you which one to install. `[dev]` adds pytest,
33
+ ruff, and matplotlib.
34
+
35
+ From a checkout, the package also runs in place with no install step: the
36
+ importable package is the inner `probability_flow/` directory, so working from the
37
+ repository root (which pytest puts on the path) makes `import probability_flow`
38
+ resolve. Run the tests from this directory with `pytest`. The example notebooks add
39
+ the path themselves, so they run as-is. CI runs ruff and the test suite on Python
40
+ 3.11–3.13 on every push.
41
+
42
+ ## Quickstart
43
+
44
+ ```python
45
+ from probability_flow import Node, ExactSolver, LoopySolver
46
+
47
+ guilty = Node("Guilty") # the target; prior 0.5 by default
48
+ guilty.add_input(Node("Seen at scene"), lr=9) # supporting evidence
49
+ guilty.add_input(Node("Alibi"), lr=0.25) # evidence against (lr < 1)
50
+
51
+ bn = guilty.compile()
52
+ ExactSolver(bn).prob(guilty, 1) # 0.5731, by enumerating the joint
53
+ LoopySolver(bn).prob(guilty, 1) # 0.5731, by message passing
54
+ ```
55
+
56
+ ## Build, compile, solve
57
+
58
+ Inference happens in three stages.
59
+
60
+ 1. **Build.** You assemble a mutable graph of `Node` objects. `add_input` wires
61
+ one node as an input of another and returns the input, so upstream structure
62
+ can be built inline. It takes a single node or a list, and a per-edge weight
63
+ that is forwarded to the node's distribution.
64
+ 2. **Compile.** `target.compile()` walks the graph from the target, checks it
65
+ (acyclic, consistent shapes, one distribution per node), assigns a
66
+ topological order, and freezes everything into an immutable
67
+ `BayesianNetwork`. Authoring is ergonomic; the compiled form is vectorized.
68
+ 3. **Solve.** A solver answers `marginal(node)` and `prob(node, state)` queries
69
+ on the compiled network, optionally conditioned on `evidence`.
70
+
71
+ An edge `Y -> X` declares only that X's distribution is `P(X | Y)`. It is a
72
+ conditioning relationship, not a claim about causation. A causal model points its
73
+ edges with causation (cause to effect); an argument graph points them against it
74
+ (evidence to claim, since the claim is what produces the evidence). The library
75
+ does not care which: it only ever works with `P(node | inputs)`.
76
+
77
+ ## Distributions
78
+
79
+ Each node carries a conditional probability distribution (CPD) describing
80
+ `P(node | its inputs)`. The library distinguishes the combination *rule* (the
81
+ combiner) from the *object* that implements it (the CPD).
82
+
83
+ - **IndependentEvidenceCPD** (the default). Treats inputs as uncorrelated
84
+ sources of evidence: `logit P(node=1) = logit(prior) + sum of log(lr)` over the
85
+ active inputs. Adding weights of evidence is Bayes' rule for independent
86
+ likelihood ratios. Set per edge with `add_input(x, lr=...)`.
87
+ - **NoisyOrCPD**. "Any one cause can fire the effect":
88
+ `P(node=0) = (1 - leak) * product of (1 - activation)` over present causes.
89
+ Declared with `node.noisy_or(leak=...)` and `add_input(cause, activation=...)`.
90
+ - **NoisyAndCPD**. A native conjunction: the effect needs every condition
91
+ present, each succeeding with its own `activation`, with `leak` acting as a
92
+ global inhibitor. Declared with `node.noisy_and(leak=...)`.
93
+ - **TabularCPD**. An arbitrary dense table, the fallback and the reference behind
94
+ every other distribution. Set with `node.set_cpd(TabularCPD(...))`. Its `repr`
95
+ prints the full table as a readable bordered grid.
96
+
97
+ `IndependentEvidence`, `NoisyOr`, and `NoisyAnd` all belong to the same family:
98
+ each input contributes independently, and they differ only in how the
99
+ contributions combine.
100
+
101
+ ## Inference
102
+
103
+ - **ExactSolver**. Brute-force enumeration of the joint distribution. Obviously
104
+ correct, exponential in the number of nodes, and intended for small graphs and
105
+ as the ground truth that everything else is tested against. Supports evidence,
106
+ so colliders and explaining-away can be exercised.
107
+ - **LoopySolver**. Loopy belief propagation. It treats each distribution as a
108
+ factor and passes messages on the factor graph. It is exact on loop-free graphs
109
+ (it converges to the true marginals) and approximate on graphs with loops,
110
+ where it iterates to a fixed point. It has the same query surface as the exact
111
+ solver, plus a bulk `marginals()`, optional `damping` to settle oscillation,
112
+ and per-evidence caching.
113
+
114
+ ## Why it stays small
115
+
116
+ A node with k binary inputs has a table of size 2^k. The design keeps that cost
117
+ off the production path:
118
+
119
+ - `as_tabular()` materializes a distribution's full table. It is used only by the
120
+ exact solver and the tests, on deliberately small graphs, never during normal
121
+ inference.
122
+ - `LoopySolver` talks to a distribution only through two message methods
123
+ (`message_to_output` and `message_to_input`). The engine never inspects a
124
+ distribution's type.
125
+
126
+ This means a structured distribution computes its messages in time linear in the
127
+ number of inputs, without ever building the 2^k table, and the engine does not
128
+ change. These linear-time messages are implemented: exact for the gates
129
+ (`NoisyOr` / `NoisyAnd`), and a hybrid for the default `IndependentEvidence` —
130
+ exact below a fan-in threshold, a capped-bucket approximation above it (within
131
+ 1e-3 of the exact solver). An arbitrary
132
+ `TabularCPD` still falls back to the dense default, which is correct and
133
+ unavoidable. See `docs/fast_messages.md`.
134
+
135
+ ## The ASPIC argument layer
136
+
137
+ `probability_flow.aspic` builds a typed argument out of premises and conclusions
138
+ joined by support / rebut / undermine / undercut / strict edges, then **compiles
139
+ it to an ordinary `BayesianNetwork`**. All argumentation vocabulary lives here;
140
+ `core/` stays free of it (ASPIC is the first of several planned domain wrappers —
141
+ legal, medical, AI-safety — over the same core). Every argumentative edge is a
142
+ method on its downstream node, so the whole graph can be traversed from the root target.
143
+
144
+ ```python
145
+ from probability_flow.aspic import Premise, Conclusion
146
+
147
+ guilty = Conclusion("Guilty", prior=0.5)
148
+ seen = guilty.support(Premise("Seen at scene", 0.5), lr=9) # supporting argument
149
+ guilty.rebut(Premise("Alibi", 0.5), lr=0.25) # attacking argument (lr < 1)
150
+ guilty.undercut(seen, by=Premise("Unreliable witness", 0.5)) # attack the support *edge*
151
+
152
+ bn = guilty.compile() # an ordinary BayesianNetwork to solve as above
153
+ arg = guilty.assemble() # a serializable Argument handle
154
+ arg.posterior(guilty) # 0.4615
155
+ arg.save("guilty.json") # to_json / from_json / save / load — round-trips to identical posteriors
156
+ ```
157
+
158
+ `assemble()` returns an `Argument` handle that owns the cached compile, the
159
+ posterior queries, JSON serialization (typed at the argument level), and the
160
+ metric methods below. See `docs/aspic.md`.
161
+
162
+ ## Metrics
163
+
164
+ `probability_flow.metrics` measures a compiled network and **depends only on
165
+ `core`** — no argumentation vocabulary, so every wrapper reuses it. Every function
166
+ is pure and accepts either a compiled `BayesianNetwork` or a target node (compiled
167
+ on the spot), and results carry your own `Node` objects, never ids.
168
+
169
+ ```python
170
+ from probability_flow import metrics
171
+
172
+ metrics.d_separated_groups(bn, guilty) # independent evidence branches
173
+ metrics.upstream_size(guilty) # size of the evidence base (auto-compiles)
174
+ metrics.circuit_rank(bn) # distance from a tree (0 == polytree)
175
+ metrics.posterior_range(bn, guilty) # manipulability: posterior range over prunings
176
+ bn.max_depth(guilty) # trivial structural ones are also BN methods
177
+ ```
178
+
179
+ The seam covers structure (depth, size), loopiness (circuit rank, polytree test),
180
+ difficulty (posterior gap, log-odds deltas, concentration Gini), and
181
+ manipulability (posterior range). See `docs/metrics.md`.
182
+
183
+ ## Generating argument graphs
184
+
185
+ `probability_flow.aspic.generate` rejection-samples ASPIC arguments that meet a
186
+ difficulty target — random support/attack chains, optional undercutters, axiomatic
187
+ leaves, strict edges, and parent sharing, screened against the real metrics layer.
188
+ This is the substrate for a benchmark of debate scenarios with known ground truth.
189
+
190
+ ```python
191
+ from probability_flow.aspic import generate, StructuralParams, DifficultyTargets
192
+
193
+ arg = generate(
194
+ seed=0,
195
+ structural=StructuralParams(n_support=2, n_attack=1),
196
+ targets=DifficultyTargets(target_posterior=0.7), # reached by calibrating the root prior
197
+ )
198
+ ```
199
+
200
+ The structural shape is set directly; difficulty targets are screened and the
201
+ target posterior is hit by calibrating the root prior. See `docs/generation.md`.
202
+
203
+ ## Visualization (optional, `[viz]`)
204
+
205
+ With the `[viz]` extra installed, a compiled network and an argument both render to
206
+ a matplotlib figure with an in-house layered layout (likelihood-ratio edges
207
+ coloured red→blue by their LR):
208
+
209
+ ```python
210
+ bn.render() # or render(bn) from probability_flow.visualization
211
+ guilty.assemble().render() # the argument view
212
+ ```
213
+
214
+ matplotlib is imported lazily, only when you draw, so importing `probability_flow`
215
+ never pulls it in; without the extra these calls raise an `ImportError` pointing at
216
+ `pip install 'probability-flow[viz]'`.
217
+
218
+ ## Calibration and sensitivities (optional, `[jax]`)
219
+
220
+ With the `[jax]` extra, `probability_flow.aspic.calibrate` differentiates the root
221
+ posterior through the compiled argument: `sensitivities(arg)` gives a
222
+ per-parameter importance signal (via `jax.grad`), and `calibrate_posterior(arg,
223
+ target)` solves for parameters that drive the root posterior to a chosen value.
224
+ JAX is imported lazily inside these calls, so without the extra they raise an
225
+ `ImportError` pointing at `pip install 'probability-flow[jax]'`.
226
+
227
+ ## Layout
228
+
229
+ ```
230
+ probability_flow/ the importable package
231
+ core/
232
+ node.py authoring layer (Node, add_input, compile)
233
+ network.py the compiled, immutable network
234
+ exact.py ExactSolver
235
+ cpd/ the distributions (base contract + the four CPDs)
236
+ bp/ loopy belief propagation (LoopySolver)
237
+ _logmath.py log-space numerics
238
+ aspic/ the ASPIC argument layer: authoring + compilation,
239
+ serialization, a random argument generator, and optional
240
+ JAX posterior calibration
241
+ metrics/ graph metrics (d-separation, depth/size, loopiness,
242
+ difficulty, manipulability)
243
+ visualization/ matplotlib renderers (arg.render() / bn.render()), [viz]
244
+ docs/ design notes (SPEC, DECISIONS, ROADMAP, and more)
245
+ tests/ checked against the exact solver
246
+ demos/ walkthrough notebooks
247
+ ```
248
+
249
+ ## Status and next steps
250
+
251
+ Working today: the build/compile flow, all four distributions, both solvers with
252
+ linear-time structured messages, evidence, the ASPIC argument-compilation layer,
253
+ argument serialization, the metrics seam (d-separation grouping, depth/size,
254
+ loopiness, difficulty, manipulability), a random argument generator with structural
255
+ and difficulty controls, optional matplotlib renderers, and optional JAX-based
256
+ posterior calibration and parameter sensitivities. Planned (see `docs/ROADMAP.md`):
257
+ a core-network serializer, the loopy-BP "topology zoo" robustness harness, and the
258
+ exact manipulability range.
259
+
260
+ ## Learning more
261
+
262
+ The notebooks in `demos/` walk through the library end to end:
263
+ `walkthrough.ipynb` for the core, `noisy_gates_walkthrough.ipynb` for the gate
264
+ distributions, `loopy_bp_walkthrough.ipynb` for belief propagation,
265
+ `visualization_demo.ipynb` for the renderers, `metrics_walkthrough.ipynb` for
266
+ the metrics seam, and `generation_demo.ipynb` for the argument generator. The
267
+ `docs/` directory holds the design rationale, the settled decisions, and the
268
+ roadmap.
269
+
270
+
@@ -0,0 +1,41 @@
1
+ """probability_flow: a from-scratch, modular discrete Bayesian-network library.
2
+
3
+ The design is in `docs/SPEC.md`; settled choices in `docs/DECISIONS.md` and
4
+ milestones in `docs/ROADMAP.md`. The public API currently lives in
5
+ `probability_flow.core` and is re-exported here for convenience:
6
+
7
+ from probability_flow import Node, ExactSolver
8
+ """
9
+ from importlib.metadata import PackageNotFoundError, version
10
+
11
+ from .core import (
12
+ CPD,
13
+ BayesianNetwork,
14
+ CompiledCPD,
15
+ ExactSolver,
16
+ IndependentEvidenceCPD,
17
+ LoopySolver,
18
+ Node,
19
+ NoisyAndCPD,
20
+ NoisyOrCPD,
21
+ TabularCPD,
22
+ )
23
+
24
+ try:
25
+ __version__ = version("probability-flow")
26
+ except PackageNotFoundError: # running from a source checkout, not installed
27
+ __version__ = "0.0.0+unknown"
28
+
29
+ __all__ = [
30
+ "__version__",
31
+ "Node",
32
+ "BayesianNetwork",
33
+ "CompiledCPD",
34
+ "ExactSolver",
35
+ "LoopySolver",
36
+ "CPD",
37
+ "TabularCPD",
38
+ "IndependentEvidenceCPD",
39
+ "NoisyOrCPD",
40
+ "NoisyAndCPD",
41
+ ]
@@ -0,0 +1,21 @@
1
+ """probability_flow.aspic: the ASPIC argument-compilation layer.
2
+
3
+ The first of several planned domain wrappers (legal, medical, AI-safety) over the
4
+ pure-BN `core`. Build an argument out of premises, conclusions, and attacks, then
5
+ `compile()` the target to an ordinary `BayesianNetwork`. See `docs/aspic.md`.
6
+
7
+ from probability_flow.aspic import Premise, Axiom, Conclusion
8
+ """
9
+ from .argument import ArgumentWarning, Axiom, Conclusion, Premise
10
+ from .generate import (
11
+ ArgumentGenerator,
12
+ DifficultyTargets,
13
+ StructuralParams,
14
+ generate,
15
+ )
16
+ from .handle import Argument
17
+
18
+ __all__ = [
19
+ "Premise", "Axiom", "Conclusion", "ArgumentWarning", "Argument",
20
+ "ArgumentGenerator", "StructuralParams", "DifficultyTargets", "generate",
21
+ ]