gabp-sparse-inv 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. gabp_sparse_inv-0.3.0/LICENSE +21 -0
  2. gabp_sparse_inv-0.3.0/PKG-INFO +545 -0
  3. gabp_sparse_inv-0.3.0/README.md +496 -0
  4. gabp_sparse_inv-0.3.0/gabp_sparse_inv/__init__.py +146 -0
  5. gabp_sparse_inv-0.3.0/gabp_sparse_inv/_linalg.py +41 -0
  6. gabp_sparse_inv-0.3.0/gabp_sparse_inv/autodiff.py +226 -0
  7. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/__init__.py +1 -0
  8. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/confirmatory.py +432 -0
  9. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/deq_breakeven.py +485 -0
  10. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/deq_cross_eval.py +508 -0
  11. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/deq_gradient_isolation.py +355 -0
  12. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/deq_gradient_isolation_analysis.py +345 -0
  13. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/gmrf_scaling.py +195 -0
  14. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/harness.py +501 -0
  15. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/matched_compute.py +234 -0
  16. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/maze_cross_eval.py +165 -0
  17. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/maze_extrapolation.py +339 -0
  18. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/maze_symmetric_swap_control.py +468 -0
  19. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/metrics.py +492 -0
  20. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/nonsym_stability.py +254 -0
  21. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/operator_cross_eval_analysis.py +452 -0
  22. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/phase1_analysis.py +340 -0
  23. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/precision.py +517 -0
  24. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/run.py +207 -0
  25. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/seeds.py +127 -0
  26. gabp_sparse_inv-0.3.0/gabp_sparse_inv/bench/stability.py +197 -0
  27. gabp_sparse_inv-0.3.0/gabp_sparse_inv/chain.py +140 -0
  28. gabp_sparse_inv-0.3.0/gabp_sparse_inv/demos/__init__.py +1 -0
  29. gabp_sparse_inv-0.3.0/gabp_sparse_inv/demos/deltanet_chunk.py +258 -0
  30. gabp_sparse_inv-0.3.0/gabp_sparse_inv/demos/deq_fixedpoint.py +354 -0
  31. gabp_sparse_inv-0.3.0/gabp_sparse_inv/demos/maze_baselines.py +380 -0
  32. gabp_sparse_inv-0.3.0/gabp_sparse_inv/demos/maze_grid.py +335 -0
  33. gabp_sparse_inv-0.3.0/gabp_sparse_inv/demos/maze_tree.py +256 -0
  34. gabp_sparse_inv-0.3.0/gabp_sparse_inv/generators.py +573 -0
  35. gabp_sparse_inv-0.3.0/gabp_sparse_inv/gmrf.py +369 -0
  36. gabp_sparse_inv-0.3.0/gabp_sparse_inv/gmrf_grid.py +180 -0
  37. gabp_sparse_inv-0.3.0/gabp_sparse_inv/junction.py +752 -0
  38. gabp_sparse_inv-0.3.0/gabp_sparse_inv/junction_autodiff.py +373 -0
  39. gabp_sparse_inv-0.3.0/gabp_sparse_inv/layout.py +832 -0
  40. gabp_sparse_inv-0.3.0/gabp_sparse_inv/nonsym.py +354 -0
  41. gabp_sparse_inv-0.3.0/gabp_sparse_inv/nonsym_junction.py +397 -0
  42. gabp_sparse_inv-0.3.0/gabp_sparse_inv/sampling.py +134 -0
  43. gabp_sparse_inv-0.3.0/gabp_sparse_inv/star.py +140 -0
  44. gabp_sparse_inv-0.3.0/gabp_sparse_inv/tree.py +248 -0
  45. gabp_sparse_inv-0.3.0/gabp_sparse_inv.egg-info/PKG-INFO +545 -0
  46. gabp_sparse_inv-0.3.0/gabp_sparse_inv.egg-info/SOURCES.txt +86 -0
  47. gabp_sparse_inv-0.3.0/gabp_sparse_inv.egg-info/dependency_links.txt +1 -0
  48. gabp_sparse_inv-0.3.0/gabp_sparse_inv.egg-info/requires.txt +6 -0
  49. gabp_sparse_inv-0.3.0/gabp_sparse_inv.egg-info/top_level.txt +1 -0
  50. gabp_sparse_inv-0.3.0/pyproject.toml +54 -0
  51. gabp_sparse_inv-0.3.0/setup.cfg +4 -0
  52. gabp_sparse_inv-0.3.0/tests/test_autodiff.py +350 -0
  53. gabp_sparse_inv-0.3.0/tests/test_chain.py +172 -0
  54. gabp_sparse_inv-0.3.0/tests/test_confirmatory.py +249 -0
  55. gabp_sparse_inv-0.3.0/tests/test_deltanet_chunk.py +123 -0
  56. gabp_sparse_inv-0.3.0/tests/test_deq_breakeven.py +124 -0
  57. gabp_sparse_inv-0.3.0/tests/test_deq_cross_eval.py +96 -0
  58. gabp_sparse_inv-0.3.0/tests/test_deq_fixedpoint.py +160 -0
  59. gabp_sparse_inv-0.3.0/tests/test_deq_gradient_isolation.py +65 -0
  60. gabp_sparse_inv-0.3.0/tests/test_deq_gradient_isolation_analysis.py +96 -0
  61. gabp_sparse_inv-0.3.0/tests/test_double_backward.py +133 -0
  62. gabp_sparse_inv-0.3.0/tests/test_generators.py +192 -0
  63. gabp_sparse_inv-0.3.0/tests/test_gmrf.py +222 -0
  64. gabp_sparse_inv-0.3.0/tests/test_gmrf_grid.py +196 -0
  65. gabp_sparse_inv-0.3.0/tests/test_junction.py +618 -0
  66. gabp_sparse_inv-0.3.0/tests/test_junction_autodiff.py +237 -0
  67. gabp_sparse_inv-0.3.0/tests/test_junction_batched.py +188 -0
  68. gabp_sparse_inv-0.3.0/tests/test_matched_compute.py +130 -0
  69. gabp_sparse_inv-0.3.0/tests/test_maze_baselines.py +118 -0
  70. gabp_sparse_inv-0.3.0/tests/test_maze_cross_eval.py +88 -0
  71. gabp_sparse_inv-0.3.0/tests/test_maze_extrapolation.py +110 -0
  72. gabp_sparse_inv-0.3.0/tests/test_maze_grid.py +72 -0
  73. gabp_sparse_inv-0.3.0/tests/test_maze_symmetric_swap_control.py +151 -0
  74. gabp_sparse_inv-0.3.0/tests/test_maze_tree.py +56 -0
  75. gabp_sparse_inv-0.3.0/tests/test_nonsym.py +297 -0
  76. gabp_sparse_inv-0.3.0/tests/test_nonsym_junction.py +314 -0
  77. gabp_sparse_inv-0.3.0/tests/test_nonsym_stability.py +80 -0
  78. gabp_sparse_inv-0.3.0/tests/test_nonsym_tree.py +307 -0
  79. gabp_sparse_inv-0.3.0/tests/test_operator_cross_eval_analysis.py +132 -0
  80. gabp_sparse_inv-0.3.0/tests/test_ordering.py +111 -0
  81. gabp_sparse_inv-0.3.0/tests/test_packaging.py +27 -0
  82. gabp_sparse_inv-0.3.0/tests/test_phase1_analysis.py +126 -0
  83. gabp_sparse_inv-0.3.0/tests/test_precision.py +161 -0
  84. gabp_sparse_inv-0.3.0/tests/test_sampling.py +111 -0
  85. gabp_sparse_inv-0.3.0/tests/test_seed_guard.py +89 -0
  86. gabp_sparse_inv-0.3.0/tests/test_stability.py +75 -0
  87. gabp_sparse_inv-0.3.0/tests/test_star.py +171 -0
  88. gabp_sparse_inv-0.3.0/tests/test_tree.py +306 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Vanshdeep Sehrawat
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,545 @@
1
+ Metadata-Version: 2.4
2
+ Name: gabp-sparse-inv
3
+ Version: 0.3.0
4
+ Summary: Differentiable selected inverse (plus log-det, Gaussian sampling, and solve) for sparse block-structured SPD and non-symmetric matrices, via Gaussian belief propagation / the Takahashi recurrence.
5
+ Author-email: Vanshdeep Sehrawat <vanshdeep.sehrawat@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Vanshdeep Sehrawat
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/vanshsehrawat14/gabp-sparse-inv
29
+ Project-URL: Repository, https://github.com/vanshsehrawat14/gabp-sparse-inv
30
+ Project-URL: Documentation, https://github.com/vanshsehrawat14/gabp-sparse-inv#readme
31
+ Project-URL: Bug Tracker, https://github.com/vanshsehrawat14/gabp-sparse-inv/issues
32
+ Project-URL: Changelog, https://github.com/vanshsehrawat14/gabp-sparse-inv/blob/main/CHANGELOG.md
33
+ Keywords: selected-inverse,selected-inversion,sparse-linear-algebra,gaussian-belief-propagation,takahashi,differentiable,pytorch,gmrf
34
+ Classifier: Development Status :: 3 - Alpha
35
+ Classifier: Intended Audience :: Science/Research
36
+ Classifier: License :: OSI Approved :: MIT License
37
+ Classifier: Programming Language :: Python :: 3
38
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
39
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
40
+ Requires-Python: >=3.12
41
+ Description-Content-Type: text/markdown
42
+ License-File: LICENSE
43
+ Requires-Dist: torch>=2.2
44
+ Requires-Dist: numpy>=1.24
45
+ Provides-Extra: dev
46
+ Requires-Dist: pytest>=7; extra == "dev"
47
+ Requires-Dist: pytest-cov>=4; extra == "dev"
48
+ Dynamic: license-file
49
+
50
+ # gabp-sparse-inv
51
+
52
+ Sparse selected inverse kernels for block-structured matrices (SPD and structured
53
+ non-symmetric). For each supported pattern the package computes only the blocks of `A^-1`
54
+ that lie on `A`'s own (or its filled) sparsity pattern, without forming the dense inverse.
55
+
56
+ The organizing principle: when the block structure of `A` is a **tree**, selected
57
+ inversion is a two-pass collect/distribute schedule that is exactly **Gaussian
58
+ Belief Propagation** and equals the Takahashi recurrence. See
59
+ [docs/derivations.md](docs/derivations.md) for the theorem and proofs.
60
+
61
+ New to the codebase? [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) is the module map, the
62
+ conventions, and the pattern for adding a kernel.
63
+
64
+ ## Statement of need
65
+
66
+ Matrix inversion increasingly sits *inside* differentiable models: deep-equilibrium layers,
67
+ gated linear attention and the delta rule, and other implicit / fixed-point layers place a
68
+ structured linear solve in the forward pass and its adjoint in the backward pass. The dense
69
+ inverse is `O(N^3)`; the on-pattern *selected* inverse of a low-treewidth matrix is `O(n)`
70
+ (trees) to `O(n^1.5)` (2-D grids). Existing selected-inversion libraries (SelInv, PEXSI) target
71
+ compiled HPC and are not differentiable, and general sparse solvers return factorizations or
72
+ solves rather than the on-pattern inverse blocks with gradients. `gabp-sparse-inv` provides
73
+ drop-in PyTorch operators that return exact on-pattern inverse blocks, plus the log-determinant,
74
+ Gaussian samples, and solves that share the same factorization, with exact gradients at the same
75
+ asymptotic cost as the forward pass, across one uniform symmetric / non-symmetric interface. The
76
+ full statement of need is in [paper/joss/paper.md](paper/joss/paper.md).
77
+
78
+ Implemented and tested kernels:
79
+
80
+ - **Chain** (block-tridiagonal; a path). Block `LDL^T` factorization + Takahashi
81
+ back-recursion. `O(L * b^3)` time, `O(L * b^2)` storage for `L` blocks of size
82
+ `b` - linear in `N = L * b` at fixed block size.
83
+ - **Star** (block-arrowhead; a depth-1 tree). One center block coupled to `K`
84
+ leaf blocks, no leaf-leaf coupling. Leaves are eliminated in parallel.
85
+ `O(K * b^3)` time, `O(K * b^2)` storage.
86
+ - **Tree** (arbitrary rooted tree). The general kernel: a node `parent` array and
87
+ one edge block per non-root node; collect (leaves→root) then distribute
88
+ (root→leaves). Chain and star are its path and depth-1 special cases.
89
+ `O(n * b^3)` time, `O(n * b^2)` storage for `n` nodes.
90
+
91
+ - **Differentiable tree** (`selinv_tree`). The selected inverse with a hand-written
92
+ analytic backward: the reverse two-pass is itself a collect/distribute on the same
93
+ elimination tree (selected inversion is *self-adjoint*), `O((|V|+|E|) b^3)` like the
94
+ forward. Gradients flow to `diag` and `edge`; an optional level-set **batched** path
95
+ mirrors the forward batching. Proved in [docs/derivations.md](docs/derivations.md)
96
+ §8. The gradient identity itself is folklore (Dwyer-Macphail / Giles); the
97
+ contribution is the structure-preserving schedule and the batched kernel.
98
+
99
+ - **Non-symmetric chain** (`selected_inverse_bidiag` / `selinv_bidiag`). The selected
100
+ inverse of a general (non-symmetric) block **lower-bidiagonal** matrix `M` -- the
101
+ non-symmetric analogue of the chain: `G_ii = M_ii^-1` and
102
+ `G_{i+1,i} = -M_{i+1,i+1}^-1 M_{i+1,i} M_ii^-1` on `M`'s pattern, with a hand-written
103
+ analytic backward (`selinv_bidiag`). Fully local -- no collect/distribute sweep -- so
104
+ forward and backward are each one batched block op, `O(n * b^3)` time, `O(n * b^2)`
105
+ storage. The first rung of the non-symmetric ladder; see
106
+ [docs/derivations.md](docs/derivations.md) §9.
107
+
108
+ - **Non-symmetric tree** (`selected_inverse_nonsym_tree`). The zero-fill non-symmetric rung
109
+ between the bidiagonal case and the general LU: a general block matrix whose off-diagonal
110
+ graph is a tree but whose two directed edge blocks are independent (`M_{p,v} != M_{v,p}^T`).
111
+ A two-sided Takahashi recurrence returns each node diagonal and both cross blocks exactly;
112
+ functional / autograd-traceable (first- and higher-order), and it reduces block-for-block to
113
+ the SPD tree kernel in the symmetric case. See [docs/derivations.md](docs/derivations.md) §9.5.
114
+
115
+ - **DeltaNet chunk inverse** (`selected_inverse_tril` / `selinv_tril`). The dense
116
+ triangular instance `T = (I - A)^-1` for strictly-lower `A` -- the chunk inverse of
117
+ DeltaNet / gated linear attention -- with the analytic self-adjoint backward
118
+ `bar_A = tril(T^T bar_T T^T, -1)` (`docs/derivations.md` §9.4). Here a blocked
119
+ triangular solve is the baseline, so the contribution is the explicit analytic backward.
120
+
121
+ - **Junction tree / general sparse** (`selected_inverse_junction` / `selinv_junction`).
122
+ The general sparse SPD case: an **arbitrary** block sparsity pattern, symbolically
123
+ completed to its chordal (filled) pattern `S = pattern(L + L^T)` by a min-degree
124
+ elimination order, then the multi-neighbour Takahashi recurrence (sparse block Cholesky +
125
+ clique back-substitution). Trees are the zero-fill special case. The functional forward is
126
+ autograd-differentiable, so `selinv_junction` yields exact gradients by reverse-mode
127
+ through the `S`-local self-adjoint schedule (`docs/derivations.md` §2.2, §8.4).
128
+ `O((|V| + fill) * b^3)` time.
129
+
130
+ The **junction-tree** kernel (above) ships the general sparse SPD forward and its autograd
131
+ adjoint (the §8.4 schedule realized through reverse-mode AD). The **tape-free hand-written
132
+ analytic** junction backward is available as `selinv_junction_analytic`; the non-symmetric
133
+ counterpart is `selinv_nonsym_junction_analytic`
134
+ (`gabp_sparse_inv/junction_autodiff.py`). They run the explicit reverse clique recurrence
135
+ (`docs/derivations.md` §8.5 / §10.3) with no autograd tape and are validated against the
136
+ functional path. The **general non-symmetric** selected inverse (LU / Erisman-Tinney;
137
+ `selected_inverse_nonsym_junction` / `selinv_nonsym_junction`, forward + autograd adjoint,
138
+ no pivoting) and its **solve sibling** `nonsym_junction_solve` (`A⁻¹b` / `A⁻ᵀb`) are also
139
+ included. The fixed-point and maze demonstrations are documented in [docs/DEQ.md](docs/DEQ.md)
140
+ and [docs/MAZE.md](docs/MAZE.md).
141
+
142
+ ## Install
143
+
144
+ Linux / macOS:
145
+
146
+ ```bash
147
+ python3.12 -m venv .venv && source .venv/bin/activate
148
+ pip install torch numpy pytest
149
+ pip install -e .
150
+ ```
151
+
152
+ Windows / PowerShell:
153
+
154
+ ```powershell
155
+ py -3.12 -m venv .venv; .\.venv\Scripts\Activate.ps1
156
+ pip install torch numpy pytest
157
+ pip install -e .
158
+ ```
159
+
160
+ GPU paths are used automatically when `torch.cuda.is_available()`.
161
+
162
+ ## Quickstart
163
+
164
+ ```python
165
+ import torch
166
+ from gabp_sparse_inv import random_spd_chain, selected_inverse_chain
167
+
168
+ bt = random_spd_chain(num_blocks=8, block_size=4, seed=0, diag_load=2.0)
169
+
170
+ G_diag, G_lower = selected_inverse_chain(bt.diag, bt.lower, check=True)
171
+
172
+ inv = torch.linalg.inv(bt.to_dense())
173
+ assert torch.allclose(G_diag[0], inv[0:4, 0:4], atol=1e-10)
174
+ ```
175
+
176
+ Inputs support optional leading batch dimensions: `[..., L, b, b]`. The chain
177
+ dimension `L` is sequential; each `b x b` block operation is batched over the
178
+ leading dimensions.
179
+
180
+ The **star** kernel takes the center block, the stacked leaf blocks, and the
181
+ center->leaf couplings, and returns the center, leaf, and cross inverse blocks:
182
+
183
+ ```python
184
+ from gabp_sparse_inv import random_spd_star, selected_inverse_star
185
+
186
+ st = random_spd_star(num_leaves=16, block_size=4, seed=0, diag_load=2.0)
187
+
188
+ # G_center = (A^-1)_00 ; G_leaf[j] = (A^-1)_jj ; G_cross[j] = (A^-1)_{j,0}
189
+ G_center, G_leaf, G_cross = selected_inverse_star(
190
+ st.center, st.leaf_diag, st.coupling, check=True
191
+ )
192
+
193
+ inv = torch.linalg.inv(st.to_dense())
194
+ assert torch.allclose(G_center, inv[0:4, 0:4], atol=1e-10)
195
+ ```
196
+
197
+ The off-pattern leaf-leaf inverse blocks `(A^-1)_{jk}` (`j != k`) are nonzero but
198
+ outside the selected pattern and are never formed.
199
+
200
+ The **tree** kernel takes the stacked node blocks, one edge block per non-root node
201
+ (`edge[v] = A_{parent(v), v}`), and the `parent` array:
202
+
203
+ ```python
204
+ from gabp_sparse_inv import random_spd_tree, selected_inverse_tree
205
+
206
+ bt = random_spd_tree(num_nodes=12, block_size=3, seed=0, diag_load=2.0, kind="random")
207
+
208
+ # G_diag[v] = (A^-1)_vv ; G_edge[v] = (A^-1)_{parent(v), v} (root slot is zero)
209
+ G_diag, G_edge = selected_inverse_tree(bt.diag, bt.edge, bt.parent, check=True)
210
+
211
+ inv = torch.linalg.inv(bt.to_dense())
212
+ assert torch.allclose(G_diag[0], inv[0:3, 0:3], atol=1e-10)
213
+ ```
214
+
215
+ `kind` selects the topology (`random`/`path`/`star`/`balanced`), or pass an explicit
216
+ `parent` array. The path and depth-1 trees reproduce the chain and star kernels
217
+ block-for-block (a root-invariance check, since the chain roots the path at its last
218
+ node and the tree roots it at node 0).
219
+
220
+ ### General sparse selected inverse (junction tree)
221
+
222
+ `selected_inverse_junction` handles an **arbitrary** block sparsity pattern. Pass the node
223
+ diagonals, the lower-triangular off-diagonal blocks (`edge_index` columns `(i, j)` with
224
+ `i > j`, `edge_val[k] = A_{i,j}`), and an optional elimination order (default: greedy
225
+ min-degree). It returns the selected inverse on the **filled** pattern `S`, a superset of
226
+ the input pattern wherever elimination creates fill:
227
+
228
+ ```python
229
+ import torch
230
+ from gabp_sparse_inv import random_spd_graph, grid_edges, selected_inverse_junction
231
+
232
+ # A 3x3 grid is loopy (treewidth 3): elimination fills in, unlike a tree.
233
+ sp = random_spd_graph(num_nodes=9, edges=grid_edges(3, 3), block_size=2, seed=0)
234
+
235
+ G_diag, S_index, G_lower = selected_inverse_junction(
236
+ sp.diag, sp.edge_index, sp.edge_val, check=True
237
+ )
238
+
239
+ inv = torch.linalg.inv(sp.to_dense())
240
+ assert torch.allclose(G_diag[0], inv[0:2, 0:2], atol=1e-10) # node-0 diagonal block
241
+ assert S_index.shape[1] > sp.edge_index.shape[1] # fill: S grew past the input
242
+ ```
243
+
244
+ `selinv_junction` is the autograd-connected form: gradients of any loss over `G_diag` /
245
+ `G_lower` flow to `diag` and `edge_val` through the self-adjoint `S`-local schedule
246
+ (`docs/derivations.md` §8.4). It reduces block-for-block to the tree kernel at zero fill.
247
+ Pass `batched=True` (to either entry point) for the level-set path: the Python loop runs
248
+ over elimination *levels* (`O(tree height)`: `~√n` on a 2-D grid) instead of nodes, each
249
+ level a few batched `index_add` block ops, the junction analogue of
250
+ `selected_inverse_tree(batched=True)`. Identical result and gradients (it stays functional,
251
+ so autograd gives the same backward); it amortizes kernel-launch latency on GPU.
252
+
253
+ `junction_solve` is the differentiable sparse SPD linear solve `x = A⁻¹ b` on the same
254
+ pattern, sharing one `LDL^T` factorization with the selected inverse:
255
+
256
+ ```python
257
+ from gabp_sparse_inv import random_spd_graph, grid_edges, junction_solve
258
+
259
+ sp = random_spd_graph(num_nodes=9, edges=grid_edges(3, 3), block_size=2, seed=0)
260
+ b = torch.randn(9, 2, dtype=torch.float64) # [n, b] (or [n, b, k] for k RHS)
261
+ x = junction_solve(sp.diag, sp.edge_index, sp.edge_val, b, check=True)
262
+ assert torch.allclose(x.reshape(-1), torch.linalg.solve(sp.to_dense(), b.reshape(-1)))
263
+ ```
264
+
265
+ `junction_logdet` returns `log det A` straight from the same `LDL^T` factorization
266
+ (differentiable, the junction sibling of `tree_logdet`):
267
+
268
+ ```python
269
+ from gabp_sparse_inv import junction_logdet
270
+
271
+ ld = junction_logdet(sp.diag, sp.edge_index, sp.edge_val)
272
+ assert torch.allclose(ld, torch.logdet(sp.to_dense()))
273
+ ```
274
+
275
+ ### Differentiable selected inverse
276
+
277
+ `selinv_tree` is the autograd-connected tree kernel: gradients of any loss over the
278
+ selected blocks flow back to the input blocks `diag` and `edge` through the analytic
279
+ two-pass backward (no autograd tape over the per-node loop).
280
+
281
+ ```python
282
+ import torch
283
+ from gabp_sparse_inv import random_spd_tree, selinv_tree
284
+
285
+ bt = random_spd_tree(num_nodes=64, block_size=3, seed=0, diag_load=2.0, kind="balanced")
286
+ diag = bt.diag.clone().requires_grad_(True)
287
+ edge = bt.edge.clone().requires_grad_(True)
288
+
289
+ G_diag, G_edge = selinv_tree(diag, edge, bt.parent) # or batched=True
290
+ loss = torch.diagonal(G_diag, dim1=-2, dim2=-1).sum() # sum of marginal variances
291
+ loss.backward()
292
+ # diag.grad, edge.grad are the exact on-pattern cotangents (gradcheck-verified).
293
+ ```
294
+
295
+ `selinv_tree` is **first-order only** (it uses the hand-written analytic backward). For
296
+ Hessian-vector products use the functional junction kernels or
297
+ `selected_inverse_tree(batched=True)`; both pass `gradgradcheck`
298
+ (`tests/test_double_backward.py`). `batched=True` uses the level-set path for forward
299
+ and backward; on CUDA it amortizes kernel-launch latency (timing not benchmarked here).
300
+
301
+ ### Non-symmetric selected inverse
302
+
303
+ The non-symmetric ladder is hot-swappable with the SPD ops above. The block **lower-bidiagonal**
304
+ case (`selinv_bidiag`) is the non-symmetric analogue of the chain: `G_ii` and `G_{i+1,i}` on
305
+ `M`'s pattern, fully local:
306
+
307
+ ```python
308
+ from gabp_sparse_inv import random_nonsym_bidiag, selinv_bidiag
309
+
310
+ M = random_nonsym_bidiag(num_blocks=8, block_size=3, seed=0, diag_load=2.0)
311
+ G_diag, G_lower = selinv_bidiag(M.diag, M.lower) # general blocks, no SPD assumption
312
+ inv = torch.linalg.inv(M.to_dense())
313
+ assert torch.allclose(G_diag[0], inv[0:3, 0:3], atol=1e-10)
314
+ ```
315
+
316
+ The dense triangular chunk inverse `T = (I − A)⁻¹` for strictly-lower `A` (the DeltaNet /
317
+ gated-linear-attention primitive) is `selinv_tril`, with the analytic self-adjoint backward
318
+ `bar_A = tril(Tᵀ bar_T Tᵀ, −1)`:
319
+
320
+ ```python
321
+ from gabp_sparse_inv import selinv_tril
322
+
323
+ A = torch.tril(torch.randn(6, 6, dtype=torch.float64), -1) # a chunk operator
324
+ T = selinv_tril(A) # T = (I - A)^{-1}, differentiable
325
+ assert torch.allclose(T, torch.linalg.inv(torch.eye(6, dtype=torch.float64) - A), atol=1e-12)
326
+ ```
327
+
328
+ `nonsym_junction_solve` is the general non-symmetric sparse solve `A⁻¹b` (and `A⁻ᵀb`, the
329
+ DEQ/implicit-differentiation adjoint) on the filled `L+U` pattern. Pass **independent** lower and
330
+ upper edge blocks. On symmetric input (`edge_upper = edge_lower.mT`) it matches `junction_solve`:
331
+
332
+ ```python
333
+ from gabp_sparse_inv import random_spd_graph, grid_edges, junction_solve, nonsym_junction_solve
334
+
335
+ sp = random_spd_graph(num_nodes=9, edges=grid_edges(3, 3), block_size=2, seed=0)
336
+ rhs = torch.randn(9, 2, dtype=torch.float64)
337
+ x = nonsym_junction_solve(sp.diag, sp.edge_index, sp.edge_val, sp.edge_val.mT, rhs)
338
+ assert torch.allclose(x, junction_solve(sp.diag, sp.edge_index, sp.edge_val, rhs), atol=1e-10)
339
+
340
+ # transpose=True reuses the same LDU factors transposed -- the exact DEQ backward (A^T u = g).
341
+ u = nonsym_junction_solve(sp.diag, sp.edge_index, sp.edge_val, sp.edge_val.mT, rhs, transpose=True)
342
+ ```
343
+
344
+ The full selected inverse on the `L+U` pattern is `selinv_nonsym_junction` (forward + adjoint), and
345
+ the zero-fill tree rung is `selected_inverse_nonsym_tree`; both keep the two directed edge blocks
346
+ independent. See [docs/derivations.md](docs/derivations.md) §9-§10.
347
+
348
+ ### Gaussian sampling
349
+
350
+ `sample_gaussian_tree` / `sample_gaussian_junction` draw `x ~ N(0, A⁻¹)` from any tree- or
351
+ junction-structured SPD **precision** `A`, reusing the same factorization (covariance is exactly
352
+ `A⁻¹`, verified by the deterministic transform on the standard basis):
353
+
354
+ ```python
355
+ from gabp_sparse_inv import random_spd_tree, sample_gaussian_tree
356
+
357
+ bt = random_spd_tree(num_nodes=16, block_size=2, seed=0, diag_load=2.0, kind="balanced")
358
+ x = sample_gaussian_tree(bt.diag, bt.edge, bt.parent, num_samples=8) # [num_samples, n, b]
359
+ ```
360
+
361
+ `junction_logdet` / `tree_logdet` (above) and these samplers are the statistical ops that fall out
362
+ of the shared `LDL^T` factorization. See [docs/APPLICATIONS.md](docs/APPLICATIONS.md).
363
+
364
+ ### Application: hierarchical tree-GMRF learning
365
+
366
+ `gabp_sparse_inv/gmrf.py` builds on `selinv_tree` to learn the hyperparameters of a
367
+ hierarchical (tree-structured) Gaussian Markov random field by exact marginal
368
+ likelihood and a posterior-variance objective, all `O(n)`, whereas a dense-autograd
369
+ baseline is `O(N^3)` time / `O(N^2)` memory. The **batched** schedule (`batched=True`)
370
+ beats a naive dense-autograd baseline at every measured size on CPU (113× at n=1023 in one
371
+ fp64 / 1-thread run with 16 fields; a diagnostic, not CI-gated; see
372
+ [docs/APPLICATIONS.md](docs/APPLICATIONS.md)). The per-node reference loop is slower than
373
+ dense at small n; batched is the path intended for scale.
374
+
375
+ ```python
376
+ from gabp_sparse_inv import sample_tree_gmrf, fit_marginal_likelihood
377
+
378
+ parent = [-1, 0, 0, 1, 1, 2, 2]
379
+ y = sample_tree_gmrf(parent, a=0.7, kappa=1.5, root_prec=2.0, seed=0)[None] # one field
380
+ theta = fit_marginal_likelihood(parent, y, steps=300) # recovers a, kappa, root_prec, sigma2
381
+ ```
382
+
383
+ ```bash
384
+ python -m gabp_sparse_inv.bench.gmrf_scaling --values 127 255 511 1023 2047
385
+ ```
386
+
387
+ The **loopy/grid** counterpart (`gabp_sparse_inv/gmrf_grid.py`) ports the same model to an
388
+ arbitrary graph via the junction kernel: a CAR precision `Q = kappa (I + a L)` with exact
389
+ marginal likelihood (`junction_marginal_log_likelihood`) and posterior marginal variances
390
+ (`junction_posterior_marginal_variances`). The cycles are handled *exactly*, not iteratively.
391
+
392
+ ```python
393
+ from gabp_sparse_inv import grid_gmrf_precision, fit_grid_marginal_likelihood
394
+
395
+ diag, edge_index, edge_val = grid_gmrf_precision(rows=8, cols=8, kappa=1.5, a=0.6)
396
+ # ... sample/observe y of shape [..., n, 1] ...
397
+ theta = fit_grid_marginal_likelihood(8, 8, y, steps=200) # recovers kappa, a, sigma2
398
+ ```
399
+
400
+ ### Demonstration: the tree-inverse as the only long-range operator (maze on trees)
401
+
402
+ `gabp_sparse_inv/demos/maze_tree.py` is the clean-room experiment behind the headline: a
403
+ source-routing task on trees where a single differentiable `tree_solve` layer is the *only*
404
+ operator that can move information across the graph. A model with that layer routes the
405
+ source near-exactly (test MSE `~1e-5`); an otherwise-identical model with only `K`-hop local
406
+ message passing cannot, and the gap widens with the tree diameter. The learned precision is
407
+ kept SPD and well-conditioned (`kappa ~ 200`) by construction, handling the maze-conditioning
408
+ risk. It is the tree proxy for the loopy grid maze (Phase 4). See [docs/MAZE.md](docs/MAZE.md).
409
+
410
+ ```bash
411
+ python -m gabp_sparse_inv.demos.maze_tree # depth-sweep table: gabp vs local vs baseline
412
+ ```
413
+
414
+ ### Demonstration: the junction inverse on a loopy grid
415
+
416
+ `gabp_sparse_inv/demos/maze_grid.py` is the direct analogue: each cell is a node on a 2-D
417
+ lattice, the precision is a grid Laplacian built from learned local features, and a single
418
+ differentiable `junction_solve` layer is the only long-range operator (convolutions are
419
+ strictly local). The loopy graph needs the junction-tree kernel; a tree kernel cannot
420
+ represent cycles. Same clean-attribution story as the tree proxy; see [docs/MAZE.md](docs/MAZE.md)
421
+ (grid section).
422
+
423
+ ```bash
424
+ python -m gabp_sparse_inv.demos.maze_grid # size-sweep table: junction vs local vs baseline
425
+ ```
426
+
427
+ ### Demonstration: the non-symmetric inverse as the exact DEQ backward (fixed-point layer)
428
+
429
+ `gabp_sparse_inv/demos/deq_fixedpoint.py` is the real-impact rung. A deep-equilibrium layer
430
+ `z* = f(z*, x)` has, by the implicit function theorem, a backward that is a **non-symmetric**
431
+ solve with the equilibrium Jacobian, `(I − J)ᵀ u = ∂L/∂z*`. When `J` is graph-structured this
432
+ is exactly `nonsym_junction_solve(…, transpose=True)` on `A = I − J`: one block `LDU`,
433
+ `O(fill)`, the transpose of the same factors. The structured backward is **exact** (matches a
434
+ dense implicit-diff oracle and autograd through an unrolled solver to `~1e-15`) and stays
435
+ machine-accurate as the equilibrium stiffens (`ρ(J) → 1`), where the standard iterative
436
+ (Neumann) DEQ backward's gradient error tracks `ρᴷ`. Honest scope: low-treewidth Jacobians,
437
+ `ρ(J) < 1`, a mechanism/impact result (not a SOTA claim). See [docs/DEQ.md](docs/DEQ.md).
438
+
439
+ ```bash
440
+ python -m gabp_sparse_inv.demos.deq_fixedpoint # rho-sweep: exact backward vs iterative
441
+ ```
442
+
443
+ ### Demonstration: the DeltaNet chunk inverse as a hot-swappable op (linear attention)
444
+
445
+ `gabp_sparse_inv/demos/deltanet_chunk.py` shows that the differentiable triangular chunk inverse
446
+ `selinv_tril` (`T = (I − A)⁻¹`) is a **drop-in** inside a real chunked linear-attention layer;
447
+ no new kernel. DeltaNet's within-chunk delta rule *is* the triangular solve `W = (I − A)⁻¹ delta`
448
+ (`A = −tril(diag(β) K Kᵀ, −1)`); the minimal layer built around it reproduces the token-by-token
449
+ delta rule **exactly** (validated vs an `O(L)` sequential oracle at every chunk size). Forming
450
+ `T` with `selinv_tril` (analytic self-adjoint backward) vs the stock `solve_triangular` baseline
451
+ (autograd) gives the same forward and **the same gradients** through the whole multi-chunk layer
452
+ (`~3e-15`), and a layer trains identically either way. A capability / drop-in result, not a
453
+ DeltaNet reimplementation or a SOTA claim. See [docs/DELTANET.md](docs/DELTANET.md).
454
+
455
+ ```bash
456
+ python -m gabp_sparse_inv.demos.deltanet_chunk # drop-in equivalence + train-both-ways table
457
+ ```
458
+
459
+ ## Benchmark
460
+
461
+ ```bash
462
+ python -m gabp_sparse_inv.bench.run --problem chain --sweep L --b 8 --precisions fp64 fp32 bf16
463
+ python -m gabp_sparse_inv.bench.run --problem star --sweep K --b 8 --precisions fp64 fp32
464
+ python -m gabp_sparse_inv.bench.run --problem tree --sweep n --b 8 --tree-kind random
465
+ python -m gabp_sparse_inv.bench.run --problem tree --sweep n --b 8 --grad # fwd+bwd
466
+ ```
467
+
468
+ `--grad` benchmarks the differentiable kernel: forward+backward time scaling vs `n`
469
+ (linear), loop-vs-batched timing, gradient correctness vs dense autograd (machine
470
+ precision), and the structured-vs-dense-autograd backward-memory ratio (`O(n b^2)` vs
471
+ `O((n b)^2)`).
472
+
473
+ The benchmark writes CSV and JSON records keyed by `(seed, config)`. It reports
474
+ log-log timing slope, dense crossover, forward error, independent residuals, and
475
+ analytic structured-vs-dense memory **as diagnostics**; these depend on the BLAS
476
+ backend, device, and thread count, so they are recorded and reported rather than
477
+ asserted. Measured CPU peak RSS is secondary and noisy.
478
+
479
+ ### Precision study
480
+
481
+ ```bash
482
+ python -m gabp_sparse_inv.bench.precision --problem tree --size 64 --b 4
483
+ python -m gabp_sparse_inv.bench.precision --problem chain --precisions fp32 bf16
484
+ python -m gabp_sparse_inv.bench.precision --compare-orders --size 64 --b 4 # ordering study
485
+ ```
486
+
487
+ This study puts the kernel head-to-head with a dense inverse at the **same** precision, both scored
488
+ against an fp64 oracle on the pattern, swept over condition number. Reports each method's
489
+ on-pattern error and the selinv/dense advantage ratio; a low-precision factorization
490
+ breakdown is recorded as `inf` rather than aborting the sweep. The honest bottom line
491
+ (diagnostics, not asserted): **no penalty, and no robust win**. The apparent fp32 edge on
492
+ ill-scaled random trees is modest (~1.5-2.3× median, heavy-tailed) and *mostly an
493
+ elimination-ordering effect* (`--compare-orders` shows a same-order dense Cholesky matches the
494
+ kernel to ~1× in the median); the well-scaled grid Laplacian shows parity at every κ. Precision
495
+ is **not** the differentiator; `O(n)` cost, `O(fill)` memory, and differentiability are.
496
+
497
+ ### No-pivot stability boundary (non-symmetric)
498
+
499
+ ```bash
500
+ python -m gabp_sparse_inv.bench.nonsym_stability # sweep block-diagonal dominance
501
+ ```
502
+
503
+ The non-symmetric kernel eliminates with **no pivoting**, so the static-pattern factorization is
504
+ only safe while block-diagonally dominant (`docs/derivations.md` §10.4). This sweeps the dominance
505
+ ratio and reports the Schur-pivot floor and the fp32 no-pivot error against a dense fp64 oracle,
506
+ alongside the fp32 **pivoted** dense LU on the same blocks. Diagnostic finding: the kernel is at parity
506
+ with pivoted LU while dominant (`α ≳ 1`); below the dominance boundary the Schur floor collapses and
507
+ the no-pivot error departs from `κ·u`, quantifying where the static-pattern regime ends.
509
+
510
+ ## Tests
511
+
512
+ ```bash
513
+ pytest -q
514
+ ```
515
+
516
+ The test suite covers every kernel against dense fp64 oracles: chain, star, tree, the
517
+ differentiable tree (`selinv_tree`), junction tree, the non-symmetric bidiagonal / DeltaNet
518
+ chunk / non-symmetric tree, the tree- and grid-GMRF applications, Gaussian sampling, and the
519
+ elimination-ordering helpers. Per kernel: dense-oracle accuracy, condition-aware
520
+ ill-conditioned cases (where implemented), independent residual checks (SPD kernels),
521
+ SPD/symmetry properties, edge cases, leading batch dimensions, `compute_dtype` (strongest on
522
+ junction; bf16 sanity elsewhere), first-order autograd (`gradcheck` / analytic-vs-dense
523
+ adjoint), and, where applicable, second-order autograd (`gradgradcheck` on junction,
524
+ `selected_inverse_tree(batched=True)`, and `selected_inverse_nonsym_tree` only; **not** on
525
+ `selinv_tree`, `selinv_bidiag`, or `selinv_tril`). Also: order-invariance, fill, trace
526
+ identities (junction), and HVP checks (`tests/test_double_backward.py`). CI runs on Ubuntu,
527
+ Windows, and macOS with Python 3.12 and 3.13.
528
+
529
+ ## Citation
530
+
531
+ If you use `gabp-sparse-inv` in your research, please cite it. Machine-readable metadata is in
532
+ [CITATION.cff](CITATION.cff) (GitHub's "Cite this repository" reads it), and a software paper is
533
+ in preparation for the Journal of Open Source Software
534
+ ([paper/joss/paper.md](paper/joss/paper.md)).
535
+
536
+ ## Contributing and support
537
+
538
+ Contributions, bug reports, and usage questions are welcome. [CONTRIBUTING.md](CONTRIBUTING.md)
539
+ covers how to contribute, report issues, and get support; [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md)
540
+ sets the community standards. The package scope and deliberate exclusions are summarized above
541
+ and in the package docstring.
542
+
543
+ ## License
544
+
545
+ MIT. See [LICENSE](LICENSE).