freealg 0.1.13__tar.gz → 0.1.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. {freealg-0.1.13 → freealg-0.1.14}/PKG-INFO +17 -7
  2. {freealg-0.1.13 → freealg-0.1.14}/README.rst +16 -6
  3. {freealg-0.1.13 → freealg-0.1.14}/freealg/__init__.py +2 -1
  4. freealg-0.1.14/freealg/__version__.py +1 -0
  5. {freealg-0.1.13 → freealg-0.1.14}/freealg/_chebyshev.py +4 -5
  6. {freealg-0.1.13 → freealg-0.1.14}/freealg/_decompress.py +89 -21
  7. {freealg-0.1.13 → freealg-0.1.14}/freealg/_pade.py +23 -13
  8. {freealg-0.1.13 → freealg-0.1.14}/freealg/_plot_util.py +6 -3
  9. freealg-0.1.14/freealg/_support.py +186 -0
  10. {freealg-0.1.13 → freealg-0.1.14}/freealg/distributions/_kesten_mckay.py +13 -5
  11. {freealg-0.1.13 → freealg-0.1.14}/freealg/distributions/_marchenko_pastur.py +10 -2
  12. {freealg-0.1.13 → freealg-0.1.14}/freealg/distributions/_meixner.py +10 -2
  13. {freealg-0.1.13 → freealg-0.1.14}/freealg/distributions/_wachter.py +10 -2
  14. {freealg-0.1.13 → freealg-0.1.14}/freealg/distributions/_wigner.py +10 -2
  15. freealg-0.1.14/freealg/eigfree.py +120 -0
  16. {freealg-0.1.13 → freealg-0.1.14}/freealg/freeform.py +80 -55
  17. {freealg-0.1.13 → freealg-0.1.14}/freealg.egg-info/PKG-INFO +17 -7
  18. {freealg-0.1.13 → freealg-0.1.14}/freealg.egg-info/SOURCES.txt +1 -0
  19. freealg-0.1.13/freealg/__version__.py +0 -1
  20. freealg-0.1.13/freealg/_support.py +0 -85
  21. {freealg-0.1.13 → freealg-0.1.14}/AUTHORS.txt +0 -0
  22. {freealg-0.1.13 → freealg-0.1.14}/CHANGELOG.rst +0 -0
  23. {freealg-0.1.13 → freealg-0.1.14}/LICENSE.txt +0 -0
  24. {freealg-0.1.13 → freealg-0.1.14}/MANIFEST.in +0 -0
  25. {freealg-0.1.13 → freealg-0.1.14}/freealg/_damp.py +0 -0
  26. {freealg-0.1.13 → freealg-0.1.14}/freealg/_jacobi.py +0 -0
  27. {freealg-0.1.13 → freealg-0.1.14}/freealg/_sample.py +0 -0
  28. {freealg-0.1.13 → freealg-0.1.14}/freealg/_util.py +0 -0
  29. {freealg-0.1.13 → freealg-0.1.14}/freealg/distributions/__init__.py +0 -0
  30. {freealg-0.1.13 → freealg-0.1.14}/freealg.egg-info/dependency_links.txt +0 -0
  31. {freealg-0.1.13 → freealg-0.1.14}/freealg.egg-info/not-zip-safe +0 -0
  32. {freealg-0.1.13 → freealg-0.1.14}/freealg.egg-info/requires.txt +0 -0
  33. {freealg-0.1.13 → freealg-0.1.14}/freealg.egg-info/top_level.txt +0 -0
  34. {freealg-0.1.13 → freealg-0.1.14}/pyproject.toml +0 -0
  35. {freealg-0.1.13 → freealg-0.1.14}/requirements.txt +0 -0
  36. {freealg-0.1.13 → freealg-0.1.14}/setup.cfg +0 -0
  37. {freealg-0.1.13 → freealg-0.1.14}/setup.py +0 -0
{freealg-0.1.13 → freealg-0.1.14}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: freealg
- Version: 0.1.13
+ Version: 0.1.14
  Summary: Free probability for large matrices
  Keywords: leaderboard bot chat
  Platform: Linux
@@ -70,6 +70,12 @@ Dynamic: summary
  :width: 240
  :class: custom-dark

+ `Paper <https://arxiv.org/abs/2506.11994>`__ |
+ `Slides <https://www.dropbox.com/scl/fi/03gjuyz17k9yhsqy0isoz/free_decomporession_slides.pdf?rlkey=8f82mhciyl2ju02l7hv1md5li&st=26xmhjga&dl=0>`__ |
+ `Docs <https://ameli.github.io/freealg>`__
+
+ .. `Slides <https://ameli.github.io/freealg/_static/data/slides.pdf>`__ |
+
  *freealg* is a Python package that employs **free** probability to evaluate the spectral
  densities of large matrix **form**\ s. The fundamental algorithm employed by *freealg* is
  **free decompression**, which extrapolates from the empirical spectral densities of small
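
To make that workflow concrete, here is a minimal hypothetical sketch using only names that appear in this diff (``FreeForm`` and ``decompress``); the ``FreeForm`` constructor signature is not shown in the diff and is assumed here.

.. code:: python

    import numpy
    from freealg import FreeForm
    from freealg._decompress import decompress

    # Toy stand-in for a large problem: eigenvalues of a small n-by-n
    # Wigner-type matrix.
    n = 1000
    A = numpy.random.randn(n, n)
    A = (A + A.T) / numpy.sqrt(2 * n)
    eig = numpy.linalg.eigvalsh(A)

    # Assumed constructor: fit a freeform model to the observed spectrum.
    ff = FreeForm(eig)

    # decompress(freeform, size, ...) is defined in this diff; it returns
    # the extrapolated density, its grid, and the new support bounds.
    rho, x, (lb, ub) = decompress(ff, size=4 * n)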
@@ -140,15 +146,19 @@ requests and bug reports.
  How to Cite
  ===========

- If you use this work, please cite the `arXiv paper <https://arxiv.org/abs/2506.11994>`.
+ If you use this work, please cite the `arXiv paper <https://arxiv.org/abs/2506.11994>`__.

  .. code::

- @article{ameli2025spectral,
- title={Spectral Estimation with Free Decompression},
- author={Siavash Ameli and Chris van der Heide and Liam Hodgkinson and Michael W. Mahoney},
- journal={arXiv preprint arXiv:2506.11994},
- year={2025}
+ @article{spectral2025,
+ title={Spectral Estimation with Free Decompression},
+ author={Siavash Ameli and Chris van der Heide and Liam Hodgkinson and Michael W. Mahoney},
+ year={2025},
+ eprint={2506.11994},
+ archivePrefix={arXiv},
+ primaryClass={stat.ML},
+ url={https://arxiv.org/abs/2506.11994},
+ journal={arXiv preprint arXiv:2506.11994},
  }


{freealg-0.1.13 → freealg-0.1.14}/README.rst

@@ -3,6 +3,12 @@
  :width: 240
  :class: custom-dark

+ `Paper <https://arxiv.org/abs/2506.11994>`__ |
+ `Slides <https://www.dropbox.com/scl/fi/03gjuyz17k9yhsqy0isoz/free_decomporession_slides.pdf?rlkey=8f82mhciyl2ju02l7hv1md5li&st=26xmhjga&dl=0>`__ |
+ `Docs <https://ameli.github.io/freealg>`__
+
+ .. `Slides <https://ameli.github.io/freealg/_static/data/slides.pdf>`__ |
+
  *freealg* is a Python package that employs **free** probability to evaluate the spectral
  densities of large matrix **form**\ s. The fundamental algorithm employed by *freealg* is
  **free decompression**, which extrapolates from the empirical spectral densities of small
@@ -73,15 +79,19 @@ requests and bug reports.
  How to Cite
  ===========

- If you use this work, please cite the `arXiv paper <https://arxiv.org/abs/2506.11994>`.
+ If you use this work, please cite the `arXiv paper <https://arxiv.org/abs/2506.11994>`__.

  .. code::

- @article{ameli2025spectral,
- title={Spectral Estimation with Free Decompression},
- author={Siavash Ameli and Chris van der Heide and Liam Hodgkinson and Michael W. Mahoney},
- journal={arXiv preprint arXiv:2506.11994},
- year={2025}
+ @article{spectral2025,
+ title={Spectral Estimation with Free Decompression},
+ author={Siavash Ameli and Chris van der Heide and Liam Hodgkinson and Michael W. Mahoney},
+ year={2025},
+ eprint={2506.11994},
+ archivePrefix={arXiv},
+ primaryClass={stat.ML},
+ url={https://arxiv.org/abs/2506.11994},
+ journal={arXiv preprint arXiv:2506.11994},
  }


{freealg-0.1.13 → freealg-0.1.14}/freealg/__init__.py

@@ -6,7 +6,8 @@
  # under the terms of the license found in the LICENSE.txt file in the root
  # directory of this source tree.

- from .freeform import FreeForm, eigfree
+ from .freeform import FreeForm
+ from .eigfree import eigfree
  from . import distributions

  __all__ = ['FreeForm', 'distributions', 'eigfree']
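
The package-level API is unchanged by this refactor; both names continue to resolve from the package root, as a quick check confirms:

.. code:: python

    # Only the module that defines eigfree has moved; the public
    # import path is the same as in 0.1.13.
    from freealg import FreeForm, eigfree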
freealg-0.1.14/freealg/__version__.py

@@ -0,0 +1 @@
+ __version__ = "0.1.14"
{freealg-0.1.13 → freealg-0.1.14}/freealg/_chebyshev.py

@@ -58,7 +58,6 @@ def chebyshev_sample_proj(eig, support, K=10, reg=0.0):

  # Map to [–1,1] interval
  t = (2 * eig - (lam_m + lam_p)) / (lam_p - lam_m)
- N = eig.size

  # Inner‐product norm of each U_k under w(t) = sqrt{1–t^2} is \\pi/2
  norm = numpy.pi / 2
@@ -104,7 +103,7 @@ def chebyshev_kernel_proj(xs, pdf, support, K=10, reg=0.0):

  for k in range(K + 1):
  Pk = eval_chebyu(k, t) # U_k(t) on the grid
- moment = numpy.trapezoid(Pk * pdf, xs) # \int U_k(t) \rho(x) dx
+ moment = numpy.trapezoid(Pk * pdf, xs) # \int U_k(t) \rho(x) dx

  if k == 0:
  penalty = 0
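
The ``moment`` line above is a plain quadrature of the density against the Chebyshev polynomials of the second kind. As a sanity check, a standalone sketch (not freealg code): for the semicircle law on [-2, 2], the k = 0 moment is one and every higher U_k moment vanishes.

.. code:: python

    import numpy
    from scipy.special import eval_chebyu

    lam_m, lam_p = -2.0, 2.0
    xs = numpy.linspace(lam_m, lam_p, 2001)
    pdf = numpy.sqrt(numpy.maximum(0.0, 4.0 - xs**2)) / (2.0 * numpy.pi)

    # Same affine map to [-1, 1] as in chebyshev_kernel_proj
    t = (2 * xs - (lam_m + lam_p)) / (lam_p - lam_m)

    for k in range(4):
        moment = numpy.trapezoid(eval_chebyu(k, t) * pdf, xs)
        print(k, round(moment, 6))   # ~1, then ~0, 0, 0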
@@ -226,12 +225,12 @@ def chebyshev_stieltjes(z, psi, support):
  S = wynn_pade(psi_zero, J)

  # build powers J^(k+1) for k=0..K
- #K = len(psi) - 1
+ # K = len(psi) - 1
  # shape: (..., K+1)
- #Jpow = J[..., None] ** numpy.arange(1, K+2)
+ # Jpow = J[..., None] ** numpy.arange(1, K+2)

  # sum psi_k * J^(k+1)
- #S = numpy.sum(psi * Jpow, axis=-1)
+ # S = numpy.sum(psi * Jpow, axis=-1)

  # assemble m(z)
  m_z = -2 / span * numpy.pi * S
{freealg-0.1.13 → freealg-0.1.14}/freealg/_decompress.py

@@ -11,12 +11,18 @@
  # =======
  import numpy
- # from scipy.integrate import solve_ivp

  __all__ = ['decompress', 'reverse_characteristics']

+
+ # =============
+ # secant method
+ # =============
+
+
  def secant_complex(f, z0, z1, a=0+0j, tol=1e-12, max_iter=100,
- alpha=0.5, max_bt=12, eps=1e-30, verbose=False):
+ alpha=0.5, max_bt=2, eps=1e-30, step_factor=5.0,
+ post_smooth=True, jump_tol=10.0, verbose=False):
  """
  Solves :math:``f(z) = a`` for many starting points simultaneously
  using the secant method in the complex plane.

@@ -42,11 +48,20 @@ def secant_complex(f, z0, z1, a=0+0j, tol=1e-12, max_iter=100,
  Back‑tracking shrink factor (``0 < alpha < 1``). Defaults to ``0.5``.

  max_bt : int, optional
- Maximum back‑tracking trials per iteration. Defaults to ``12``.
+ Maximum back‑tracking trials per iteration. Defaults to ``0``.

  eps : float, optional
  Safeguard added to tiny denominators. Defaults to ``1e-30``.

+ post_smooth : bool, optional
+ If True (default) run a single vectorised clean-up pass that
+ re-solves points whose final root differs from the *nearest*
+ neighbour by more than ``jump_tol`` times the local median jump.
+
+ jump_tol : float, optional
+ Sensitivity of the clean-up pass; larger tolerance implies fewer
+ re-solves.
+
  verbose : bool, optional
  If *True*, prints progress every 10 iterations.

@@ -69,8 +84,8 @@ def secant_complex(f, z0, z1, a=0+0j, tol=1e-12, max_iter=100,
  orig_shape = z0.shape
  z0, z1, a = (x.ravel() for x in (z0, z1, a))

- n_points = z0.size
- roots = z1.copy()
+ n_points = z0.size
+ roots = z1.copy()
  iterations = numpy.zeros(n_points, dtype=int)

  f0 = f(z0) - a
@@ -87,9 +102,16 @@ def secant_complex(f, z0, z1, a=0+0j, tol=1e-12, max_iter=100,
  # Secant step
  denom = f1 - f0
  denom = numpy.where(numpy.abs(denom) < eps, denom + eps, denom)
- dz = (z1 - z0) * f1 / denom
- z2 = z1 - dz
- f2 = f(z2) - a
+ dz = (z1 - z0) * f1 / denom
+
+ # Step-size limiter
+ prev_step = numpy.maximum(numpy.abs(z1 - z0), eps)
+ max_step = step_factor * prev_step
+ big = numpy.abs(dz) > max_step
+ dz[big] *= max_step[big] / numpy.abs(dz[big])
+
+ z2 = z1 - dz
+ f2 = f(z2) - a

  # Line search by backtracking
  worse = (numpy.abs(f2) >= numpy.abs(f1)) & active
@@ -130,18 +152,57 @@ def secant_complex(f, z0, z1, a=0+0j, tol=1e-12, max_iter=100,
  residuals[remaining] = numpy.abs(f1[remaining])
  iterations[remaining] = max_iter

+ # Optional clean-up pass
+ if post_smooth and n_points > 2:
+ # absolute jump to *nearest* neighbour (left or right)
+ diff_left = numpy.empty_like(roots)
+ diff_right = numpy.empty_like(roots)
+ diff_left[1:] = numpy.abs(roots[1:] - roots[:-1])
+ diff_right[:-1] = numpy.abs(roots[:-1] - roots[1:])
+ jump = numpy.minimum(diff_left, diff_right)
+
+ # ignore unconverged points
+ median_jump = numpy.median(jump[~remaining])
+ bad = (jump > jump_tol * median_jump) & ~remaining
+
+ if bad.any():
+ z_first_all = numpy.where(bad & (diff_left <= diff_right),
+ roots - diff_left,
+ roots + diff_right)
+
+ # keep only the offending indices
+ z_first = z_first_all[bad]
+ z_second = z_first + (roots[bad] - z_first) * 1e-2
+
+ # re-solve just the outliers in one vector call
+ new_root, new_res, new_iter = secant_complex(
+ f, z_first, z_second, a[bad],
+ tol=tol, max_iter=max_iter,
+ alpha=alpha, max_bt=max_bt,
+ eps=eps, step_factor=step_factor,
+ post_smooth=False, # avoid recursion
+ )
+ roots[bad] = new_root
+ residuals[bad] = new_res
+ iterations[bad] = iterations[bad] + new_iter
+
+ if verbose:
+ print(f"Clean-up: re-solved {bad.sum()} outliers")
+
  return (
  roots.reshape(orig_shape),
  residuals.reshape(orig_shape),
  iterations.reshape(orig_shape),
  )

+
  # ==========
  # decompress
  # ==========

- def decompress(freeform, size, x=None, delta=1e-6, max_iter=500,
- tolerance=1e-12):
+
+ def decompress(freeform, size, x=None, delta=1e-4, max_iter=500,
+ tolerance=1e-8):
  """
  Free decompression of spectral density.

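
Stripped of the backtracking line search, the step-size limiter, and the clean-up pass added above, the core of ``secant_complex`` is an ordinary secant iteration applied elementwise to a whole array of complex targets. A minimal standalone sketch of just that core:

.. code:: python

    import numpy

    def secant_vec(f, z0, z1, a, tol=1e-12, max_iter=100, eps=1e-30):
        # Solve f(z) = a for every entry of the arrays z0, z1, a at once.
        f0, f1 = f(z0) - a, f(z1) - a
        for _ in range(max_iter):
            denom = f1 - f0
            denom = numpy.where(numpy.abs(denom) < eps, denom + eps, denom)
            z0, z1 = z1, z1 - (z1 - z0) * f1 / denom
            f0, f1 = f1, f(z1) - a
            if numpy.abs(f1).max() < tol:
                break
        return z1

    # Example: solving z**2 = a recovers a square root of each target.
    a = numpy.array([2.0 + 1.0j, 3.0 - 2.0j])
    roots = secant_vec(lambda z: z**2, a, a + 0.1, a)
    print(numpy.abs(roots**2 - a))   # ~0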
@@ -201,34 +262,35 @@ def decompress(freeform, size, x=None, delta=1e-6, max_iter=500,
  alpha = size / freeform.n
  m = freeform._eval_stieltjes
  # Lower and upper bound on new support
- hilb_lb = (1 / m(freeform.lam_m + delta * 1j)[1]).real
- hilb_ub = (1 / m(freeform.lam_p + delta * 1j)[1]).real
+ hilb_lb = (1 / m(freeform.lam_m + delta * 1j)).real
+ hilb_ub = (1 / m(freeform.lam_p + delta * 1j)).real
  lb = freeform.lam_m - (alpha - 1) * hilb_lb
  ub = freeform.lam_p - (alpha - 1) * hilb_ub

  # Create x if not given
- if x is None:
+ on_grid = (x is None)
+ if on_grid:
  radius = 0.5 * (ub - lb)
  center = 0.5 * (ub + lb)
  scale = 1.25
  x_min = numpy.floor(center - radius * scale)
  x_max = numpy.ceil(center + radius * scale)
  x = numpy.linspace(x_min, x_max, 500)
+ else:
+ x = numpy.asarray(x)

- # Ensure that input is an array
- x = numpy.asarray(x)
  target = x + delta * 1j
  if numpy.isclose(alpha, 1.0):
  return freeform.density(x), x, freeform.support

  # Characteristic curve map
  def _char_z(z):
- return z + (1 / m(z)[1]) * (1 - alpha)
+ return z + (1 / m(z)) * (1 - alpha)

- z0 = numpy.full(target.shape, numpy.mean(freeform.support) + delta*1j,
+ z0 = numpy.full(target.shape, numpy.mean(freeform.support) + .1j,
  dtype=numpy.complex128)
- z1 = z0 - numpy.log(alpha) * 1j
-
+ z1 = z0 - .2j
+
  roots, _, _ = secant_complex(
  _char_z, z0, z1,
  a=target,
@@ -238,9 +300,15 @@ def decompress(freeform, size, x=None, delta=1e-4, max_iter=500,

  # Plemelj's formula
  z = roots
- char_s = m(z)[1] / alpha
+ char_s = m(z) / alpha
  rho = numpy.maximum(0, char_s.imag / numpy.pi)
  rho[numpy.isnan(rho) | numpy.isinf(rho)] = 0
+ if on_grid:
+ x, rho = x.ravel(), rho.ravel()
+ # dx = x[1] - x[0]
+ # left_idx, right_idx = support_from_density(dx, rho)
+ # x, rho = x[left_idx-1:right_idx+1], rho[left_idx-1:right_idx+1]
+ rho = rho / numpy.trapezoid(rho, x)

  return rho.reshape(*x.shape), x, (lb, ub)

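
Plemelj's formula, used just above, recovers a density from its Stieltjes transform evaluated slightly above the real axis: rho(x) = Im m(x + i*delta) / pi as delta -> 0+. A self-contained check on the semicircle law, whose transform is known in closed form (a sketch, not freealg code):

.. code:: python

    import numpy

    delta = 1e-4
    x = numpy.linspace(-2.5, 2.5, 1001)
    z = x + delta * 1j

    # Stieltjes transform of the semicircle law on [-2, 2]; writing the
    # square root as sqrt(z - 2) * sqrt(z + 2) picks the correct branch
    # in the upper half-plane.
    m = (-z + numpy.sqrt(z - 2) * numpy.sqrt(z + 2)) / 2

    rho = numpy.maximum(0, m.imag / numpy.pi)
    exact = numpy.sqrt(numpy.maximum(0.0, 4.0 - x**2)) / (2.0 * numpy.pi)
    print(numpy.max(numpy.abs(rho - exact)))   # small; largest near the edges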
@@ -260,7 +328,7 @@ def reverse_characteristics(freeform, z_inits, T, iterations=500,
  m = freeform._eval_stieltjes

  def _char_z(z, t):
- return z + (1 / m(z)[1]) * (1 - numpy.exp(t))
+ return z + (1 / m(z)) * (1 - numpy.exp(t))

  target_z, target_t = numpy.meshgrid(z_inits, t_eval)

{freealg-0.1.13 → freealg-0.1.14}/freealg/_pade.py

@@ -236,9 +236,10 @@ def _eval_rational(z, c, D, poles, resid):

  return c + D * z + term

- # ========
- # Wynn epsilon algorithm for Pade
- # ========
+
+ # =========
+ # Wynn pade
+ # =========

  @numba.jit(nopython=True, parallel=True)
  def wynn_pade(coeffs, x):
@@ -248,48 +249,57 @@ def wynn_pade(coeffs, x):
  returns a function handle that computes the Pade approximant at any x
  using Wynn's epsilon algorithm.

- Parameters:
- coeffs (list or array): Coefficients [a0, a1, a2, ...] of the power series.
+ Parameters
+ ----------
+
+ coeffs (list or array):
+ Coefficients [a0, a1, a2, ...] of the power series.

- Returns:
- function: A function approximant(x) that returns the approximated value f(x).
+ Returns
+ -------
+
+ function:
+ A function approximant(x) that returns the approximated value f(x).
  """
+
  # Number of coefficients
  xn = x.ravel()
  d = len(xn)
  N = len(coeffs)
-
+
  # Compute the partial sums s_n = sum_{i=0}^n a_i * x^i for n=0,...,N-1
  eps = numpy.zeros((N+1, N, d), dtype=numpy.complex128)
  for i in numba.prange(d):
  partial_sum = 0.0
  for n in range(N):
  partial_sum += coeffs[n] * (xn[i] ** n)
- eps[0,n,i] = partial_sum
+ eps[0, n, i] = partial_sum

  for i in numba.prange(d):
  for k in range(1, N+1):
  for j in range(N - k):
- delta = eps[k-1, j+1,i] - eps[k-1, j,i]
+ delta = eps[k-1, j+1, i] - eps[k-1, j, i]
  if delta == 0:
  rec_delta = numpy.inf
  elif numpy.isinf(delta) or numpy.isnan(delta):
  rec_delta = 0.0
  else:
  rec_delta = 1.0 / delta
- eps[k,j,i] = rec_delta
+ eps[k, j, i] = rec_delta
  if k > 1:
- eps[k,j,i] += eps[k-2,j+1,i]
+ eps[k, j, i] += eps[k-2, j+1, i]

  if (N % 2) == 0:
  N -= 1
-
+
  return eps[N-1, 0, :].reshape(x.shape)

+
  # ========
  # fit pade
  # ========

+
  def fit_pade(x, f, lam_m, lam_p, p=1, q=2, odd_side='left', pade_reg=0.0,
  safety=1.0, max_outer=40, xtol=1e-12, ftol=1e-12, optimizer='ls',
  verbose=0):
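
``wynn_pade`` fills the epsilon table over the partial sums of a power series; the even-order rows of the table hold the Padé-type accelerated values. A scalar sketch of the same recursion (plain NumPy, no numba), applied to the slowly converging series log(2) = 1 - 1/2 + 1/3 - ...:

.. code:: python

    import numpy

    N = 11
    terms = numpy.array([(-1.0) ** n / (n + 1) for n in range(N)])
    s = numpy.cumsum(terms)            # partial sums form row 0 of the table

    eps = numpy.zeros((N + 1, N))
    eps[0, :] = s
    for k in range(1, N + 1):
        for j in range(N - k):
            delta = eps[k - 1, j + 1] - eps[k - 1, j]
            eps[k, j] = 1.0 / delta
            if k > 1:
                eps[k, j] += eps[k - 2, j + 1]

    # The raw partial sum is off by roughly 4e-2; the accelerated entry
    # is accurate to many more digits.
    print(abs(s[-1] - numpy.log(2)))
    print(abs(eps[N - 1, 0] - numpy.log(2)))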
{freealg-0.1.13 → freealg-0.1.14}/freealg/_plot_util.py

@@ -139,7 +139,7 @@ def _auto_bins(array, method='scott', factor=5):
  # ============

  def plot_density(x, rho, eig=None, support=None, label='',
- title='Spectral density', latex=False, save=False):
+ title='Spectral Density', latex=False, save=False):
  """
  """

@@ -147,8 +147,11 @@ def plot_density(x, rho, eig=None, support=None, label='',

  fig, ax = plt.subplots(figsize=(6, 2.7))

- if (support is not None) and (eig is not None):
- lam_m, lam_p = support
+ if eig is not None:
+ if support is not None:
+ lam_m, lam_p = support
+ else:
+ lam_m, lam_p = min(eig), max(eig)
  bins = numpy.linspace(lam_m, lam_p, _auto_bins(eig))
  _ = ax.hist(eig, bins, density=True, color='silver',
  edgecolor='none', label='Histogram')
freealg-0.1.14/freealg/_support.py

@@ -0,0 +1,186 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+ # SPDX-FileType: SOURCE
+ #
+ # This program is free software: you can redistribute it and/or modify it under
+ # the terms of the license found in the LICENSE.txt file in the root directory
+ # of this source tree.
+
+
+ # =======
+ # Imports
+ # =======
+
+ import numpy
+ import numba
+ from scipy.stats import gaussian_kde
+
+
+ @numba.njit(numba.types.UniTuple(numba.types.int64, 2)(
+ numba.types.float64,
+ numba.types.float64[::1]
+ ))
+ def support_from_density(dx, density):
+ """
+ Estimates the support from a collection of noisy observations of a
+ density over a grid of x-values with mesh spacing dx.
+ """
+ n = density.shape[0]
+ target = 1.0 / dx
+
+ # 1) compute total_sum once
+ total_sum = 0.0
+ for t in range(n):
+ total_sum += density[t]
+
+ # 2) set up our “best‐so‐far” trackers
+ large = 1e300
+ best_nonneg_sum = large
+ best_nonneg_idx = -1
+ best_nonpos_sum = -large
+ best_nonpos_idx = -1
+
+ # 3) seed with first element (i.e. prefix_sum for k=1)
+ prefix_sum = density[0]
+ if prefix_sum >= 0.0:
+ best_nonneg_sum, best_nonneg_idx = prefix_sum, 1
+ else:
+ best_nonpos_sum, best_nonpos_idx = prefix_sum, 1
+
+ # 4) sweep j from 2...n–1, updating prefix_sum on the fly
+ optimal_i, optimal_j = 1, 2
+ minimal_cost = large
+
+ for j in range(2, n):
+ # extend prefix_sum to cover density[0]...density[j-1]
+ prefix_sum += density[j-1]
+
+ # cost for [0...i], [i...j]
+ diff_mid = prefix_sum - target
+ if diff_mid >= 0.0 and best_nonneg_sum <= diff_mid:
+ cost12 = diff_mid
+ i_cand = best_nonneg_idx
+ elif diff_mid < 0.0 and best_nonpos_sum >= diff_mid:
+ cost12 = -diff_mid
+ i_cand = best_nonpos_idx
+ else:
+ cost_using_nonpos = diff_mid - 2.0 * best_nonpos_sum
+ cost_using_nonneg = 2.0 * best_nonneg_sum - diff_mid
+ if cost_using_nonpos < cost_using_nonneg:
+ cost12, i_cand = cost_using_nonpos, best_nonpos_idx
+ else:
+ cost12, i_cand = cost_using_nonneg, best_nonneg_idx
+
+ # cost for [j...n]
+ cost3 = total_sum - prefix_sum
+ if cost3 < 0.0:
+ cost3 = -cost3
+
+ # total and maybe update best split
+ total_cost = cost12 + cost3
+ if total_cost < minimal_cost:
+ minimal_cost = total_cost
+ optimal_i, optimal_j = i_cand, j
+
+ # update our prefix‐sum trackers
+ if prefix_sum >= 0.0:
+ if prefix_sum < best_nonneg_sum:
+ best_nonneg_sum, best_nonneg_idx = prefix_sum, j
+ else:
+ if prefix_sum > best_nonpos_sum:
+ best_nonpos_sum, best_nonpos_idx = prefix_sum, j
+
+ return optimal_i, optimal_j
+
+
+ def detect_support(eigs, method='asymp', k=None, p=0.001, **kwargs):
+ """
+ Estimates the support of the eigenvalue density.
+
+ Parameters
+ ----------
+ method : {``'range'``, ``'asymp'``, ``'jackknife'``, ``'regression'``,
+ ``'interior'``, ``'interior_smooth'``}, \
+ default=``'asymp'``
+ The method of support estimation:
+
+ * ``'range'``: no estimation; the support is the range of the
+ eigenvalues.
+ * ``'asymp'``: assume the relative error in the min/max estimator is
+ 1/n.
+ * ``'jackknife'``: estimates the support using Quenouille's [1]
+ jackknife estimator. Fast and simple, more accurate than the
+ range.
+ * ``'regression'``: estimates the support by performing a regression
+ under the assumption that the edge behavior is of square-root
+ type. Often most accurate.
+ * ``'interior'``: estimates a support assuming the range overestimates;
+ uses quantiles (p, 1-p).
+ * ``'interior_smooth'``: same as ``'interior'`` but using kernel
+ density estimation.
+
+ k : int, default=None
+ Number of extreme order statistics to use for ``method='regression'``.
+
+ p : float, default=0.001
+ The edges of the support of the distribution are detected by the
+ :math:`p`-quantile on the left and :math:`(1-p)`-quantile on the right
+ when ``method='interior'`` or ``method='interior_smooth'``.
+ This value should be between 0 and 1, ideally a small number close to
+ zero.
+
+ References
+ ----------
+
+ .. [1] Quenouille, M. H. (1949, July). Approximate tests of correlation in
+ time-series. In Mathematical Proceedings of the Cambridge
+ Philosophical Society (Vol. 45, No. 3, pp. 483-484). Cambridge
+ University Press.
+ """
+
+ if method == 'range':
+ lam_m = eigs.min()
+ lam_p = eigs.max()
+
+ elif method == 'asymp':
+ lam_m = eigs.min() - abs(eigs.min()) / len(eigs)
+ lam_p = eigs.max() + abs(eigs.max()) / len(eigs)
+
+ elif method == 'jackknife':
+ x, n = numpy.sort(eigs), len(eigs)
+ lam_m = x[0] - (n - 1)/n * (x[1] - x[0])
+ lam_p = x[-1] + (n - 1)/n * (x[-1] - x[-2])
+
+ elif method == 'regression':
+ x, n = numpy.sort(eigs), len(eigs)
+ if k is None:
+ k = int(round(n ** (2/3)))
+ k = max(5, min(k, n // 2))
+
+ # The theoretical cdf near the edge behaves like const*(x - a)^{3/2},
+ # so (i/n) ~ (x - a)^{3/2} -> x ~ a + const*(i/n)^{2/3}.
+ y = ((numpy.arange(1, k + 1) - 0.5) / n) ** (2 / 3)
+
+ # Left edge: regress x_{(i)} on y
+ _, lam_m = numpy.polyfit(y, x[:k], 1)
+
+ # Right edge: regress x_{(n-i+1)} on y
+ _, lam_p = numpy.polyfit(y, x[-k:][::-1], 1)
+
+ elif method == 'interior':
+ lam_m, lam_p = numpy.quantile(eigs, [p, 1-p])
+
+ elif method == 'interior_smooth':
+ kde = gaussian_kde(eigs)
+ xs = numpy.linspace(eigs.min(), eigs.max(), 1000)
+ fs = kde(xs)
+
+ cdf = numpy.cumsum(fs)
+ cdf /= cdf[-1]
+
+ lam_m = numpy.interp(p, cdf, xs)
+ lam_p = numpy.interp(1-p, cdf, xs)
+
+ else:
+ raise NotImplementedError("Unknown method")
+
+ return lam_m, lam_p
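
Of the estimators above, the jackknife one is the easiest to sanity-check in isolation: it widens the observed range by a bias-correcting multiple of the gap between the two extreme order statistics. A standalone sketch on eigenvalues drawn from the semicircle law, whose true support is [-2, 2]:

.. code:: python

    import numpy

    rng = numpy.random.default_rng(0)
    n = 2000
    A = rng.standard_normal((n, n))
    eigs = numpy.linalg.eigvalsh((A + A.T) / numpy.sqrt(2 * n))

    # Quenouille-style jackknife edge estimates, as in detect_support
    x = numpy.sort(eigs)
    lam_m = x[0] - (n - 1) / n * (x[1] - x[0])
    lam_p = x[-1] + (n - 1) / n * (x[-1] - x[-2])
    print(lam_m, lam_p)   # slightly wider than the raw range, near -2 and 2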
{freealg-0.1.13 → freealg-0.1.14}/freealg/distributions/_kesten_mckay.py

@@ -110,7 +110,7 @@ class KestenMcKay(object):
  # density
  # =======

- def density(self, x=None, plot=False, latex=False, save=False):
+ def density(self, x=None, plot=False, latex=False, save=False, eig=None):
  """
  Density of distribution.

@@ -137,6 +137,10 @@ class KestenMcKay(object):
  assumed to the save filename (with the file extension). This option
  is relevant only if ``plot=True``.

+ eig : numpy.array, default=None
+ A collection of eigenvalues to compare to via histogram. This
+ option is relevant only if ``plot=True``.
+
  Returns
  -------

@@ -173,7 +177,11 @@ class KestenMcKay(object):
  numpy.sqrt(4.0 * (self.d - 1.0) - x[mask]**2)

  if plot:
- plot_density(x, rho, label='', latex=latex, save=save)
+ if eig is not None:
+ label = 'Estimate'
+ else:
+ label = ''
+ plot_density(x, rho, label=label, latex=latex, save=save, eig=eig)

  return rho

@@ -539,9 +547,9 @@ class KestenMcKay(object):

  return samples

- # ============
- # Haar unitary
- # ============
+ # ===============
+ # haar orthogonal
+ # ===============

  def _haar_orthogonal(self, n, k, seed=None):
  """