pragmastat-3.1.29.tar.gz → pragmastat-3.1.30.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pragmastat might be problematic; further details are linked from the original release page.

Files changed (24)
  1. {pragmastat-3.1.29/pragmastat.egg-info → pragmastat-3.1.30}/PKG-INFO +5 -5
  2. {pragmastat-3.1.29 → pragmastat-3.1.30}/README.md +4 -4
  3. {pragmastat-3.1.29 → pragmastat-3.1.30}/pragmastat/estimators.py +3 -2
  4. pragmastat-3.1.30/pragmastat/fast_shift.py +243 -0
  5. {pragmastat-3.1.29 → pragmastat-3.1.30/pragmastat.egg-info}/PKG-INFO +5 -5
  6. {pragmastat-3.1.29 → pragmastat-3.1.30}/pragmastat.egg-info/SOURCES.txt +3 -0
  7. {pragmastat-3.1.29 → pragmastat-3.1.30}/pyproject.toml +1 -1
  8. {pragmastat-3.1.29 → pragmastat-3.1.30}/setup.py +6 -0
  9. pragmastat-3.1.30/src/fast_shift_c.c +354 -0
  10. {pragmastat-3.1.29 → pragmastat-3.1.30}/tests/test_performance.py +46 -12
  11. {pragmastat-3.1.29 → pragmastat-3.1.30}/LICENSE +0 -0
  12. {pragmastat-3.1.29 → pragmastat-3.1.30}/MANIFEST.in +0 -0
  13. {pragmastat-3.1.29 → pragmastat-3.1.30}/examples/demo.py +0 -0
  14. {pragmastat-3.1.29 → pragmastat-3.1.30}/pragmastat/__init__.py +0 -0
  15. {pragmastat-3.1.29 → pragmastat-3.1.30}/pragmastat/fast_center.py +0 -0
  16. {pragmastat-3.1.29 → pragmastat-3.1.30}/pragmastat/fast_spread.py +0 -0
  17. {pragmastat-3.1.29 → pragmastat-3.1.30}/pragmastat.egg-info/dependency_links.txt +0 -0
  18. {pragmastat-3.1.29 → pragmastat-3.1.30}/pragmastat.egg-info/requires.txt +0 -0
  19. {pragmastat-3.1.29 → pragmastat-3.1.30}/pragmastat.egg-info/top_level.txt +0 -0
  20. {pragmastat-3.1.29 → pragmastat-3.1.30}/setup.cfg +0 -0
  21. {pragmastat-3.1.29 → pragmastat-3.1.30}/src/fast_center_c.c +0 -0
  22. {pragmastat-3.1.29 → pragmastat-3.1.30}/src/fast_spread_c.c +0 -0
  23. {pragmastat-3.1.29 → pragmastat-3.1.30}/tests/test_invariance.py +0 -0
  24. {pragmastat-3.1.29 → pragmastat-3.1.30}/tests/test_reference.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pragmastat
3
- Version: 3.1.29
3
+ Version: 3.1.30
4
4
  Summary: Pragmastat: Pragmatic Statistical Toolkit
5
5
  Author: Andrey Akinshin
6
6
  License-Expression: MIT
@@ -18,9 +18,9 @@ Dynamic: license-file
18
18
 
19
19
  This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit', which presents a toolkit of statistical procedures that provide reliable results across diverse real-world distributions, with ready-to-use implementations and detailed explanations.
20
20
 
21
- - PDF manual for this version: [pragmastat-v3.1.29.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.29/pragmastat-v3.1.29.pdf)
22
- - Markdown manual for this version: [pragmastat-v3.1.29.md](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.29/pragmastat-v3.1.29.md)
23
- - Source code for this version: [pragmastat/py/v3.1.29](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.29/py)
21
+ - PDF manual for this version: [pragmastat-v3.1.30.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.30/pragmastat-v3.1.30.pdf)
22
+ - Markdown manual for this version: [pragmastat-v3.1.30.md](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.30/pragmastat-v3.1.30.md)
23
+ - Source code for this version: [pragmastat/py/v3.1.30](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.30/py)
24
24
  - Latest online manual: https://pragmastat.dev
25
25
  - Manual DOI: [10.5281/zenodo.17236778](https://doi.org/10.5281/zenodo.17236778)
26
26
 
@@ -29,7 +29,7 @@ This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit',
29
29
  Install from PyPI:
30
30
 
31
31
  ```bash
32
- pip install pragmastat==3.1.29
32
+ pip install pragmastat==3.1.30
33
33
  ```
34
34
 
35
35
  ## Demo
@@ -2,9 +2,9 @@
2
2
 
3
3
  This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit', which presents a toolkit of statistical procedures that provide reliable results across diverse real-world distributions, with ready-to-use implementations and detailed explanations.
4
4
 
5
- - PDF manual for this version: [pragmastat-v3.1.29.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.29/pragmastat-v3.1.29.pdf)
6
- - Markdown manual for this version: [pragmastat-v3.1.29.md](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.29/pragmastat-v3.1.29.md)
7
- - Source code for this version: [pragmastat/py/v3.1.29](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.29/py)
5
+ - PDF manual for this version: [pragmastat-v3.1.30.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.30/pragmastat-v3.1.30.pdf)
6
+ - Markdown manual for this version: [pragmastat-v3.1.30.md](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.30/pragmastat-v3.1.30.md)
7
+ - Source code for this version: [pragmastat/py/v3.1.30](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.30/py)
8
8
  - Latest online manual: https://pragmastat.dev
9
9
  - Manual DOI: [10.5281/zenodo.17236778](https://doi.org/10.5281/zenodo.17236778)
10
10
 
@@ -13,7 +13,7 @@ This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit',
13
13
  Install from PyPI:
14
14
 
15
15
  ```bash
16
- pip install pragmastat==3.1.29
16
+ pip install pragmastat==3.1.30
17
17
  ```
18
18
 
19
19
  ## Demo
@@ -3,6 +3,7 @@ import numpy as np
3
3
  from numpy.typing import NDArray
4
4
  from .fast_center import _fast_center
5
5
  from .fast_spread import _fast_spread
6
+ from .fast_shift import _fast_shift
6
7
 
7
8
 
8
9
  def center(x: Union[Sequence[float], NDArray]) -> float:
@@ -39,8 +40,8 @@ def shift(
39
40
  y = np.asarray(y)
40
41
  if len(x) == 0 or len(y) == 0:
41
42
  raise ValueError("Input arrays cannot be empty")
42
- pairwise_shifts = np.subtract.outer(x, y)
43
- return float(np.median(pairwise_shifts))
43
+ # Use fast O((m+n) log L) algorithm instead of materializing all m*n differences
44
+ return float(_fast_shift(x, y, p=0.5))
44
45
 
45
46
 
46
47
  def ratio(
@@ -0,0 +1,243 @@
1
+ """Fast O((m+n) log L) implementation of the Shift estimator.
2
+
3
+ Computes quantiles of all pairwise differences without materializing them.
4
+ Uses binary search in value space with two-pointer counting.
5
+ """
6
+
7
+ from typing import List, Union, Sequence
8
+ import numpy as np
9
+ from numpy.typing import NDArray
10
+
11
+ # Try to import the C implementation, fall back to pure Python if unavailable
12
+ try:
13
+ from . import _fast_shift_c
14
+
15
+ _HAS_C_EXTENSION = True
16
+ except ImportError:
17
+ _HAS_C_EXTENSION = False
18
+
19
+
20
+ def _midpoint(a: float, b: float) -> float:
21
+ """Compute numerically stable midpoint."""
22
+ return a + (b - a) * 0.5
23
+
24
+
25
+ def _count_and_neighbors(
26
+ x: List[float], y: List[float], threshold: float
27
+ ) -> tuple[int, float, float]:
28
+ """
29
+ Count pairs where x[i] - y[j] <= threshold using two-pointer algorithm.
30
+
31
+ Also tracks the closest actual differences on either side of threshold.
32
+
33
+ Args:
34
+ x: Sorted array of x values
35
+ y: Sorted array of y values
36
+ threshold: The threshold value
37
+
38
+ Returns:
39
+ Tuple of (count_less_or_equal, closest_below, closest_above)
40
+ """
41
+ m = len(x)
42
+ n = len(y)
43
+ count = 0
44
+ max_below = float("-inf")
45
+ min_above = float("inf")
46
+
47
+ j = 0
48
+ for i in range(m):
49
+ # Move j forward while x[i] - y[j] > threshold
50
+ while j < n and x[i] - y[j] > threshold:
51
+ j += 1
52
+
53
+ # All elements from y[j] to y[n-1] satisfy x[i] - y[j] <= threshold
54
+ count += n - j
55
+
56
+ # Track boundary values
57
+ if j < n:
58
+ diff = x[i] - y[j]
59
+ max_below = max(max_below, diff)
60
+
61
+ if j > 0:
62
+ diff = x[i] - y[j - 1]
63
+ min_above = min(min_above, diff)
64
+
65
+ # Fallback to actual min/max if no boundaries found
66
+ if max_below == float("-inf"):
67
+ max_below = x[0] - y[n - 1]
68
+ if min_above == float("inf"):
69
+ min_above = x[m - 1] - y[0]
70
+
71
+ return count, max_below, min_above
72
+
73
+
74
+ def _select_kth_pairwise_diff(x: List[float], y: List[float], k: int) -> float:
75
+ """
76
+ Select the k-th smallest pairwise difference (1-indexed).
77
+
78
+ Uses binary search in value space to avoid materializing all differences.
79
+
80
+ Args:
81
+ x: Sorted array of x values
82
+ y: Sorted array of y values
83
+ k: The rank to select (1-indexed)
84
+
85
+ Returns:
86
+ The k-th smallest pairwise difference
87
+ """
88
+ m = len(x)
89
+ n = len(y)
90
+ total = m * n
91
+
92
+ if k < 1 or k > total:
93
+ raise ValueError(f"k must be in [1, {total}], got {k}")
94
+
95
+ # Initialize search bounds
96
+ search_min = x[0] - y[n - 1]
97
+ search_max = x[m - 1] - y[0]
98
+
99
+ if np.isnan(search_min) or np.isnan(search_max):
100
+ raise ValueError("NaN in input values")
101
+
102
+ max_iterations = 128 # Sufficient for double precision convergence
103
+ prev_min = float("-inf")
104
+ prev_max = float("inf")
105
+
106
+ for _ in range(max_iterations):
107
+ if search_min == search_max:
108
+ break
109
+
110
+ mid = _midpoint(search_min, search_max)
111
+ count_le, closest_below, closest_above = _count_and_neighbors(x, y, mid)
112
+
113
+ # Check if we found the exact value
114
+ if closest_below == closest_above:
115
+ return closest_below
116
+
117
+ # No progress means we're stuck between two discrete values
118
+ if search_min == prev_min and search_max == prev_max:
119
+ return closest_below if count_le >= k else closest_above
120
+
121
+ prev_min = search_min
122
+ prev_max = search_max
123
+
124
+ # Narrow the search space
125
+ if count_le >= k:
126
+ search_max = closest_below
127
+ else:
128
+ search_min = closest_above
129
+
130
+ if search_min != search_max:
131
+ raise RuntimeError("Convergence failure (pathological input)")
132
+
133
+ return search_min
134
+
135
+
136
+ def _fast_shift_python(
137
+ x: List[float], y: List[float], p: Union[float, List[float]] = 0.5
138
+ ) -> Union[float, List[float]]:
139
+ """
140
+ Pure Python implementation of fast shift estimator.
141
+
142
+ Computes quantiles of all pairwise differences {x_i - y_j} efficiently.
143
+
144
+ Time complexity: O((m + n) * log(precision)) per quantile
145
+ Space complexity: O(1)
146
+
147
+ Args:
148
+ x: First sample (will be sorted if needed)
149
+ y: Second sample (will be sorted if needed)
150
+ p: Quantile(s) to compute (0.5 for median). Can be a single float or list of floats.
151
+
152
+ Returns:
153
+ The quantile estimate(s). Returns float if p is float, list if p is list.
154
+ """
155
+ if len(x) == 0 or len(y) == 0:
156
+ raise ValueError("x and y must be non-empty")
157
+
158
+ # Handle single probability or list
159
+ return_single = isinstance(p, (float, int))
160
+ probabilities = [p] if return_single else list(p)
161
+
162
+ # Validate probabilities
163
+ for pk in probabilities:
164
+ if np.isnan(pk) or pk < 0.0 or pk > 1.0:
165
+ raise ValueError(f"Probabilities must be within [0, 1], got {pk}")
166
+
167
+ # Sort the arrays
168
+ xs = sorted(x)
169
+ ys = sorted(y)
170
+
171
+ m = len(xs)
172
+ n = len(ys)
173
+ total = m * n
174
+
175
+ # Type-7 quantile: h = 1 + (n-1)*p, then interpolate between floor(h) and ceil(h)
176
+ required_ranks = set()
177
+ interpolation_params = []
178
+
179
+ for pk in probabilities:
180
+ h = 1.0 + (total - 1) * pk
181
+ lower_rank = int(np.floor(h))
182
+ upper_rank = int(np.ceil(h))
183
+ weight = h - lower_rank
184
+
185
+ # Clamp to valid range
186
+ lower_rank = max(1, min(total, lower_rank))
187
+ upper_rank = max(1, min(total, upper_rank))
188
+
189
+ interpolation_params.append((lower_rank, upper_rank, weight))
190
+ required_ranks.add(lower_rank)
191
+ required_ranks.add(upper_rank)
192
+
193
+ # Compute required rank values
194
+ rank_values = {}
195
+ for rank in required_ranks:
196
+ rank_values[rank] = _select_kth_pairwise_diff(xs, ys, rank)
197
+
198
+ # Interpolate to get final quantile values
199
+ result = []
200
+ for lower_rank, upper_rank, weight in interpolation_params:
201
+ lower = rank_values[lower_rank]
202
+ upper = rank_values[upper_rank]
203
+ if weight == 0.0:
204
+ result.append(lower)
205
+ else:
206
+ result.append((1.0 - weight) * lower + weight * upper)
207
+
208
+ return result[0] if return_single else result
209
+
210
+
211
+ def _fast_shift(
212
+ x: Union[Sequence[float], NDArray],
213
+ y: Union[Sequence[float], NDArray],
214
+ p: Union[float, List[float]] = 0.5,
215
+ ) -> Union[float, List[float]]:
216
+ """
217
+ Compute quantiles of all pairwise differences {x_i - y_j} efficiently.
218
+
219
+ Internal implementation - not part of public API.
220
+ Uses C implementation if available, falls back to pure Python.
221
+
222
+ Time complexity: O((m + n) * log(precision)) per quantile
223
+ Space complexity: O(1)
224
+
225
+ Args:
226
+ x: First sample
227
+ y: Second sample
228
+ p: Quantile(s) to compute (0.5 for median)
229
+
230
+ Returns:
231
+ The quantile estimate(s)
232
+ """
233
+ if _HAS_C_EXTENSION:
234
+ # Convert to numpy arrays and use C implementation
235
+ x_arr = np.asarray(x, dtype=np.float64)
236
+ y_arr = np.asarray(y, dtype=np.float64)
237
+ return_single = isinstance(p, (float, int))
238
+ p_arr = np.array([p] if return_single else p, dtype=np.float64)
239
+ result = _fast_shift_c.fast_shift_c(x_arr, y_arr, p_arr)
240
+ return float(result[0]) if return_single else result.tolist()
241
+ else:
242
+ # Fall back to pure Python implementation
243
+ return _fast_shift_python(x, y, p)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pragmastat
3
- Version: 3.1.29
3
+ Version: 3.1.30
4
4
  Summary: Pragmastat: Pragmatic Statistical Toolkit
5
5
  Author: Andrey Akinshin
6
6
  License-Expression: MIT
@@ -18,9 +18,9 @@ Dynamic: license-file
18
18
 
19
19
  This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit', which presents a toolkit of statistical procedures that provide reliable results across diverse real-world distributions, with ready-to-use implementations and detailed explanations.
20
20
 
21
- - PDF manual for this version: [pragmastat-v3.1.29.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.29/pragmastat-v3.1.29.pdf)
22
- - Markdown manual for this version: [pragmastat-v3.1.29.md](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.29/pragmastat-v3.1.29.md)
23
- - Source code for this version: [pragmastat/py/v3.1.29](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.29/py)
21
+ - PDF manual for this version: [pragmastat-v3.1.30.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.30/pragmastat-v3.1.30.pdf)
22
+ - Markdown manual for this version: [pragmastat-v3.1.30.md](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.30/pragmastat-v3.1.30.md)
23
+ - Source code for this version: [pragmastat/py/v3.1.30](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.30/py)
24
24
  - Latest online manual: https://pragmastat.dev
25
25
  - Manual DOI: [10.5281/zenodo.17236778](https://doi.org/10.5281/zenodo.17236778)
26
26
 
@@ -29,7 +29,7 @@ This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit',
29
29
  Install from PyPI:
30
30
 
31
31
  ```bash
32
- pip install pragmastat==3.1.29
32
+ pip install pragmastat==3.1.30
33
33
  ```
34
34
 
35
35
  ## Demo
@@ -8,6 +8,7 @@ setup.py
8
8
  ./pragmastat/__init__.py
9
9
  ./pragmastat/estimators.py
10
10
  ./pragmastat/fast_center.py
11
+ ./pragmastat/fast_shift.py
11
12
  ./pragmastat/fast_spread.py
12
13
  ./tests/test_invariance.py
13
14
  ./tests/test_performance.py
@@ -16,6 +17,7 @@ examples/demo.py
16
17
  pragmastat/__init__.py
17
18
  pragmastat/estimators.py
18
19
  pragmastat/fast_center.py
20
+ pragmastat/fast_shift.py
19
21
  pragmastat/fast_spread.py
20
22
  pragmastat.egg-info/PKG-INFO
21
23
  pragmastat.egg-info/SOURCES.txt
@@ -23,6 +25,7 @@ pragmastat.egg-info/dependency_links.txt
23
25
  pragmastat.egg-info/requires.txt
24
26
  pragmastat.egg-info/top_level.txt
25
27
  src/fast_center_c.c
28
+ src/fast_shift_c.c
26
29
  src/fast_spread_c.c
27
30
  tests/test_invariance.py
28
31
  tests/test_performance.py
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pragmastat"
3
- version = "3.1.29"
3
+ version = "3.1.30"
4
4
  description = "Pragmastat: Pragmatic Statistical Toolkit"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.8"
@@ -15,6 +15,12 @@ extensions = [
15
15
  include_dirs=[numpy.get_include()],
16
16
  extra_compile_args=["-O3", "-Wall"],
17
17
  ),
18
+ Extension(
19
+ "pragmastat._fast_shift_c",
20
+ sources=["src/fast_shift_c.c"],
21
+ include_dirs=[numpy.get_include()],
22
+ extra_compile_args=["-O3", "-Wall"],
23
+ ),
18
24
  ]
19
25
 
20
26
  setup(
@@ -0,0 +1,354 @@
1
+ #define PY_SSIZE_T_CLEAN
2
+ #include <Python.h>
3
+ #include <numpy/arrayobject.h>
4
+ #include <math.h>
5
+ #include <stdlib.h>
6
+
7
+ #define MIN(a, b) ((a) < (b) ? (a) : (b))
8
+ #define MAX(a, b) ((a) > (b) ? (a) : (b))
9
+
10
+ // Comparison function for qsort
11
+ static int compare_doubles(const void *a, const void *b) {
12
+ double da = *(const double *)a;
13
+ double db = *(const double *)b;
14
+ if (da < db) return -1;
15
+ if (da > db) return 1;
16
+ return 0;
17
+ }
18
+
19
+ // Numerically stable midpoint
20
+ static double midpoint(double a, double b) {
21
+ return a + (b - a) * 0.5;
22
+ }
23
+
24
+ // Two-pointer algorithm to count pairs where x[i] - y[j] <= threshold
25
+ // Also tracks the closest actual differences on either side of threshold
26
+ static void count_and_neighbors(
27
+ double *x, npy_intp m,
28
+ double *y, npy_intp n,
29
+ double threshold,
30
+ long long *count_le,
31
+ double *closest_below,
32
+ double *closest_above)
33
+ {
34
+ long long count = 0;
35
+ double max_below = -INFINITY;
36
+ double min_above = INFINITY;
37
+
38
+ npy_intp j = 0;
39
+ for (npy_intp i = 0; i < m; i++) {
40
+ // Move j forward while x[i] - y[j] > threshold
41
+ while (j < n && x[i] - y[j] > threshold) {
42
+ j++;
43
+ }
44
+
45
+ // All elements from y[j] to y[n-1] satisfy x[i] - y[j] <= threshold
46
+ count += (n - j);
47
+
48
+ // Track boundary values
49
+ if (j < n) {
50
+ double diff = x[i] - y[j];
51
+ if (diff > max_below) max_below = diff;
52
+ }
53
+
54
+ if (j > 0) {
55
+ double diff = x[i] - y[j - 1];
56
+ if (diff < min_above) min_above = diff;
57
+ }
58
+ }
59
+
60
+ // Fallback to actual min/max if no boundaries found
61
+ if (isinf(max_below) && max_below < 0) {
62
+ max_below = x[0] - y[n - 1];
63
+ }
64
+ if (isinf(min_above) && min_above > 0) {
65
+ min_above = x[m - 1] - y[0];
66
+ }
67
+
68
+ *count_le = count;
69
+ *closest_below = max_below;
70
+ *closest_above = min_above;
71
+ }
72
+
73
+ // Select the k-th smallest pairwise difference (1-indexed)
74
+ static double select_kth_pairwise_diff(
75
+ double *x, npy_intp m,
76
+ double *y, npy_intp n,
77
+ long long k)
78
+ {
79
+ long long total = (long long)m * n;
80
+
81
+ if (k < 1 || k > total) {
82
+ PyErr_Format(PyExc_ValueError, "k must be in [1, %lld], got %lld", total, k);
83
+ return NAN;
84
+ }
85
+
86
+ // Initialize search bounds
87
+ double search_min = x[0] - y[n - 1];
88
+ double search_max = x[m - 1] - y[0];
89
+
90
+ if (isnan(search_min) || isnan(search_max)) {
91
+ PyErr_SetString(PyExc_ValueError, "NaN in input values");
92
+ return NAN;
93
+ }
94
+
95
+ const int max_iterations = 128;
96
+ double prev_min = -INFINITY;
97
+ double prev_max = INFINITY;
98
+
99
+ for (int iter = 0; iter < max_iterations && search_min != search_max; iter++) {
100
+ double mid = midpoint(search_min, search_max);
101
+ long long count_le;
102
+ double closest_below, closest_above;
103
+
104
+ count_and_neighbors(x, m, y, n, mid, &count_le, &closest_below, &closest_above);
105
+
106
+ // Check if we found the exact value
107
+ if (closest_below == closest_above) {
108
+ return closest_below;
109
+ }
110
+
111
+ // No progress means we're stuck between two discrete values
112
+ if (search_min == prev_min && search_max == prev_max) {
113
+ return (count_le >= k) ? closest_below : closest_above;
114
+ }
115
+
116
+ prev_min = search_min;
117
+ prev_max = search_max;
118
+
119
+ // Narrow the search space
120
+ if (count_le >= k) {
121
+ search_max = closest_below;
122
+ } else {
123
+ search_min = closest_above;
124
+ }
125
+ }
126
+
127
+ if (search_min != search_max) {
128
+ PyErr_SetString(PyExc_RuntimeError, "Convergence failure (pathological input)");
129
+ return NAN;
130
+ }
131
+
132
+ return search_min;
133
+ }
134
+
135
+ /*
136
+ * Fast O((m+n) log L) implementation of the Shift estimator
137
+ * Computes quantiles of all pairwise differences without materializing them
138
+ */
139
+ static PyObject* fast_shift_c(PyObject* self, PyObject* args) {
140
+ PyArrayObject *x_array, *y_array, *p_array;
141
+
142
+ // Parse input
143
+ if (!PyArg_ParseTuple(args, "O!O!O!", &PyArray_Type, &x_array,
144
+ &PyArray_Type, &y_array, &PyArray_Type, &p_array)) {
145
+ return NULL;
146
+ }
147
+
148
+ // Ensure arrays are 1D
149
+ if (PyArray_NDIM(x_array) != 1 || PyArray_NDIM(y_array) != 1 || PyArray_NDIM(p_array) != 1) {
150
+ PyErr_SetString(PyExc_ValueError, "All inputs must be 1-dimensional arrays");
151
+ return NULL;
152
+ }
153
+
154
+ npy_intp m = PyArray_DIM(x_array, 0);
155
+ npy_intp n = PyArray_DIM(y_array, 0);
156
+ npy_intp num_quantiles = PyArray_DIM(p_array, 0);
157
+
158
+ if (m == 0 || n == 0) {
159
+ PyErr_SetString(PyExc_ValueError, "x and y must be non-empty");
160
+ return NULL;
161
+ }
162
+
163
+ // Allocate and sort x and y
164
+ double *xs = (double*)malloc(m * sizeof(double));
165
+ double *ys = (double*)malloc(n * sizeof(double));
166
+
167
+ if (!xs || !ys) {
168
+ free(xs);
169
+ free(ys);
170
+ PyErr_NoMemory();
171
+ return NULL;
172
+ }
173
+
174
+ for (npy_intp i = 0; i < m; i++) {
175
+ xs[i] = *(double*)PyArray_GETPTR1(x_array, i);
176
+ if (isnan(xs[i])) {
177
+ free(xs);
178
+ free(ys);
179
+ PyErr_SetString(PyExc_ValueError, "NaN values not allowed in x");
180
+ return NULL;
181
+ }
182
+ }
183
+
184
+ for (npy_intp i = 0; i < n; i++) {
185
+ ys[i] = *(double*)PyArray_GETPTR1(y_array, i);
186
+ if (isnan(ys[i])) {
187
+ free(xs);
188
+ free(ys);
189
+ PyErr_SetString(PyExc_ValueError, "NaN values not allowed in y");
190
+ return NULL;
191
+ }
192
+ }
193
+
194
+ qsort(xs, m, sizeof(double), compare_doubles);
195
+ qsort(ys, n, sizeof(double), compare_doubles);
196
+
197
+ long long total = (long long)m * n;
198
+
199
+ // Process quantiles
200
+ // First, collect all required ranks and interpolation parameters
201
+ typedef struct {
202
+ long long lower_rank;
203
+ long long upper_rank;
204
+ double weight;
205
+ } InterpolationParam;
206
+
207
+ InterpolationParam *interp_params = (InterpolationParam*)malloc(num_quantiles * sizeof(InterpolationParam));
208
+ if (!interp_params) {
209
+ free(xs);
210
+ free(ys);
211
+ PyErr_NoMemory();
212
+ return NULL;
213
+ }
214
+
215
+ // Use a simple array to track unique ranks (could be optimized with hash set)
216
+ long long *required_ranks = (long long*)malloc(2 * num_quantiles * sizeof(long long));
217
+ int num_required = 0;
218
+
219
+ if (!required_ranks) {
220
+ free(xs);
221
+ free(ys);
222
+ free(interp_params);
223
+ PyErr_NoMemory();
224
+ return NULL;
225
+ }
226
+
227
+ // Collect required ranks
228
+ for (npy_intp i = 0; i < num_quantiles; i++) {
229
+ double pk = *(double*)PyArray_GETPTR1(p_array, i);
230
+
231
+ if (isnan(pk) || pk < 0.0 || pk > 1.0) {
232
+ free(xs);
233
+ free(ys);
234
+ free(interp_params);
235
+ free(required_ranks);
236
+ PyErr_Format(PyExc_ValueError, "Probabilities must be within [0, 1], got %f", pk);
237
+ return NULL;
238
+ }
239
+
240
+ // Type-7 quantile: h = 1 + (n-1)*p
241
+ double h = 1.0 + (total - 1) * pk;
242
+ long long lower_rank = (long long)floor(h);
243
+ long long upper_rank = (long long)ceil(h);
244
+ double weight = h - lower_rank;
245
+
246
+ // Clamp to valid range
247
+ if (lower_rank < 1) lower_rank = 1;
248
+ if (upper_rank > total) upper_rank = total;
249
+ if (lower_rank > total) lower_rank = total;
250
+ if (upper_rank < 1) upper_rank = 1;
251
+
252
+ interp_params[i].lower_rank = lower_rank;
253
+ interp_params[i].upper_rank = upper_rank;
254
+ interp_params[i].weight = weight;
255
+
256
+ // Add to required ranks if not already present
257
+ int found_lower = 0, found_upper = 0;
258
+ for (int j = 0; j < num_required; j++) {
259
+ if (required_ranks[j] == lower_rank) found_lower = 1;
260
+ if (required_ranks[j] == upper_rank) found_upper = 1;
261
+ }
262
+ if (!found_lower) required_ranks[num_required++] = lower_rank;
263
+ if (!found_upper && upper_rank != lower_rank) required_ranks[num_required++] = upper_rank;
264
+ }
265
+
266
+ // Compute rank values
267
+ double *rank_values = (double*)malloc(num_required * sizeof(double));
268
+ if (!rank_values) {
269
+ free(xs);
270
+ free(ys);
271
+ free(interp_params);
272
+ free(required_ranks);
273
+ PyErr_NoMemory();
274
+ return NULL;
275
+ }
276
+
277
+ for (int i = 0; i < num_required; i++) {
278
+ rank_values[i] = select_kth_pairwise_diff(xs, m, ys, n, required_ranks[i]);
279
+ if (isnan(rank_values[i])) {
280
+ // Error was set by select_kth_pairwise_diff
281
+ free(xs);
282
+ free(ys);
283
+ free(interp_params);
284
+ free(required_ranks);
285
+ free(rank_values);
286
+ return NULL;
287
+ }
288
+ }
289
+
290
+ // Create result array
291
+ npy_intp dims[1] = {num_quantiles};
292
+ PyArrayObject *result = (PyArrayObject*)PyArray_SimpleNew(1, dims, NPY_DOUBLE);
293
+ if (!result) {
294
+ free(xs);
295
+ free(ys);
296
+ free(interp_params);
297
+ free(required_ranks);
298
+ free(rank_values);
299
+ return NULL;
300
+ }
301
+
302
+ // Interpolate to get final quantile values
303
+ for (npy_intp i = 0; i < num_quantiles; i++) {
304
+ long long lower_rank = interp_params[i].lower_rank;
305
+ long long upper_rank = interp_params[i].upper_rank;
306
+ double weight = interp_params[i].weight;
307
+
308
+ // Find rank values
309
+ double lower_val = 0.0, upper_val = 0.0;
310
+ for (int j = 0; j < num_required; j++) {
311
+ if (required_ranks[j] == lower_rank) lower_val = rank_values[j];
312
+ if (required_ranks[j] == upper_rank) upper_val = rank_values[j];
313
+ }
314
+
315
+ double result_val;
316
+ if (weight == 0.0) {
317
+ result_val = lower_val;
318
+ } else {
319
+ result_val = (1.0 - weight) * lower_val + weight * upper_val;
320
+ }
321
+
322
+ *(double*)PyArray_GETPTR1(result, i) = result_val;
323
+ }
324
+
325
+ // Cleanup
326
+ free(xs);
327
+ free(ys);
328
+ free(interp_params);
329
+ free(required_ranks);
330
+ free(rank_values);
331
+
332
+ return (PyObject*)result;
333
+ }
334
+
335
+ // Method definitions
336
+ static PyMethodDef FastShiftMethods[] = {
337
+ {"fast_shift_c", fast_shift_c, METH_VARARGS, "Fast shift estimator in C"},
338
+ {NULL, NULL, 0, NULL}
339
+ };
340
+
341
+ // Module definition
342
+ static struct PyModuleDef fast_shift_module = {
343
+ PyModuleDef_HEAD_INIT,
344
+ "_fast_shift_c",
345
+ "Fast shift estimator C extension",
346
+ -1,
347
+ FastShiftMethods
348
+ };
349
+
350
+ // Module initialization
351
+ PyMODINIT_FUNC PyInit__fast_shift_c(void) {
352
+ import_array();
353
+ return PyModule_Create(&fast_shift_module);
354
+ }
@@ -1,13 +1,11 @@
1
- """Performance tests for fast Center and Spread implementations."""
2
-
3
1
  import time
4
2
  import numpy as np
5
3
  from pragmastat.fast_center import _fast_center
6
4
  from pragmastat.fast_spread import _fast_spread
5
+ from pragmastat.fast_shift import _fast_shift
7
6
 
8
7
 
9
- def center_simple(x):
10
- """Simple O(n^2) implementation for comparison."""
8
+ def center_naive(x):
11
9
  n = len(x)
12
10
  pairwise_averages = []
13
11
  for i in range(n):
@@ -16,8 +14,7 @@ def center_simple(x):
16
14
  return np.median(pairwise_averages)
17
15
 
18
16
 
19
- def spread_simple(x):
20
- """Simple O(n^2) implementation for comparison."""
17
+ def spread_naive(x):
21
18
  n = len(x)
22
19
  if n == 1:
23
20
  return 0.0
@@ -28,13 +25,20 @@ def spread_simple(x):
28
25
  return np.median(pairwise_diffs)
29
26
 
30
27
 
28
+ def shift_naive(x, y):
29
+ pairwise_shifts = []
30
+ for xi in x:
31
+ for yj in y:
32
+ pairwise_shifts.append(xi - yj)
33
+ return np.median(pairwise_shifts)
34
+
35
+
31
36
  def test_center_correctness():
32
- """Test that _fast_center produces the same results as simple implementation."""
33
37
  np.random.seed(1729)
34
38
  for n in range(1, 101):
35
39
  for iteration in range(n):
36
40
  x = np.random.randn(n).tolist()
37
- expected = center_simple(x)
41
+ expected = center_naive(x)
38
42
  actual = _fast_center(x)
39
43
  assert (
40
44
  abs(expected - actual) < 1e-9
@@ -42,12 +46,11 @@ def test_center_correctness():
42
46
 
43
47
 
44
48
  def test_spread_correctness():
45
- """Test that _fast_spread produces the same results as simple implementation."""
46
49
  np.random.seed(1729)
47
50
  for n in range(1, 101):
48
51
  for iteration in range(n):
49
52
  x = np.random.randn(n).tolist()
50
- expected = spread_simple(x)
53
+ expected = spread_naive(x)
51
54
  actual = _fast_spread(x)
52
55
  assert (
53
56
  abs(expected - actual) < 1e-9
@@ -55,7 +58,6 @@ def test_spread_correctness():
55
58
 
56
59
 
57
60
  def test_center_performance():
58
- """Test performance of _fast_center on large dataset."""
59
61
  np.random.seed(1729)
60
62
  x = np.random.randn(100000).tolist()
61
63
 
@@ -69,7 +71,6 @@ def test_center_performance():
69
71
 
70
72
 
71
73
  def test_spread_performance():
72
- """Test performance of _fast_spread on large dataset."""
73
74
  np.random.seed(1729)
74
75
  x = np.random.randn(100000).tolist()
75
76
 
@@ -82,6 +83,33 @@ def test_spread_performance():
82
83
  assert elapsed < 10.0, f"Performance too slow: {elapsed}s"
83
84
 
84
85
 
86
+ def test_shift_correctness():
87
+ np.random.seed(1729)
88
+ for n in range(2, 51):
89
+ for m in range(2, 51):
90
+ x = np.random.randn(n).tolist()
91
+ y = np.random.randn(m).tolist()
92
+ expected = shift_naive(x, y)
93
+ actual = _fast_shift(x, y, p=0.5)
94
+ assert (
95
+ abs(expected - actual) < 1e-9
96
+ ), f"Mismatch for n={n}, m={m}: expected={expected}, actual={actual}"
97
+
98
+
99
+ def test_shift_performance():
100
+ np.random.seed(1729)
101
+ x = np.random.randn(10000).tolist()
102
+ y = np.random.randn(10000).tolist()
103
+
104
+ start = time.time()
105
+ result = _fast_shift(x, y, p=0.5)
106
+ elapsed = time.time() - start
107
+
108
+ print(f"\nShift for n=m=10000: {result:.6f}")
109
+ print(f"Elapsed time: {elapsed:.3f}s")
110
+ assert elapsed < 10.0, f"Performance too slow: {elapsed}s"
111
+
112
+
85
113
  if __name__ == "__main__":
86
114
  test_center_correctness()
87
115
  print("✓ Center correctness tests passed")
@@ -89,8 +117,14 @@ if __name__ == "__main__":
89
117
  test_spread_correctness()
90
118
  print("✓ Spread correctness tests passed")
91
119
 
120
+ test_shift_correctness()
121
+ print("✓ Shift correctness tests passed")
122
+
92
123
  test_center_performance()
93
124
  print("✓ Center performance test passed")
94
125
 
95
126
  test_spread_performance()
96
127
  print("✓ Spread performance test passed")
128
+
129
+ test_shift_performance()
130
+ print("✓ Shift performance test passed")
File without changes
File without changes
File without changes