pragmastat 3.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pragmastat-3.1.6/LICENSE +21 -0
- pragmastat-3.1.6/MANIFEST.in +3 -0
- pragmastat-3.1.6/PKG-INFO +76 -0
- pragmastat-3.1.6/README.md +61 -0
- pragmastat-3.1.6/examples/demo.py +51 -0
- pragmastat-3.1.6/pragmastat/__init__.py +21 -0
- pragmastat-3.1.6/pragmastat/estimators.py +71 -0
- pragmastat-3.1.6/pragmastat/fast_center.py +215 -0
- pragmastat-3.1.6/pragmastat/fast_spread.py +217 -0
- pragmastat-3.1.6/pragmastat.egg-info/PKG-INFO +76 -0
- pragmastat-3.1.6/pragmastat.egg-info/SOURCES.txt +29 -0
- pragmastat-3.1.6/pragmastat.egg-info/dependency_links.txt +1 -0
- pragmastat-3.1.6/pragmastat.egg-info/requires.txt +1 -0
- pragmastat-3.1.6/pragmastat.egg-info/top_level.txt +6 -0
- pragmastat-3.1.6/pyproject.toml +18 -0
- pragmastat-3.1.6/setup.cfg +4 -0
- pragmastat-3.1.6/setup.py +24 -0
- pragmastat-3.1.6/src/fast_center_c.c +314 -0
- pragmastat-3.1.6/src/fast_spread_c.c +335 -0
- pragmastat-3.1.6/tests/test_invariance.py +152 -0
- pragmastat-3.1.6/tests/test_performance.py +93 -0
- pragmastat-3.1.6/tests/test_reference.py +70 -0
pragmastat-3.1.6/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Andrey Akinshin

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
pragmastat-3.1.6/PKG-INFO
ADDED
@@ -0,0 +1,76 @@
Metadata-Version: 2.4
Name: pragmastat
Version: 3.1.6
Summary: Pragmastat: Pragmatic Statistical Toolkit
Author: Andrey Akinshin
License-Expression: MIT
Project-URL: Homepage, https://pragmastat.dev
Project-URL: Repository, https://github.com/AndreyAkinshin/pragmastat
Project-URL: DOI, https://doi.org/10.5281/zenodo.17236778
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: numpy>=1.20
Dynamic: license-file

# Pragmastat Python Implementation

[](https://doi.org/10.5281/zenodo.17236778)

A Python implementation of the Pragmastat statistical toolkit, providing robust statistical estimators for reliable analysis of real-world data.

## Installation

```bash
pip install pragmastat
```

## Requirements

- Python >= 3.8
- NumPy >= 1.20

## Usage

```python
from pragmastat import center, spread, rel_spread, shift, ratio, avg_spread, disparity

# Basic estimators
x = [1, 2, 3, 4, 5]
print(f"Center: {center(x)}")
print(f"Spread: {spread(x)}")
print(f"RelSpread: {rel_spread(x)}")

# Comparison estimators
y = [3, 4, 5, 6, 7]
print(f"Shift: {shift(x, y)}")
print(f"Ratio: {ratio(x, y)}")
print(f"AvgSpread: {avg_spread(x, y)}")
print(f"Disparity: {disparity(x, y)}")
```

## Estimators

### Single-sample estimators

- `center(x)`: Hodges-Lehmann estimator - median of all pairwise averages
- `spread(x)`: Shamos estimator - median of all pairwise absolute differences
- `rel_spread(x)`: Relative spread - spread divided by absolute center

### Two-sample estimators

- `shift(x, y)`: Hodges-Lehmann shift estimator - median of all pairwise differences
- `ratio(x, y)`: Median of all pairwise ratios
- `avg_spread(x, y)`: Weighted average of spreads
- `disparity(x, y)`: Normalized shift - shift divided by average spread

## Features

- Robust to outliers
- Supports both Python lists and NumPy arrays
- Type hints with numpy.typing
- Efficient vectorized NumPy operations

## License

MIT
pragmastat-3.1.6/README.md
ADDED
@@ -0,0 +1,61 @@
# Pragmastat Python Implementation

[](https://doi.org/10.5281/zenodo.17236778)

A Python implementation of the Pragmastat statistical toolkit, providing robust statistical estimators for reliable analysis of real-world data.

## Installation

```bash
pip install pragmastat
```

## Requirements

- Python >= 3.8
- NumPy >= 1.20

## Usage

```python
from pragmastat import center, spread, rel_spread, shift, ratio, avg_spread, disparity

# Basic estimators
x = [1, 2, 3, 4, 5]
print(f"Center: {center(x)}")
print(f"Spread: {spread(x)}")
print(f"RelSpread: {rel_spread(x)}")

# Comparison estimators
y = [3, 4, 5, 6, 7]
print(f"Shift: {shift(x, y)}")
print(f"Ratio: {ratio(x, y)}")
print(f"AvgSpread: {avg_spread(x, y)}")
print(f"Disparity: {disparity(x, y)}")
```

## Estimators

### Single-sample estimators

- `center(x)`: Hodges-Lehmann estimator - median of all pairwise averages
- `spread(x)`: Shamos estimator - median of all pairwise absolute differences
- `rel_spread(x)`: Relative spread - spread divided by absolute center

### Two-sample estimators

- `shift(x, y)`: Hodges-Lehmann shift estimator - median of all pairwise differences
- `ratio(x, y)`: Median of all pairwise ratios
- `avg_spread(x, y)`: Weighted average of spreads
- `disparity(x, y)`: Normalized shift - shift divided by average spread

## Features

- Robust to outliers
- Supports both Python lists and NumPy arrays
- Type hints with numpy.typing
- Efficient vectorized NumPy operations

## License

MIT
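Editor's note: the estimator bullets in the README above compress each definition into one line ("median of all pairwise averages", "median of all pairwise absolute differences"). A brute-force sketch that spells those definitions out directly may help when reading the fast implementations later in this diff. This is illustrative only: `naive_center` and `naive_spread` are hypothetical helpers, not part of the pragmastat API.

```python
# Naive O(n^2) reference sketch of the one-sample estimators (illustrative only).
from statistics import median

def naive_center(x):
    # Hodges-Lehmann: median of (x[i] + x[j]) / 2 over all pairs with i <= j
    n = len(x)
    return median((x[i] + x[j]) / 2 for i in range(n) for j in range(i, n))

def naive_spread(x):
    # Shamos: median of |x[i] - x[j]| over all pairs with i < j
    n = len(x)
    return median(abs(x[i] - x[j]) for i in range(n) for j in range(i + 1, n))

print(naive_center([1, 2, 3, 4, 5]))  # 3.0
print(naive_spread([1, 2, 3, 4, 5]))  # 2.0
```

The packaged `center` and `spread` compute the same quantities but delegate to O(n log n) selection algorithms instead of materializing all pairs.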
pragmastat-3.1.6/examples/demo.py
ADDED
@@ -0,0 +1,51 @@
from pragmastat import center, spread, rel_spread, shift, ratio, avg_spread, disparity


def main():
    x = [0, 2, 4, 6, 8]
    print(center(x)) # 4
    print(center([v + 10 for v in x])) # 14
    print(center([v * 3 for v in x])) # 12

    print(spread(x)) # 4
    print(spread([v + 10 for v in x])) # 4
    print(spread([v * 2 for v in x])) # 8

    print(rel_spread(x)) # 1
    print(rel_spread([v * 5 for v in x])) # 1

    y = [10, 12, 14, 16, 18]
    print(shift(x, y)) # -10
    print(shift(x, x)) # 0
    print(shift([v + 7 for v in x], [v + 3 for v in y])) # -6
    print(shift([v * 2 for v in x], [v * 2 for v in y])) # -20
    print(shift(y, x)) # 10

    x = [1, 2, 4, 8, 16]
    y = [2, 4, 8, 16, 32]
    print(ratio(x, y)) # 0.5
    print(ratio(x, x)) # 1
    print(ratio([v * 2 for v in x], [v * 5 for v in y])) # 0.2

    x = [0, 3, 6, 9, 12]
    y = [0, 2, 4, 6, 8]
    print(spread(x)) # 6
    print(spread(y)) # 4

    print(avg_spread(x, y)) # 5
    print(avg_spread(x, x)) # 6
    print(avg_spread([v * 2 for v in x], [v * 3 for v in x])) # 15
    print(avg_spread(y, x)) # 5
    print(avg_spread([v * 2 for v in x], [v * 2 for v in y])) # 10

    print(shift(x, y)) # 2
    print(avg_spread(x, y)) # 5

    print(disparity(x, y)) # 0.4
    print(disparity([v + 5 for v in x], [v + 5 for v in y])) # 0.4
    print(disparity([v * 2 for v in x], [v * 2 for v in y])) # 0.4
    print(disparity(y, x)) # -0.4


if __name__ == "__main__":
    main()
pragmastat-3.1.6/pragmastat/__init__.py
ADDED
@@ -0,0 +1,21 @@
from .estimators import (
    center,
    spread,
    rel_spread,
    shift,
    ratio,
    avg_spread,
    disparity
)

__all__ = [
    'center',
    'spread',
    'rel_spread',
    'shift',
    'ratio',
    'avg_spread',
    'disparity'
]

__version__ = '3.1.6'
pragmastat-3.1.6/pragmastat/estimators.py
ADDED
@@ -0,0 +1,71 @@
from typing import Sequence, Union
import numpy as np
from numpy.typing import NDArray
from .fast_center import _fast_center
from .fast_spread import _fast_spread


def center(x: Union[Sequence[float], NDArray]) -> float:
    x = np.asarray(x)
    n = len(x)
    if n == 0:
        raise ValueError("Input array cannot be empty")
    # Use fast O(n log n) algorithm
    return _fast_center(x.tolist())


def spread(x: Union[Sequence[float], NDArray]) -> float:
    x = np.asarray(x)
    n = len(x)
    if n == 0:
        raise ValueError("Input array cannot be empty")
    if n == 1:
        return 0.0
    # Use fast O(n log n) algorithm
    return _fast_spread(x.tolist())


def rel_spread(x: Union[Sequence[float], NDArray]) -> float:
    center_val = center(x)
    if center_val == 0:
        raise ValueError("RelSpread is undefined when Center equals zero")
    return spread(x) / abs(center_val)


def shift(x: Union[Sequence[float], NDArray], y: Union[Sequence[float], NDArray]) -> float:
    x = np.asarray(x)
    y = np.asarray(y)
    if len(x) == 0 or len(y) == 0:
        raise ValueError("Input arrays cannot be empty")
    pairwise_shifts = np.subtract.outer(x, y)
    return float(np.median(pairwise_shifts))


def ratio(x: Union[Sequence[float], NDArray], y: Union[Sequence[float], NDArray]) -> float:
    x = np.asarray(x)
    y = np.asarray(y)
    if len(x) == 0 or len(y) == 0:
        raise ValueError("Input arrays cannot be empty")
    if np.any(y <= 0):
        raise ValueError("All values in y must be strictly positive")
    pairwise_ratios = np.divide.outer(x, y)
    return float(np.median(pairwise_ratios))


def avg_spread(x: Union[Sequence[float], NDArray], y: Union[Sequence[float], NDArray]) -> float:
    x = np.asarray(x)
    y = np.asarray(y)
    n = len(x)
    m = len(y)
    if n == 0 or m == 0:
        raise ValueError("Input arrays cannot be empty")
    spread_x = spread(x)
    spread_y = spread(y)
    return (n * spread_x + m * spread_y) / (n + m)


def disparity(x: Union[Sequence[float], NDArray], y: Union[Sequence[float], NDArray]) -> float:
    avg_spread_val = avg_spread(x, y)
    if avg_spread_val == 0:
        return float('inf')
    return shift(x, y) / avg_spread_val
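Editor's note: the invariance properties exercised in examples/demo.py (a location shift moves `center` but not `spread`; scaling both samples scales `shift` but leaves `disparity` unchanged) can be spot-checked against the public API with a few asserts. The sketch below is illustrative only; the sample values and tolerance are arbitrary and are not taken from the package's test suite.

```python
# Illustrative invariance spot-check using only the public pragmastat API.
from pragmastat import center, spread, shift, disparity

x = [0.5, 1.7, 2.3, 9.0, 4.4]
y = [1.1, 2.2, 3.3, 4.4, 5.5]

# Location shift moves center but leaves spread unchanged
assert abs(center([v + 10 for v in x]) - (center(x) + 10)) < 1e-9
assert abs(spread([v + 10 for v in x]) - spread(x)) < 1e-9

# Scaling both samples scales shift but leaves disparity unchanged
assert abs(shift([2 * v for v in x], [2 * v for v in y]) - 2 * shift(x, y)) < 1e-9
assert abs(disparity([2 * v for v in x], [2 * v for v in y]) - disparity(x, y)) < 1e-9
```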
pragmastat-3.1.6/pragmastat/fast_center.py
ADDED
@@ -0,0 +1,215 @@
"""Fast O(n log n) implementation of the Center (Hodges-Lehmann) estimator.

Based on Monahan's Algorithm 616 (1984).
"""

from typing import List
import random
import numpy as np

# Try to import the C implementation, fall back to pure Python if unavailable
try:
    from . import _fast_center_c
    _HAS_C_EXTENSION = True
except ImportError:
    _HAS_C_EXTENSION = False


def _fast_center_python(values: List[float]) -> float:
    """
    Pure Python implementation of fast center estimator.

    Compute the median of all pairwise averages (xi + xj)/2 efficiently.

    Time complexity: O(n log n) expected
    Space complexity: O(n)

    Args:
        values: A list of numeric values

    Returns:
        The center estimate (Hodges-Lehmann estimator)
    """
    n = len(values)
    if n == 0:
        raise ValueError("Input array cannot be empty")
    if n == 1:
        return values[0]
    if n == 2:
        return (values[0] + values[1]) / 2

    # Sort the values
    sorted_values = sorted(values)

    # Calculate target median rank(s) among all pairwise sums
    total_pairs = n * (n + 1) // 2
    median_rank_low = (total_pairs + 1) // 2 # 1-based rank
    median_rank_high = (total_pairs + 2) // 2

    # Initialize search bounds for each row (1-based indexing)
    left_bounds = [i + 1 for i in range(n)] # Row i pairs with columns [i+1..n]
    right_bounds = [n for i in range(n)]

    # Start with a good pivot: sum of middle elements
    pivot = sorted_values[(n - 1) // 2] + sorted_values[n // 2]
    active_set_size = total_pairs
    previous_count = 0

    while True:
        # === PARTITION STEP ===
        # Count pairwise sums less than current pivot
        count_below_pivot = 0
        current_column = n
        partition_counts = []

        for row in range(1, n + 1): # 1-based
            # Move left from current column until we find sums < pivot
            while current_column >= row and sorted_values[row - 1] + sorted_values[current_column - 1] >= pivot:
                current_column -= 1

            # Count elements in this row that are < pivot
            elements_below = max(0, current_column - row + 1)
            partition_counts.append(elements_below)
            count_below_pivot += elements_below

        # === CONVERGENCE CHECK ===
        if count_below_pivot == previous_count:
            # No progress - use midrange strategy
            min_active_sum = float('inf')
            max_active_sum = float('-inf')

            for i in range(n):
                if left_bounds[i] > right_bounds[i]:
                    continue

                row_value = sorted_values[i]
                smallest_in_row = sorted_values[left_bounds[i] - 1] + row_value
                largest_in_row = sorted_values[right_bounds[i] - 1] + row_value

                min_active_sum = min(min_active_sum, smallest_in_row)
                max_active_sum = max(max_active_sum, largest_in_row)

            pivot = (min_active_sum + max_active_sum) / 2
            if pivot <= min_active_sum or pivot > max_active_sum:
                pivot = max_active_sum

            if min_active_sum == max_active_sum or active_set_size <= 2:
                return pivot / 2

            continue

        # === TARGET CHECK ===
        at_target_rank = (count_below_pivot == median_rank_low or
                          count_below_pivot == median_rank_high - 1)

        if at_target_rank:
            # Find boundary values
            largest_below_pivot = float('-inf')
            smallest_at_or_above_pivot = float('inf')

            for i in range(n):
                count_in_row = partition_counts[i]
                row_value = sorted_values[i]
                total_in_row = n - i

                # Find largest sum in this row that's < pivot
                if count_in_row > 0:
                    last_below_index = i + count_in_row
                    last_below_value = row_value + sorted_values[last_below_index - 1]
                    largest_below_pivot = max(largest_below_pivot, last_below_value)

                # Find smallest sum in this row that's >= pivot
                if count_in_row < total_in_row:
                    first_at_or_above_index = i + count_in_row + 1
                    first_at_or_above_value = row_value + sorted_values[first_at_or_above_index - 1]
                    smallest_at_or_above_pivot = min(smallest_at_or_above_pivot, first_at_or_above_value)

            # Calculate final result
            if median_rank_low < median_rank_high:
                # Even total: average the two middle values
                return (smallest_at_or_above_pivot + largest_below_pivot) / 4
            else:
                # Odd total: return the single middle value
                need_largest = (count_below_pivot == median_rank_low)
                return (largest_below_pivot if need_largest else smallest_at_or_above_pivot) / 2

        # === UPDATE BOUNDS ===
        if count_below_pivot < median_rank_low:
            # Too few values below pivot - search higher
            for i in range(n):
                left_bounds[i] = i + partition_counts[i] + 1
        else:
            # Too many values below pivot - search lower
            for i in range(n):
                right_bounds[i] = i + partition_counts[i]

        # === PREPARE NEXT ITERATION ===
        previous_count = count_below_pivot

        # Recalculate active set size
        active_set_size = sum(max(0, right_bounds[i] - left_bounds[i] + 1) for i in range(n))

        # Choose next pivot
        if active_set_size > 2:
            # Use randomized row median strategy
            target_index = random.randint(0, active_set_size - 1)
            cumulative_size = 0
            selected_row = 0

            for i in range(n):
                row_size = max(0, right_bounds[i] - left_bounds[i] + 1)
                if target_index < cumulative_size + row_size:
                    selected_row = i
                    break
                cumulative_size += row_size

            # Use median element of the selected row as pivot
            median_column_in_row = (left_bounds[selected_row] + right_bounds[selected_row]) // 2
            pivot = sorted_values[selected_row] + sorted_values[median_column_in_row - 1]
        else:
            # Few elements remain - use midrange strategy
            min_remaining_sum = float('inf')
            max_remaining_sum = float('-inf')

            for i in range(n):
                if left_bounds[i] > right_bounds[i]:
                    continue

                row_value = sorted_values[i]
                min_in_row = sorted_values[left_bounds[i] - 1] + row_value
                max_in_row = sorted_values[right_bounds[i] - 1] + row_value

                min_remaining_sum = min(min_remaining_sum, min_in_row)
                max_remaining_sum = max(max_remaining_sum, max_in_row)

            pivot = (min_remaining_sum + max_remaining_sum) / 2
            if pivot <= min_remaining_sum or pivot > max_remaining_sum:
                pivot = max_remaining_sum

            if min_remaining_sum == max_remaining_sum:
                return pivot / 2


def _fast_center(values: List[float]) -> float:
    """
    Compute the median of all pairwise averages (xi + xj)/2 efficiently.

    Internal implementation - not part of public API.
    Uses C implementation if available, falls back to pure Python.

    Time complexity: O(n log n) expected
    Space complexity: O(n)

    Args:
        values: A list of numeric values

    Returns:
        The center estimate (Hodges-Lehmann estimator)
    """
    if _HAS_C_EXTENSION:
        # Convert to numpy array and use C implementation
        arr = np.asarray(values, dtype=np.float64)
        return _fast_center_c.fast_center_c(arr)
    else:
        # Fall back to pure Python implementation
        return _fast_center_python(values)