pragmastat 3.1.23__tar.gz → 3.1.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pragmastat might be problematic. Click here for more details.
- {pragmastat-3.1.23/pragmastat.egg-info → pragmastat-3.1.25}/PKG-INFO +7 -5
- {pragmastat-3.1.23 → pragmastat-3.1.25}/README.md +6 -4
- pragmastat-3.1.25/pragmastat/__init__.py +13 -0
- {pragmastat-3.1.23 → pragmastat-3.1.25}/pragmastat/estimators.py +14 -6
- {pragmastat-3.1.23 → pragmastat-3.1.25}/pragmastat/fast_center.py +34 -16
- {pragmastat-3.1.23 → pragmastat-3.1.25}/pragmastat/fast_spread.py +14 -9
- {pragmastat-3.1.23 → pragmastat-3.1.25/pragmastat.egg-info}/PKG-INFO +7 -5
- {pragmastat-3.1.23 → pragmastat-3.1.25}/pyproject.toml +1 -1
- pragmastat-3.1.25/setup.py +23 -0
- pragmastat-3.1.25/tests/test_invariance.py +111 -0
- {pragmastat-3.1.23 → pragmastat-3.1.25}/tests/test_performance.py +6 -3
- {pragmastat-3.1.23 → pragmastat-3.1.25}/tests/test_reference.py +15 -19
- pragmastat-3.1.23/pragmastat/__init__.py +0 -21
- pragmastat-3.1.23/setup.py +0 -24
- pragmastat-3.1.23/tests/test_invariance.py +0 -152
- {pragmastat-3.1.23 → pragmastat-3.1.25}/LICENSE +0 -0
- {pragmastat-3.1.23 → pragmastat-3.1.25}/MANIFEST.in +0 -0
- {pragmastat-3.1.23 → pragmastat-3.1.25}/examples/demo.py +0 -0
- {pragmastat-3.1.23 → pragmastat-3.1.25}/pragmastat.egg-info/SOURCES.txt +0 -0
- {pragmastat-3.1.23 → pragmastat-3.1.25}/pragmastat.egg-info/dependency_links.txt +0 -0
- {pragmastat-3.1.23 → pragmastat-3.1.25}/pragmastat.egg-info/requires.txt +0 -0
- {pragmastat-3.1.23 → pragmastat-3.1.25}/pragmastat.egg-info/top_level.txt +0 -0
- {pragmastat-3.1.23 → pragmastat-3.1.25}/setup.cfg +0 -0
- {pragmastat-3.1.23 → pragmastat-3.1.25}/src/fast_center_c.c +0 -0
- {pragmastat-3.1.23 → pragmastat-3.1.25}/src/fast_spread_c.c +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pragmastat
|
|
3
|
-
Version: 3.1.23
|
|
3
|
+
Version: 3.1.25
|
|
4
4
|
Summary: Pragmastat: Pragmatic Statistical Toolkit
|
|
5
5
|
Author: Andrey Akinshin
|
|
6
6
|
License-Expression: MIT
|
|
@@ -18,15 +18,17 @@ Dynamic: license-file
|
|
|
18
18
|
|
|
19
19
|
This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit', which presents a toolkit of statistical procedures that provide reliable results across diverse real-world distributions, with ready-to-use implementations and detailed explanations.
|
|
20
20
|
|
|
21
|
-
- PDF manual for this version: https://pragmastat.
|
|
22
|
-
-
|
|
21
|
+
- PDF manual for this version: [pragmastat-v3.1.25.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.25/pragmastat-v3.1.25.pdf)
|
|
22
|
+
- Source code for this version: [pragmastat/python/v3.1.25](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.25/python)
|
|
23
|
+
- Latest online manual: https://pragmastat.dev
|
|
23
24
|
- Manual DOI: [10.5281/zenodo.17236778](https://doi.org/10.5281/zenodo.17236778)
|
|
24
|
-
- Source code: https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.23/python
|
|
25
25
|
|
|
26
26
|
## Installation
|
|
27
27
|
|
|
28
|
+
Install from PyPI:
|
|
29
|
+
|
|
28
30
|
```bash
|
|
29
|
-
pip install pragmastat
|
|
31
|
+
pip install pragmastat==3.1.25
|
|
30
32
|
```
|
|
31
33
|
|
|
32
34
|
## Demo
|
|
@@ -2,15 +2,17 @@
|
|
|
2
2
|
|
|
3
3
|
This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit', which presents a toolkit of statistical procedures that provide reliable results across diverse real-world distributions, with ready-to-use implementations and detailed explanations.
|
|
4
4
|
|
|
5
|
-
- PDF manual for this version: https://pragmastat.
|
|
6
|
-
-
|
|
5
|
+
- PDF manual for this version: [pragmastat-v3.1.25.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.25/pragmastat-v3.1.25.pdf)
|
|
6
|
+
- Source code for this version: [pragmastat/python/v3.1.25](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.25/python)
|
|
7
|
+
- Latest online manual: https://pragmastat.dev
|
|
7
8
|
- Manual DOI: [10.5281/zenodo.17236778](https://doi.org/10.5281/zenodo.17236778)
|
|
8
|
-
- Source code: https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.23/python
|
|
9
9
|
|
|
10
10
|
## Installation
|
|
11
11
|
|
|
12
|
+
Install from PyPI:
|
|
13
|
+
|
|
12
14
|
```bash
|
|
13
|
-
pip install pragmastat
|
|
15
|
+
pip install pragmastat==3.1.25
|
|
14
16
|
```
|
|
15
17
|
|
|
16
18
|
## Demo
|
|
@@ -32,7 +32,9 @@ def rel_spread(x: Union[Sequence[float], NDArray]) -> float:
|
|
|
32
32
|
return spread(x) / abs(center_val)
|
|
33
33
|
|
|
34
34
|
|
|
35
|
-
def shift(x: Union[Sequence[float], NDArray], y: Union[Sequence[float], NDArray]) -> float:
|
|
35
|
+
def shift(
|
|
36
|
+
x: Union[Sequence[float], NDArray], y: Union[Sequence[float], NDArray]
|
|
37
|
+
) -> float:
|
|
36
38
|
x = np.asarray(x)
|
|
37
39
|
y = np.asarray(y)
|
|
38
40
|
if len(x) == 0 or len(y) == 0:
|
|
@@ -41,7 +43,9 @@ def shift(x: Union[Sequence[float], NDArray], y: Union[Sequence[float], NDArray]
|
|
|
41
43
|
return float(np.median(pairwise_shifts))
|
|
42
44
|
|
|
43
45
|
|
|
44
|
-
def ratio(x: Union[Sequence[float], NDArray], y: Union[Sequence[float], NDArray]) -> float:
|
|
46
|
+
def ratio(
|
|
47
|
+
x: Union[Sequence[float], NDArray], y: Union[Sequence[float], NDArray]
|
|
48
|
+
) -> float:
|
|
45
49
|
x = np.asarray(x)
|
|
46
50
|
y = np.asarray(y)
|
|
47
51
|
if len(x) == 0 or len(y) == 0:
|
|
@@ -52,7 +56,9 @@ def ratio(x: Union[Sequence[float], NDArray], y: Union[Sequence[float], NDArray]
|
|
|
52
56
|
return float(np.median(pairwise_ratios))
|
|
53
57
|
|
|
54
58
|
|
|
55
|
-
def avg_spread(x: Union[Sequence[float], NDArray], y: Union[Sequence[float], NDArray]) -> float:
|
|
59
|
+
def avg_spread(
|
|
60
|
+
x: Union[Sequence[float], NDArray], y: Union[Sequence[float], NDArray]
|
|
61
|
+
) -> float:
|
|
56
62
|
x = np.asarray(x)
|
|
57
63
|
y = np.asarray(y)
|
|
58
64
|
n = len(x)
|
|
@@ -64,8 +70,10 @@ def avg_spread(x: Union[Sequence[float], NDArray], y: Union[Sequence[float], NDA
|
|
|
64
70
|
return (n * spread_x + m * spread_y) / (n + m)
|
|
65
71
|
|
|
66
72
|
|
|
67
|
-
def disparity(
|
|
73
|
+
def disparity(
|
|
74
|
+
x: Union[Sequence[float], NDArray], y: Union[Sequence[float], NDArray]
|
|
75
|
+
) -> float:
|
|
68
76
|
avg_spread_val = avg_spread(x, y)
|
|
69
77
|
if avg_spread_val == 0:
|
|
70
|
-
return float(
|
|
71
|
-
return shift(x, y) / avg_spread_val
|
|
78
|
+
return float("inf")
|
|
79
|
+
return shift(x, y) / avg_spread_val
|
|
@@ -10,6 +10,7 @@ import numpy as np
|
|
|
10
10
|
# Try to import the C implementation, fall back to pure Python if unavailable
|
|
11
11
|
try:
|
|
12
12
|
from . import _fast_center_c
|
|
13
|
+
|
|
13
14
|
_HAS_C_EXTENSION = True
|
|
14
15
|
except ImportError:
|
|
15
16
|
_HAS_C_EXTENSION = False
|
|
@@ -64,7 +65,10 @@ def _fast_center_python(values: List[float]) -> float:
|
|
|
64
65
|
|
|
65
66
|
for row in range(1, n + 1): # 1-based
|
|
66
67
|
# Move left from current column until we find sums < pivot
|
|
67
|
-
while
|
|
68
|
+
while (
|
|
69
|
+
current_column >= row
|
|
70
|
+
and sorted_values[row - 1] + sorted_values[current_column - 1] >= pivot
|
|
71
|
+
):
|
|
68
72
|
current_column -= 1
|
|
69
73
|
|
|
70
74
|
# Count elements in this row that are < pivot
|
|
@@ -75,8 +79,8 @@ def _fast_center_python(values: List[float]) -> float:
|
|
|
75
79
|
# === CONVERGENCE CHECK ===
|
|
76
80
|
if count_below_pivot == previous_count:
|
|
77
81
|
# No progress - use midrange strategy
|
|
78
|
-
min_active_sum = float(
|
|
79
|
-
max_active_sum = float(
|
|
82
|
+
min_active_sum = float("inf")
|
|
83
|
+
max_active_sum = float("-inf")
|
|
80
84
|
|
|
81
85
|
for i in range(n):
|
|
82
86
|
if left_bounds[i] > right_bounds[i]:
|
|
@@ -99,13 +103,15 @@ def _fast_center_python(values: List[float]) -> float:
|
|
|
99
103
|
continue
|
|
100
104
|
|
|
101
105
|
# === TARGET CHECK ===
|
|
102
|
-
at_target_rank = (
|
|
103
|
-
|
|
106
|
+
at_target_rank = (
|
|
107
|
+
count_below_pivot == median_rank_low
|
|
108
|
+
or count_below_pivot == median_rank_high - 1
|
|
109
|
+
)
|
|
104
110
|
|
|
105
111
|
if at_target_rank:
|
|
106
112
|
# Find boundary values
|
|
107
|
-
largest_below_pivot = float(
|
|
108
|
-
smallest_at_or_above_pivot = float(
|
|
113
|
+
largest_below_pivot = float("-inf")
|
|
114
|
+
smallest_at_or_above_pivot = float("inf")
|
|
109
115
|
|
|
110
116
|
for i in range(n):
|
|
111
117
|
count_in_row = partition_counts[i]
|
|
@@ -121,8 +127,12 @@ def _fast_center_python(values: List[float]) -> float:
|
|
|
121
127
|
# Find smallest sum in this row that's >= pivot
|
|
122
128
|
if count_in_row < total_in_row:
|
|
123
129
|
first_at_or_above_index = i + count_in_row + 1
|
|
124
|
-
first_at_or_above_value =
|
|
125
|
-
|
|
130
|
+
first_at_or_above_value = (
|
|
131
|
+
row_value + sorted_values[first_at_or_above_index - 1]
|
|
132
|
+
)
|
|
133
|
+
smallest_at_or_above_pivot = min(
|
|
134
|
+
smallest_at_or_above_pivot, first_at_or_above_value
|
|
135
|
+
)
|
|
126
136
|
|
|
127
137
|
# Calculate final result
|
|
128
138
|
if median_rank_low < median_rank_high:
|
|
@@ -130,8 +140,10 @@ def _fast_center_python(values: List[float]) -> float:
|
|
|
130
140
|
return (smallest_at_or_above_pivot + largest_below_pivot) / 4
|
|
131
141
|
else:
|
|
132
142
|
# Odd total: return the single middle value
|
|
133
|
-
need_largest =
|
|
134
|
-
return (
|
|
143
|
+
need_largest = count_below_pivot == median_rank_low
|
|
144
|
+
return (
|
|
145
|
+
largest_below_pivot if need_largest else smallest_at_or_above_pivot
|
|
146
|
+
) / 2
|
|
135
147
|
|
|
136
148
|
# === UPDATE BOUNDS ===
|
|
137
149
|
if count_below_pivot < median_rank_low:
|
|
@@ -147,7 +159,9 @@ def _fast_center_python(values: List[float]) -> float:
|
|
|
147
159
|
previous_count = count_below_pivot
|
|
148
160
|
|
|
149
161
|
# Recalculate active set size
|
|
150
|
-
active_set_size = sum(
|
|
162
|
+
active_set_size = sum(
|
|
163
|
+
max(0, right_bounds[i] - left_bounds[i] + 1) for i in range(n)
|
|
164
|
+
)
|
|
151
165
|
|
|
152
166
|
# Choose next pivot
|
|
153
167
|
if active_set_size > 2:
|
|
@@ -164,12 +178,16 @@ def _fast_center_python(values: List[float]) -> float:
|
|
|
164
178
|
cumulative_size += row_size
|
|
165
179
|
|
|
166
180
|
# Use median element of the selected row as pivot
|
|
167
|
-
median_column_in_row = (
|
|
168
|
-
|
|
181
|
+
median_column_in_row = (
|
|
182
|
+
left_bounds[selected_row] + right_bounds[selected_row]
|
|
183
|
+
) // 2
|
|
184
|
+
pivot = (
|
|
185
|
+
sorted_values[selected_row] + sorted_values[median_column_in_row - 1]
|
|
186
|
+
)
|
|
169
187
|
else:
|
|
170
188
|
# Few elements remain - use midrange strategy
|
|
171
|
-
min_remaining_sum = float(
|
|
172
|
-
max_remaining_sum = float(
|
|
189
|
+
min_remaining_sum = float("inf")
|
|
190
|
+
max_remaining_sum = float("-inf")
|
|
173
191
|
|
|
174
192
|
for i in range(n):
|
|
175
193
|
if left_bounds[i] > right_bounds[i]:
|
|
@@ -10,6 +10,7 @@ import numpy as np
|
|
|
10
10
|
# Try to import the C implementation, fall back to pure Python if unavailable
|
|
11
11
|
try:
|
|
12
12
|
from . import _fast_spread_c
|
|
13
|
+
|
|
13
14
|
_HAS_C_EXTENSION = True
|
|
14
15
|
except ImportError:
|
|
15
16
|
_HAS_C_EXTENSION = False
|
|
@@ -63,8 +64,8 @@ def _fast_spread_python(values: List[float]) -> float:
|
|
|
63
64
|
while True:
|
|
64
65
|
# === PARTITION: count how many differences are < pivot ===
|
|
65
66
|
count_below = 0
|
|
66
|
-
largest_below = float(
|
|
67
|
-
smallest_at_or_above = float(
|
|
67
|
+
largest_below = float("-inf")
|
|
68
|
+
smallest_at_or_above = float("inf")
|
|
68
69
|
|
|
69
70
|
j = 1 # global two-pointer (non-decreasing across rows)
|
|
70
71
|
for i in range(n - 1):
|
|
@@ -95,14 +96,14 @@ def _fast_spread_python(values: List[float]) -> float:
|
|
|
95
96
|
return 0.5 * (largest_below + smallest_at_or_above)
|
|
96
97
|
else:
|
|
97
98
|
# Odd N: pick the single middle
|
|
98
|
-
need_largest =
|
|
99
|
+
need_largest = count_below == k_low
|
|
99
100
|
return largest_below if need_largest else smallest_at_or_above
|
|
100
101
|
|
|
101
102
|
# === STALL HANDLING ===
|
|
102
103
|
if count_below == prev_count_below:
|
|
103
104
|
# Compute min/max remaining difference in the ACTIVE set
|
|
104
|
-
min_active = float(
|
|
105
|
-
max_active = float(
|
|
105
|
+
min_active = float("inf")
|
|
106
|
+
max_active = float("-inf")
|
|
106
107
|
active = 0
|
|
107
108
|
|
|
108
109
|
for i in range(n - 1):
|
|
@@ -114,7 +115,7 @@ def _fast_spread_python(values: List[float]) -> float:
|
|
|
114
115
|
row_max = a[Ri] - a[i]
|
|
115
116
|
min_active = min(min_active, row_min)
|
|
116
117
|
max_active = max(max_active, row_max)
|
|
117
|
-
active +=
|
|
118
|
+
active += Ri - Li + 1
|
|
118
119
|
|
|
119
120
|
if active <= 0:
|
|
120
121
|
if k_low < k_high:
|
|
@@ -156,8 +157,8 @@ def _fast_spread_python(values: List[float]) -> float:
|
|
|
156
157
|
|
|
157
158
|
if active_size <= 2:
|
|
158
159
|
# Few candidates left: return midrange of remaining
|
|
159
|
-
min_rem = float(
|
|
160
|
-
max_rem = float(
|
|
160
|
+
min_rem = float("inf")
|
|
161
|
+
max_rem = float("-inf")
|
|
161
162
|
for i in range(n - 1):
|
|
162
163
|
if L[i] > R[i]:
|
|
163
164
|
continue
|
|
@@ -173,7 +174,11 @@ def _fast_spread_python(values: List[float]) -> float:
|
|
|
173
174
|
|
|
174
175
|
if k_low < k_high:
|
|
175
176
|
return 0.5 * (min_rem + max_rem)
|
|
176
|
-
return
|
|
177
|
+
return (
|
|
178
|
+
min_rem
|
|
179
|
+
if abs((k_low - 1) - count_below) <= abs(count_below - k_low)
|
|
180
|
+
else max_rem
|
|
181
|
+
)
|
|
177
182
|
else:
|
|
178
183
|
# Weighted random row selection
|
|
179
184
|
t = random.randint(0, active_size - 1)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pragmastat
|
|
3
|
-
Version: 3.1.23
|
|
3
|
+
Version: 3.1.25
|
|
4
4
|
Summary: Pragmastat: Pragmatic Statistical Toolkit
|
|
5
5
|
Author: Andrey Akinshin
|
|
6
6
|
License-Expression: MIT
|
|
@@ -18,15 +18,17 @@ Dynamic: license-file
|
|
|
18
18
|
|
|
19
19
|
This is a Python implementation of 'Pragmastat: Pragmatic Statistical Toolkit', which presents a toolkit of statistical procedures that provide reliable results across diverse real-world distributions, with ready-to-use implementations and detailed explanations.
|
|
20
20
|
|
|
21
|
-
- PDF manual for this version: https://pragmastat.
|
|
22
|
-
-
|
|
21
|
+
- PDF manual for this version: [pragmastat-v3.1.25.pdf](https://github.com/AndreyAkinshin/pragmastat/releases/download/v3.1.25/pragmastat-v3.1.25.pdf)
|
|
22
|
+
- Source code for this version: [pragmastat/python/v3.1.25](https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.25/python)
|
|
23
|
+
- Latest online manual: https://pragmastat.dev
|
|
23
24
|
- Manual DOI: [10.5281/zenodo.17236778](https://doi.org/10.5281/zenodo.17236778)
|
|
24
|
-
- Source code: https://github.com/AndreyAkinshin/pragmastat/tree/v3.1.23/python
|
|
25
25
|
|
|
26
26
|
## Installation
|
|
27
27
|
|
|
28
|
+
Install from PyPI:
|
|
29
|
+
|
|
28
30
|
```bash
|
|
29
|
-
pip install pragmastat
|
|
31
|
+
pip install pragmastat==3.1.25
|
|
30
32
|
```
|
|
31
33
|
|
|
32
34
|
## Demo
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from setuptools import setup, Extension
|
|
2
|
+
import numpy
|
|
3
|
+
|
|
4
|
+
# Define the C extensions
|
|
5
|
+
extensions = [
|
|
6
|
+
Extension(
|
|
7
|
+
"pragmastat._fast_center_c",
|
|
8
|
+
sources=["src/fast_center_c.c"],
|
|
9
|
+
include_dirs=[numpy.get_include()],
|
|
10
|
+
extra_compile_args=["-O3", "-Wall"],
|
|
11
|
+
),
|
|
12
|
+
Extension(
|
|
13
|
+
"pragmastat._fast_spread_c",
|
|
14
|
+
sources=["src/fast_spread_c.c"],
|
|
15
|
+
include_dirs=[numpy.get_include()],
|
|
16
|
+
extra_compile_args=["-O3", "-Wall"],
|
|
17
|
+
),
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
setup(
|
|
21
|
+
ext_modules=extensions,
|
|
22
|
+
package_dir={"": "."},
|
|
23
|
+
)
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from pragmastat import center, spread, rel_spread, shift, ratio, avg_spread, disparity
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class TestInvariance:
|
|
6
|
+
seed = 1729
|
|
7
|
+
sample_sizes = [2, 3, 4, 5, 6, 7, 8, 9, 10]
|
|
8
|
+
tolerance = 1e-9
|
|
9
|
+
|
|
10
|
+
def perform_test_one(self, expr1_func, expr2_func):
|
|
11
|
+
np.random.seed(self.seed)
|
|
12
|
+
for n in self.sample_sizes:
|
|
13
|
+
x = np.random.uniform(0, 1, n)
|
|
14
|
+
result1 = expr1_func(x)
|
|
15
|
+
result2 = expr2_func(x)
|
|
16
|
+
assert (
|
|
17
|
+
abs(result1 - result2) < self.tolerance
|
|
18
|
+
), f"Failed for n={n}: {result1} != {result2}"
|
|
19
|
+
|
|
20
|
+
def perform_test_two(self, expr1_func, expr2_func):
|
|
21
|
+
np.random.seed(self.seed)
|
|
22
|
+
for n in self.sample_sizes:
|
|
23
|
+
x = np.random.uniform(0, 1, n)
|
|
24
|
+
y = np.random.uniform(0, 1, n)
|
|
25
|
+
result1 = expr1_func(x, y)
|
|
26
|
+
result2 = expr2_func(x, y)
|
|
27
|
+
assert (
|
|
28
|
+
abs(result1 - result2) < self.tolerance
|
|
29
|
+
), f"Failed for n={n}: {result1} != {result2}"
|
|
30
|
+
|
|
31
|
+
# Center tests
|
|
32
|
+
def test_center_shift(self):
|
|
33
|
+
self.perform_test_one(lambda x: center(x + 2), lambda x: center(x) + 2)
|
|
34
|
+
|
|
35
|
+
def test_center_scale(self):
|
|
36
|
+
self.perform_test_one(lambda x: center(2 * x), lambda x: 2 * center(x))
|
|
37
|
+
|
|
38
|
+
def test_center_negate(self):
|
|
39
|
+
self.perform_test_one(lambda x: center(-1 * x), lambda x: -1 * center(x))
|
|
40
|
+
|
|
41
|
+
# Spread tests
|
|
42
|
+
def test_spread_shift(self):
|
|
43
|
+
self.perform_test_one(lambda x: spread(x + 2), lambda x: spread(x))
|
|
44
|
+
|
|
45
|
+
def test_spread_scale(self):
|
|
46
|
+
self.perform_test_one(lambda x: spread(2 * x), lambda x: 2 * spread(x))
|
|
47
|
+
|
|
48
|
+
def test_spread_negate(self):
|
|
49
|
+
self.perform_test_one(lambda x: spread(-1 * x), lambda x: spread(x))
|
|
50
|
+
|
|
51
|
+
# RelSpread tests
|
|
52
|
+
def test_rel_spread_scale(self):
|
|
53
|
+
self.perform_test_one(lambda x: rel_spread(2 * x), lambda x: rel_spread(x))
|
|
54
|
+
|
|
55
|
+
# Shift tests
|
|
56
|
+
def test_shift_shift(self):
|
|
57
|
+
self.perform_test_two(
|
|
58
|
+
lambda x, y: shift(x + 3, y + 2), lambda x, y: shift(x, y) + 1
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
def test_shift_scale(self):
|
|
62
|
+
self.perform_test_two(
|
|
63
|
+
lambda x, y: shift(2 * x, 2 * y), lambda x, y: 2 * shift(x, y)
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
def test_shift_antisymmetry(self):
|
|
67
|
+
self.perform_test_two(lambda x, y: shift(x, y), lambda x, y: -1 * shift(y, x))
|
|
68
|
+
|
|
69
|
+
# Ratio tests
|
|
70
|
+
def test_ratio_scale(self):
|
|
71
|
+
self.perform_test_two(
|
|
72
|
+
lambda x, y: ratio(2 * x, 3 * y), lambda x, y: (2.0 / 3) * ratio(x, y)
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
# AvgSpread tests
|
|
76
|
+
def test_avg_spread_equal(self):
|
|
77
|
+
self.perform_test_one(lambda x: avg_spread(x, x), lambda x: spread(x))
|
|
78
|
+
|
|
79
|
+
def test_avg_spread_symmetry(self):
|
|
80
|
+
self.perform_test_two(
|
|
81
|
+
lambda x, y: avg_spread(x, y), lambda x, y: avg_spread(y, x)
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
def test_avg_spread_average(self):
|
|
85
|
+
self.perform_test_one(lambda x: avg_spread(x, 5 * x), lambda x: 3 * spread(x))
|
|
86
|
+
|
|
87
|
+
def test_avg_spread_scale(self):
|
|
88
|
+
self.perform_test_two(
|
|
89
|
+
lambda x, y: avg_spread(-2 * x, -2 * y), lambda x, y: 2 * avg_spread(x, y)
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# Disparity tests
|
|
93
|
+
def test_disparity_shift(self):
|
|
94
|
+
self.perform_test_two(
|
|
95
|
+
lambda x, y: disparity(x + 2, y + 2), lambda x, y: disparity(x, y)
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
def test_disparity_scale(self):
|
|
99
|
+
self.perform_test_two(
|
|
100
|
+
lambda x, y: disparity(2 * x, 2 * y), lambda x, y: disparity(x, y)
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
def test_disparity_scale_neg(self):
|
|
104
|
+
self.perform_test_two(
|
|
105
|
+
lambda x, y: disparity(-2 * x, -2 * y), lambda x, y: -1 * disparity(x, y)
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
def test_disparity_antisymmetry(self):
|
|
109
|
+
self.perform_test_two(
|
|
110
|
+
lambda x, y: disparity(x, y), lambda x, y: -1 * disparity(y, x)
|
|
111
|
+
)
|
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
import time
|
|
4
4
|
import numpy as np
|
|
5
|
-
from pragmastat.estimators import center, spread
|
|
6
5
|
from pragmastat.fast_center import _fast_center
|
|
7
6
|
from pragmastat.fast_spread import _fast_spread
|
|
8
7
|
|
|
@@ -37,7 +36,9 @@ def test_center_correctness():
|
|
|
37
36
|
x = np.random.randn(n).tolist()
|
|
38
37
|
expected = center_simple(x)
|
|
39
38
|
actual = _fast_center(x)
|
|
40
|
-
assert
|
|
39
|
+
assert (
|
|
40
|
+
abs(expected - actual) < 1e-9
|
|
41
|
+
), f"Mismatch for n={n}: expected={expected}, actual={actual}"
|
|
41
42
|
|
|
42
43
|
|
|
43
44
|
def test_spread_correctness():
|
|
@@ -48,7 +49,9 @@ def test_spread_correctness():
|
|
|
48
49
|
x = np.random.randn(n).tolist()
|
|
49
50
|
expected = spread_simple(x)
|
|
50
51
|
actual = _fast_spread(x)
|
|
51
|
-
assert
|
|
52
|
+
assert (
|
|
53
|
+
abs(expected - actual) < 1e-9
|
|
54
|
+
), f"Mismatch for n={n}: expected={expected}, actual={actual}"
|
|
52
55
|
|
|
53
56
|
|
|
54
57
|
def test_center_performance():
|
|
@@ -1,49 +1,45 @@
|
|
|
1
1
|
import json
|
|
2
|
-
import os
|
|
3
|
-
import pytest
|
|
4
2
|
from pathlib import Path
|
|
5
|
-
from pragmastat import
|
|
6
|
-
center, spread, rel_spread,
|
|
7
|
-
shift, ratio, avg_spread, disparity
|
|
8
|
-
)
|
|
3
|
+
from pragmastat import center, spread, rel_spread, shift, ratio, avg_spread, disparity
|
|
9
4
|
|
|
10
5
|
|
|
11
6
|
def find_repo_root():
|
|
12
|
-
"""Find the repository root by looking for
|
|
7
|
+
"""Find the repository root by looking for CITATION.cff file."""
|
|
13
8
|
current_dir = Path(__file__).parent
|
|
14
9
|
while current_dir != current_dir.parent:
|
|
15
|
-
if (current_dir / "
|
|
10
|
+
if (current_dir / "CITATION.cff").exists():
|
|
16
11
|
return current_dir
|
|
17
12
|
current_dir = current_dir.parent
|
|
18
|
-
raise RuntimeError("Could not find repository root (
|
|
13
|
+
raise RuntimeError("Could not find repository root (CITATION.cff not found)")
|
|
19
14
|
|
|
20
15
|
|
|
21
16
|
def run_reference_tests(estimator_name, estimator_func, is_two_sample=False):
|
|
22
17
|
"""Run reference tests against JSON data files."""
|
|
23
18
|
repo_root = find_repo_root()
|
|
24
19
|
test_data_dir = repo_root / "tests" / estimator_name
|
|
25
|
-
|
|
20
|
+
|
|
26
21
|
json_files = list(test_data_dir.glob("*.json"))
|
|
27
22
|
assert len(json_files) > 0, f"No JSON test files found in {test_data_dir}"
|
|
28
|
-
|
|
23
|
+
|
|
29
24
|
for json_file in json_files:
|
|
30
|
-
with open(json_file,
|
|
25
|
+
with open(json_file, "r") as f:
|
|
31
26
|
test_case = json.load(f)
|
|
32
|
-
|
|
27
|
+
|
|
33
28
|
if is_two_sample:
|
|
34
29
|
input_x = test_case["input"]["x"]
|
|
35
30
|
input_y = test_case["input"]["y"]
|
|
36
31
|
expected_output = test_case["output"]
|
|
37
|
-
|
|
32
|
+
|
|
38
33
|
actual_output = estimator_func(input_x, input_y)
|
|
39
34
|
else:
|
|
40
35
|
input_x = test_case["input"]["x"]
|
|
41
36
|
expected_output = test_case["output"]
|
|
42
|
-
|
|
37
|
+
|
|
43
38
|
actual_output = estimator_func(input_x)
|
|
44
|
-
|
|
45
|
-
assert
|
|
46
|
-
|
|
39
|
+
|
|
40
|
+
assert (
|
|
41
|
+
abs(actual_output - expected_output) < 1e-10
|
|
42
|
+
), f"Failed for test file: {json_file.name}, expected: {expected_output}, got: {actual_output}"
|
|
47
43
|
|
|
48
44
|
|
|
49
45
|
class TestReference:
|
|
@@ -67,4 +63,4 @@ class TestReference:
|
|
|
67
63
|
run_reference_tests("avg-spread", avg_spread, is_two_sample=True)
|
|
68
64
|
|
|
69
65
|
def test_disparity_reference(self):
|
|
70
|
-
run_reference_tests("disparity", disparity, is_two_sample=True)
|
|
66
|
+
run_reference_tests("disparity", disparity, is_two_sample=True)
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
from .estimators import (
|
|
2
|
-
center,
|
|
3
|
-
spread,
|
|
4
|
-
rel_spread,
|
|
5
|
-
shift,
|
|
6
|
-
ratio,
|
|
7
|
-
avg_spread,
|
|
8
|
-
disparity
|
|
9
|
-
)
|
|
10
|
-
|
|
11
|
-
__all__ = [
|
|
12
|
-
'center',
|
|
13
|
-
'spread',
|
|
14
|
-
'rel_spread',
|
|
15
|
-
'shift',
|
|
16
|
-
'ratio',
|
|
17
|
-
'avg_spread',
|
|
18
|
-
'disparity'
|
|
19
|
-
]
|
|
20
|
-
|
|
21
|
-
__version__ = '3.1.23'
|
pragmastat-3.1.23/setup.py
DELETED
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
from setuptools import setup, Extension
|
|
2
|
-
import numpy
|
|
3
|
-
import os
|
|
4
|
-
|
|
5
|
-
# Define the C extensions
|
|
6
|
-
extensions = [
|
|
7
|
-
Extension(
|
|
8
|
-
'pragmastat._fast_center_c',
|
|
9
|
-
sources=['src/fast_center_c.c'],
|
|
10
|
-
include_dirs=[numpy.get_include()],
|
|
11
|
-
extra_compile_args=['-O3', '-Wall'],
|
|
12
|
-
),
|
|
13
|
-
Extension(
|
|
14
|
-
'pragmastat._fast_spread_c',
|
|
15
|
-
sources=['src/fast_spread_c.c'],
|
|
16
|
-
include_dirs=[numpy.get_include()],
|
|
17
|
-
extra_compile_args=['-O3', '-Wall'],
|
|
18
|
-
),
|
|
19
|
-
]
|
|
20
|
-
|
|
21
|
-
setup(
|
|
22
|
-
ext_modules=extensions,
|
|
23
|
-
package_dir={'': '.'},
|
|
24
|
-
)
|
|
@@ -1,152 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
import pytest
|
|
3
|
-
from pragmastat import (
|
|
4
|
-
center, spread, rel_spread,
|
|
5
|
-
shift, ratio, avg_spread, disparity
|
|
6
|
-
)
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class TestInvariance:
|
|
10
|
-
seed = 1729
|
|
11
|
-
sample_sizes = [2, 3, 4, 5, 6, 7, 8, 9, 10]
|
|
12
|
-
tolerance = 1e-9
|
|
13
|
-
|
|
14
|
-
def perform_test_one(self, expr1_func, expr2_func):
|
|
15
|
-
np.random.seed(self.seed)
|
|
16
|
-
for n in self.sample_sizes:
|
|
17
|
-
x = np.random.uniform(0, 1, n)
|
|
18
|
-
result1 = expr1_func(x)
|
|
19
|
-
result2 = expr2_func(x)
|
|
20
|
-
assert abs(result1 - result2) < self.tolerance, \
|
|
21
|
-
f"Failed for n={n}: {result1} != {result2}"
|
|
22
|
-
|
|
23
|
-
def perform_test_two(self, expr1_func, expr2_func):
|
|
24
|
-
np.random.seed(self.seed)
|
|
25
|
-
for n in self.sample_sizes:
|
|
26
|
-
x = np.random.uniform(0, 1, n)
|
|
27
|
-
y = np.random.uniform(0, 1, n)
|
|
28
|
-
result1 = expr1_func(x, y)
|
|
29
|
-
result2 = expr2_func(x, y)
|
|
30
|
-
assert abs(result1 - result2) < self.tolerance, \
|
|
31
|
-
f"Failed for n={n}: {result1} != {result2}"
|
|
32
|
-
|
|
33
|
-
# Center tests
|
|
34
|
-
def test_center_shift(self):
|
|
35
|
-
self.perform_test_one(
|
|
36
|
-
lambda x: center(x + 2),
|
|
37
|
-
lambda x: center(x) + 2
|
|
38
|
-
)
|
|
39
|
-
|
|
40
|
-
def test_center_scale(self):
|
|
41
|
-
self.perform_test_one(
|
|
42
|
-
lambda x: center(2 * x),
|
|
43
|
-
lambda x: 2 * center(x)
|
|
44
|
-
)
|
|
45
|
-
|
|
46
|
-
def test_center_negate(self):
|
|
47
|
-
self.perform_test_one(
|
|
48
|
-
lambda x: center(-1 * x),
|
|
49
|
-
lambda x: -1 * center(x)
|
|
50
|
-
)
|
|
51
|
-
|
|
52
|
-
# Spread tests
|
|
53
|
-
def test_spread_shift(self):
|
|
54
|
-
self.perform_test_one(
|
|
55
|
-
lambda x: spread(x + 2),
|
|
56
|
-
lambda x: spread(x)
|
|
57
|
-
)
|
|
58
|
-
|
|
59
|
-
def test_spread_scale(self):
|
|
60
|
-
self.perform_test_one(
|
|
61
|
-
lambda x: spread(2 * x),
|
|
62
|
-
lambda x: 2 * spread(x)
|
|
63
|
-
)
|
|
64
|
-
|
|
65
|
-
def test_spread_negate(self):
|
|
66
|
-
self.perform_test_one(
|
|
67
|
-
lambda x: spread(-1 * x),
|
|
68
|
-
lambda x: spread(x)
|
|
69
|
-
)
|
|
70
|
-
|
|
71
|
-
# RelSpread tests
|
|
72
|
-
def test_rel_spread_scale(self):
|
|
73
|
-
self.perform_test_one(
|
|
74
|
-
lambda x: rel_spread(2 * x),
|
|
75
|
-
lambda x: rel_spread(x)
|
|
76
|
-
)
|
|
77
|
-
|
|
78
|
-
# Shift tests
|
|
79
|
-
def test_shift_shift(self):
|
|
80
|
-
self.perform_test_two(
|
|
81
|
-
lambda x, y: shift(x + 3, y + 2),
|
|
82
|
-
lambda x, y: shift(x, y) + 1
|
|
83
|
-
)
|
|
84
|
-
|
|
85
|
-
def test_shift_scale(self):
|
|
86
|
-
self.perform_test_two(
|
|
87
|
-
lambda x, y: shift(2 * x, 2 * y),
|
|
88
|
-
lambda x, y: 2 * shift(x, y)
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
def test_shift_antisymmetry(self):
|
|
92
|
-
self.perform_test_two(
|
|
93
|
-
lambda x, y: shift(x, y),
|
|
94
|
-
lambda x, y: -1 * shift(y, x)
|
|
95
|
-
)
|
|
96
|
-
|
|
97
|
-
# Ratio tests
|
|
98
|
-
def test_ratio_scale(self):
|
|
99
|
-
self.perform_test_two(
|
|
100
|
-
lambda x, y: ratio(2 * x, 3 * y),
|
|
101
|
-
lambda x, y: (2.0 / 3) * ratio(x, y)
|
|
102
|
-
)
|
|
103
|
-
|
|
104
|
-
# AvgSpread tests
|
|
105
|
-
def test_avg_spread_equal(self):
|
|
106
|
-
self.perform_test_one(
|
|
107
|
-
lambda x: avg_spread(x, x),
|
|
108
|
-
lambda x: spread(x)
|
|
109
|
-
)
|
|
110
|
-
|
|
111
|
-
def test_avg_spread_symmetry(self):
|
|
112
|
-
self.perform_test_two(
|
|
113
|
-
lambda x, y: avg_spread(x, y),
|
|
114
|
-
lambda x, y: avg_spread(y, x)
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
def test_avg_spread_average(self):
|
|
118
|
-
self.perform_test_one(
|
|
119
|
-
lambda x: avg_spread(x, 5 * x),
|
|
120
|
-
lambda x: 3 * spread(x)
|
|
121
|
-
)
|
|
122
|
-
|
|
123
|
-
def test_avg_spread_scale(self):
|
|
124
|
-
self.perform_test_two(
|
|
125
|
-
lambda x, y: avg_spread(-2 * x, -2 * y),
|
|
126
|
-
lambda x, y: 2 * avg_spread(x, y)
|
|
127
|
-
)
|
|
128
|
-
|
|
129
|
-
# Disparity tests
|
|
130
|
-
def test_disparity_shift(self):
|
|
131
|
-
self.perform_test_two(
|
|
132
|
-
lambda x, y: disparity(x + 2, y + 2),
|
|
133
|
-
lambda x, y: disparity(x, y)
|
|
134
|
-
)
|
|
135
|
-
|
|
136
|
-
def test_disparity_scale(self):
|
|
137
|
-
self.perform_test_two(
|
|
138
|
-
lambda x, y: disparity(2 * x, 2 * y),
|
|
139
|
-
lambda x, y: disparity(x, y)
|
|
140
|
-
)
|
|
141
|
-
|
|
142
|
-
def test_disparity_scale_neg(self):
|
|
143
|
-
self.perform_test_two(
|
|
144
|
-
lambda x, y: disparity(-2 * x, -2 * y),
|
|
145
|
-
lambda x, y: -1 * disparity(x, y)
|
|
146
|
-
)
|
|
147
|
-
|
|
148
|
-
def test_disparity_antisymmetry(self):
|
|
149
|
-
self.perform_test_two(
|
|
150
|
-
lambda x, y: disparity(x, y),
|
|
151
|
-
lambda x, y: -1 * disparity(y, x)
|
|
152
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|