scitex-2.15.1-py3-none-any.whl → scitex-2.15.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +68 -61
- scitex/_mcp_tools/introspect.py +42 -23
- scitex/_mcp_tools/template.py +24 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +30 -1550
- scitex/ai/classification/timeseries/_sliding_window_core.py +467 -0
- scitex/ai/classification/timeseries/_sliding_window_plotting.py +369 -0
- scitex/audio/__init__.py +2 -2
- scitex/audio/_tts.py +18 -10
- scitex/audio/engines/base.py +17 -10
- scitex/audio/engines/elevenlabs_engine.py +1 -1
- scitex/canvas/editor/flask_editor/_core/__init__.py +27 -0
- scitex/canvas/editor/flask_editor/_core/_bbox_extraction.py +200 -0
- scitex/canvas/editor/flask_editor/_core/_editor.py +173 -0
- scitex/canvas/editor/flask_editor/_core/_export_helpers.py +353 -0
- scitex/canvas/editor/flask_editor/_core/_routes_basic.py +190 -0
- scitex/canvas/editor/flask_editor/_core/_routes_export.py +332 -0
- scitex/canvas/editor/flask_editor/_core/_routes_panels.py +252 -0
- scitex/canvas/editor/flask_editor/_core/_routes_save.py +218 -0
- scitex/canvas/editor/flask_editor/_core.py +25 -1684
- scitex/cli/introspect.py +112 -74
- scitex/cli/main.py +2 -0
- scitex/cli/plt.py +357 -0
- scitex/cli/repro.py +15 -8
- scitex/cli/resource.py +15 -8
- scitex/cli/scholar/__init__.py +15 -8
- scitex/cli/social.py +6 -6
- scitex/cli/stats.py +15 -8
- scitex/cli/template.py +129 -12
- scitex/cli/tex.py +15 -8
- scitex/cli/writer.py +15 -8
- scitex/cloud/__init__.py +41 -2
- scitex/config/_env_registry.py +84 -19
- scitex/context/__init__.py +22 -0
- scitex/dev/__init__.py +20 -1
- scitex/gen/__init__.py +50 -14
- scitex/gen/_list_packages.py +4 -4
- scitex/introspect/__init__.py +16 -9
- scitex/introspect/_core.py +7 -8
- scitex/{gen/_inspect_module.py → introspect/_list_api.py} +43 -54
- scitex/introspect/_mcp/__init__.py +10 -6
- scitex/introspect/_mcp/handlers.py +37 -12
- scitex/introspect/_members.py +7 -3
- scitex/introspect/_signature.py +3 -3
- scitex/introspect/_source.py +2 -2
- scitex/io/_save.py +1 -2
- scitex/logging/_formatters.py +19 -9
- scitex/mcp_server.py +1 -1
- scitex/os/__init__.py +4 -0
- scitex/{gen → os}/_check_host.py +4 -5
- scitex/plt/__init__.py +11 -14
- scitex/session/__init__.py +26 -7
- scitex/session/_decorator.py +1 -1
- scitex/sh/__init__.py +7 -4
- scitex/social/__init__.py +10 -8
- scitex/stats/_mcp/_handlers/__init__.py +31 -0
- scitex/stats/_mcp/_handlers/_corrections.py +113 -0
- scitex/stats/_mcp/_handlers/_descriptive.py +78 -0
- scitex/stats/_mcp/_handlers/_effect_size.py +106 -0
- scitex/stats/_mcp/_handlers/_format.py +94 -0
- scitex/stats/_mcp/_handlers/_normality.py +110 -0
- scitex/stats/_mcp/_handlers/_posthoc.py +224 -0
- scitex/stats/_mcp/_handlers/_power.py +247 -0
- scitex/stats/_mcp/_handlers/_recommend.py +102 -0
- scitex/stats/_mcp/_handlers/_run_test.py +279 -0
- scitex/stats/_mcp/_handlers/_stars.py +48 -0
- scitex/stats/_mcp/handlers.py +19 -1171
- scitex/stats/auto/_stat_style.py +175 -0
- scitex/stats/auto/_style_definitions.py +411 -0
- scitex/stats/auto/_styles.py +22 -620
- scitex/stats/descriptive/__init__.py +11 -8
- scitex/stats/descriptive/_ci.py +39 -0
- scitex/stats/power/_power.py +15 -4
- scitex/str/__init__.py +2 -1
- scitex/str/_title_case.py +63 -0
- scitex/template/__init__.py +25 -10
- scitex/template/_code_templates.py +147 -0
- scitex/template/_mcp/handlers.py +81 -0
- scitex/template/_mcp/tool_schemas.py +55 -0
- scitex/template/_templates/__init__.py +51 -0
- scitex/template/_templates/audio.py +233 -0
- scitex/template/_templates/canvas.py +312 -0
- scitex/template/_templates/capture.py +268 -0
- scitex/template/_templates/config.py +43 -0
- scitex/template/_templates/diagram.py +294 -0
- scitex/template/_templates/io.py +107 -0
- scitex/template/_templates/module.py +53 -0
- scitex/template/_templates/plt.py +202 -0
- scitex/template/_templates/scholar.py +267 -0
- scitex/template/_templates/session.py +130 -0
- scitex/template/_templates/session_minimal.py +43 -0
- scitex/template/_templates/session_plot.py +67 -0
- scitex/template/_templates/session_stats.py +77 -0
- scitex/template/_templates/stats.py +323 -0
- scitex/template/_templates/writer.py +296 -0
- scitex/ui/_backends/_email.py +10 -2
- scitex/ui/_backends/_webhook.py +5 -1
- scitex/web/_search_pubmed.py +10 -6
- {scitex-2.15.1.dist-info → scitex-2.15.2.dist-info}/METADATA +1 -1
- {scitex-2.15.1.dist-info → scitex-2.15.2.dist-info}/RECORD +105 -64
- scitex/gen/_ci.py +0 -12
- scitex/gen/_title_case.py +0 -89
- /scitex/{gen → context}/_detect_environment.py +0 -0
- /scitex/{gen → context}/_get_notebook_path.py +0 -0
- /scitex/{gen/_shell.py → sh/_shell_legacy.py} +0 -0
- {scitex-2.15.1.dist-info → scitex-2.15.2.dist-info}/WHEEL +0 -0
- {scitex-2.15.1.dist-info → scitex-2.15.2.dist-info}/entry_points.txt +0 -0
- {scitex-2.15.1.dist-info → scitex-2.15.2.dist-info}/licenses/LICENSE +0 -0
scitex/stats/_mcp/_handlers/_format.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+# Timestamp: 2026-01-25
+# File: src/scitex/stats/_mcp/_handlers/_format.py
+
+"""Results formatting handler."""
+
+from __future__ import annotations
+
+import asyncio
+from datetime import datetime
+
+__all__ = ["format_results_handler"]
+
+
+async def format_results_handler(
+    test_name: str,
+    statistic: float,
+    p_value: float,
+    df: float | None = None,
+    effect_size: float | None = None,
+    effect_size_name: str | None = None,
+    style: str = "apa",
+    ci_lower: float | None = None,
+    ci_upper: float | None = None,
+) -> dict:
+    """Format statistical results in journal style."""
+    try:
+        loop = asyncio.get_event_loop()
+
+        def do_format():
+            from scitex.stats.auto import format_test_line, p_to_stars
+            from scitex.stats.auto._formatting import EffectResultDict, TestResultDict
+
+            # Build test result dict
+            test_result: TestResultDict = {
+                "test_name": test_name,
+                "stat": statistic,
+                "p_raw": p_value,
+            }
+            if df is not None:
+                test_result["df"] = df
+
+            # Build effect result if provided
+            effects = None
+            if effect_size is not None:
+                effects = [
+                    EffectResultDict(
+                        name=effect_size_name or "d",
+                        label=effect_size_name or "Cohen's d",
+                        value=effect_size,
+                        ci_lower=ci_lower,
+                        ci_upper=ci_upper,
+                    )
+                ]
+
+            # Map style names
+            style_map = {
+                "apa": "apa_latex",
+                "nature": "nature",
+                "science": "science",
+                "brief": "brief",
+            }
+            style_id = style_map.get(style, "apa_latex")
+
+            # Format the line
+            formatted = format_test_line(
+                test_result,
+                effects=effects,
+                style=style_id,
+                include_n=False,
+            )
+
+            # Get stars representation
+            stars = p_to_stars(p_value)
+
+            return {
+                "formatted": formatted,
+                "stars": stars,
+            }
+
+        result = await loop.run_in_executor(None, do_format)
+
+        return {
+            "success": True,
+            "style": style,
+            **result,
+            "timestamp": datetime.now().isoformat(),
+        }
+
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+# EOF
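
For orientation only (not part of the released diff): a minimal usage sketch of the new formatting handler. It assumes the module is importable from the private path shown in the `# File:` header above and that scitex 2.15.2 is installed; the statistics passed in are hypothetical example values.

import asyncio

from scitex.stats._mcp._handlers._format import format_results_handler

async def main():
    # Hypothetical t-test result; the handler itself is defined in the hunk above.
    result = await format_results_handler(
        test_name="Welch t-test",
        statistic=2.31,
        p_value=0.024,
        df=27.4,
        effect_size=0.85,
        effect_size_name="Cohen's d",
        style="apa",
    )
    # On success the handler returns a dict like
    # {"success": True, "style": "apa", "formatted": "...", "stars": "*", "timestamp": "..."}
    print(result)

asyncio.run(main())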
scitex/stats/_mcp/_handlers/_normality.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+# Timestamp: 2026-01-25
+# File: src/scitex/stats/_mcp/_handlers/_normality.py
+
+"""Normality test handler."""
+
+from __future__ import annotations
+
+import asyncio
+from datetime import datetime
+
+import numpy as np
+
+__all__ = ["normality_test_handler"]
+
+
+async def normality_test_handler(
+    data: list[float],
+    method: str = "shapiro",
+) -> dict:
+    """Test whether data follows a normal distribution."""
+    try:
+        from scipy import stats as scipy_stats
+
+        loop = asyncio.get_event_loop()
+
+        def do_normality():
+            arr = np.array(data, dtype=float)
+            arr = arr[~np.isnan(arr)]
+
+            if len(arr) < 3:
+                return {"error": "Need at least 3 data points"}
+
+            result = {}
+
+            if method == "shapiro":
+                stat, p_value = scipy_stats.shapiro(arr)
+                result = {
+                    "test": "Shapiro-Wilk",
+                    "statistic": float(stat),
+                    "statistic_name": "W",
+                    "p_value": float(p_value),
+                }
+
+            elif method == "dagostino":
+                if len(arr) < 8:
+                    return {"error": "D'Agostino test requires at least 8 samples"}
+                stat, p_value = scipy_stats.normaltest(arr)
+                result = {
+                    "test": "D'Agostino-Pearson",
+                    "statistic": float(stat),
+                    "statistic_name": "K2",
+                    "p_value": float(p_value),
+                }
+
+            elif method == "anderson":
+                res = scipy_stats.anderson(arr, dist="norm")
+                # Use 5% significance level
+                idx = 2  # Index for 5% level
+                result = {
+                    "test": "Anderson-Darling",
+                    "statistic": float(res.statistic),
+                    "statistic_name": "A2",
+                    "critical_value_5pct": float(res.critical_values[idx]),
+                    "normal": bool(res.statistic < res.critical_values[idx]),
+                }
+
+            elif method == "lilliefors":
+                try:
+                    from statsmodels.stats.diagnostic import lilliefors
+
+                    stat, p_value = lilliefors(arr, dist="norm")
+                    result = {
+                        "test": "Lilliefors",
+                        "statistic": float(stat),
+                        "statistic_name": "D",
+                        "p_value": float(p_value),
+                    }
+                except ImportError:
+                    return {"error": "statsmodels required for Lilliefors test"}
+
+            else:
+                raise ValueError(f"Unknown method: {method}")
+
+            # Add interpretation
+            if "p_value" in result:
+                result["is_normal"] = result["p_value"] >= 0.05
+                result["interpretation"] = (
+                    "Data appears normally distributed (p >= 0.05)"
+                    if result["is_normal"]
+                    else "Data deviates from normal distribution (p < 0.05)"
+                )
+
+            return result
+
+        result = await loop.run_in_executor(None, do_normality)
+
+        return {
+            "success": True,
+            "method": method,
+            "n": len(data),
+            **result,
+            "timestamp": datetime.now().isoformat(),
+        }
+
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+# EOF
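
Again for orientation (not part of the diff): a sketch of calling the new normality handler, under the same assumptions about the private import path; the data list is a made-up example. Note that the "anderson" branch reports a 5% critical value instead of a p-value, so the interpretation fields may be absent for that method.

import asyncio

from scitex.stats._mcp._handlers._normality import normality_test_handler

async def main():
    # Hypothetical sample; NaNs would be dropped by the handler.
    data = [4.8, 5.1, 5.3, 4.9, 5.0, 5.2, 4.7, 5.4, 5.1, 5.0]
    for method in ("shapiro", "dagostino", "anderson"):
        result = await normality_test_handler(data, method=method)
        # "p_value"/"is_normal" exist for shapiro and dagostino; anderson returns "normal".
        print(method, result.get("p_value"), result.get("is_normal"), result.get("normal"))

asyncio.run(main())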
scitex/stats/_mcp/_handlers/_posthoc.py
@@ -0,0 +1,224 @@
+#!/usr/bin/env python3
+# Timestamp: 2026-01-25
+# File: src/scitex/stats/_mcp/_handlers/_posthoc.py
+
+"""Post-hoc test handler."""
+
+from __future__ import annotations
+
+import asyncio
+from datetime import datetime
+
+import numpy as np
+
+__all__ = ["posthoc_test_handler"]
+
+
+async def posthoc_test_handler(
+    groups: list[list[float]],
+    group_names: list[str] | None = None,
+    method: str = "tukey",
+    control_group: int = 0,
+) -> dict:
+    """Run post-hoc pairwise comparisons."""
+    try:
+        loop = asyncio.get_event_loop()
+
+        def do_posthoc():
+            group_arrays = [np.array(g, dtype=float) for g in groups]
+            names = group_names or [f"Group_{i + 1}" for i in range(len(groups))]
+
+            if method == "tukey":
+                comparisons = _tukey_hsd(group_arrays, names)
+            elif method == "dunnett":
+                comparisons = _dunnett(group_arrays, names, control_group)
+            elif method == "games_howell":
+                comparisons = _games_howell(group_arrays, names)
+            elif method == "dunn":
+                comparisons = _dunn(group_arrays, names)
+            else:
+                raise ValueError(f"Unknown method: {method}")
+
+            return comparisons
+
+        comparisons = await loop.run_in_executor(None, do_posthoc)
+
+        return {
+            "success": True,
+            "method": method,
+            "n_groups": len(groups),
+            "n_comparisons": len(comparisons),
+            "comparisons": comparisons,
+            "timestamp": datetime.now().isoformat(),
+        }
+
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+def _tukey_hsd(group_arrays, names):
+    """Tukey HSD test."""
+    from scipy import stats as scipy_stats
+
+    all_data = np.concatenate(group_arrays)
+    group_labels = np.concatenate(
+        [[names[i]] * len(g) for i, g in enumerate(group_arrays)]
+    )
+
+    comparisons = []
+
+    try:
+        from statsmodels.stats.multicomp import pairwise_tukeyhsd
+
+        tukey = pairwise_tukeyhsd(all_data, group_labels)
+
+        for i in range(len(tukey.summary().data) - 1):
+            row = tukey.summary().data[i + 1]
+            comparisons.append(
+                {
+                    "group1": str(row[0]),
+                    "group2": str(row[1]),
+                    "mean_diff": float(row[2]),
+                    "p_adj": float(row[3]),
+                    "ci_lower": float(row[4]),
+                    "ci_upper": float(row[5]),
+                    "reject": bool(row[6]),
+                }
+            )
+    except ImportError:
+        # Fallback: Bonferroni-corrected t-tests
+        n_comparisons = len(group_arrays) * (len(group_arrays) - 1) // 2
+        for i in range(len(group_arrays)):
+            for j in range(i + 1, len(group_arrays)):
+                stat, p = scipy_stats.ttest_ind(group_arrays[i], group_arrays[j])
+                p_adj = min(p * n_comparisons, 1.0)
+                comparisons.append(
+                    {
+                        "group1": names[i],
+                        "group2": names[j],
+                        "mean_diff": float(
+                            np.mean(group_arrays[i]) - np.mean(group_arrays[j])
+                        ),
+                        "t_statistic": float(stat),
+                        "p_value": float(p),
+                        "p_adj": float(p_adj),
+                        "reject": p_adj < 0.05,
+                    }
+                )
+
+    return comparisons
+
+
+def _dunnett(group_arrays, names, control_group):
+    """Dunnett's test (compare all to control)."""
+    from scipy import stats as scipy_stats
+
+    control = group_arrays[control_group]
+    n_comparisons = len(group_arrays) - 1
+
+    comparisons = []
+    for i, (name, group) in enumerate(zip(names, group_arrays)):
+        if i == control_group:
+            continue
+        stat, p = scipy_stats.ttest_ind(group, control)
+        p_adj = min(p * n_comparisons, 1.0)
+        comparisons.append(
+            {
+                "group": name,
+                "vs_control": names[control_group],
+                "mean_diff": float(np.mean(group) - np.mean(control)),
+                "t_statistic": float(stat),
+                "p_value": float(p),
+                "p_adj": float(p_adj),
+                "reject": p_adj < 0.05,
+            }
+        )
+
+    return comparisons
+
+
+def _games_howell(group_arrays, names):
+    """Games-Howell test (doesn't assume equal variances)."""
+    from scipy import stats as scipy_stats
+
+    comparisons = []
+    n_comparisons = len(group_arrays) * (len(group_arrays) - 1) // 2
+
+    for i in range(len(group_arrays)):
+        for j in range(i + 1, len(group_arrays)):
+            g1, g2 = group_arrays[i], group_arrays[j]
+            n1, n2 = len(g1), len(g2)
+            m1, m2 = np.mean(g1), np.mean(g2)
+            v1, v2 = np.var(g1, ddof=1), np.var(g2, ddof=1)
+
+            se = np.sqrt(v1 / n1 + v2 / n2)
+            t_stat = (m1 - m2) / se
+
+            # Welch-Satterthwaite df
+            df = (v1 / n1 + v2 / n2) ** 2 / (
+                (v1 / n1) ** 2 / (n1 - 1) + (v2 / n2) ** 2 / (n2 - 1)
+            )
+
+            p = 2 * (1 - scipy_stats.t.cdf(abs(t_stat), df))
+            p_adj = min(p * n_comparisons, 1.0)
+
+            comparisons.append(
+                {
+                    "group1": names[i],
+                    "group2": names[j],
+                    "mean_diff": float(m1 - m2),
+                    "t_statistic": float(t_stat),
+                    "df": float(df),
+                    "p_value": float(p),
+                    "p_adj": float(p_adj),
+                    "reject": p_adj < 0.05,
+                }
+            )
+
+    return comparisons
+
+
+def _dunn(group_arrays, names):
+    """Dunn's test for Kruskal-Wallis post-hoc."""
+    from scipy import stats as scipy_stats
+
+    all_data = np.concatenate(group_arrays)
+    ranks = scipy_stats.rankdata(all_data)
+
+    # Assign ranks to groups
+    idx = 0
+    group_ranks = []
+    for g in group_arrays:
+        group_ranks.append(ranks[idx : idx + len(g)])
+        idx += len(g)
+
+    n_total = len(all_data)
+    n_comparisons = len(group_arrays) * (len(group_arrays) - 1) // 2
+
+    comparisons = []
+    for i in range(len(group_arrays)):
+        for j in range(i + 1, len(group_arrays)):
+            n_i, n_j = len(group_arrays[i]), len(group_arrays[j])
+            r_i, r_j = np.mean(group_ranks[i]), np.mean(group_ranks[j])
+
+            se = np.sqrt(n_total * (n_total + 1) / 12 * (1 / n_i + 1 / n_j))
+            z = (r_i - r_j) / se
+            p = 2 * (1 - scipy_stats.norm.cdf(abs(z)))
+            p_adj = min(p * n_comparisons, 1.0)
+
+            comparisons.append(
+                {
+                    "group1": names[i],
+                    "group2": names[j],
+                    "mean_rank_diff": float(r_i - r_j),
+                    "z_statistic": float(z),
+                    "p_value": float(p),
+                    "p_adj": float(p_adj),
+                    "reject": p_adj < 0.05,
+                }
+            )
+
+    return comparisons
+
+
+# EOF
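
Orientation sketch (not part of the diff): calling the new post-hoc handler on three made-up groups, again assuming the private import path. As the hunk above shows, the "tukey" method uses statsmodels' pairwise_tukeyhsd when available and falls back to Bonferroni-corrected independent t-tests otherwise.

import asyncio

from scitex.stats._mcp._handlers._posthoc import posthoc_test_handler

async def main():
    # Hypothetical groups: one control and two treatments.
    groups = [
        [5.1, 5.3, 4.9, 5.2, 5.0],
        [5.8, 6.1, 5.9, 6.0, 5.7],
        [5.0, 5.2, 5.1, 4.8, 5.3],
    ]
    result = await posthoc_test_handler(
        groups,
        group_names=["control", "treatment_a", "treatment_b"],
        method="tukey",
    )
    if result["success"]:
        for comparison in result["comparisons"]:
            print(comparison)

asyncio.run(main())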
scitex/stats/_mcp/_handlers/_power.py
@@ -0,0 +1,247 @@
+#!/usr/bin/env python3
+# Timestamp: 2026-01-25
+# File: src/scitex/stats/_mcp/_handlers/_power.py
+
+"""Power analysis handler."""
+
+from __future__ import annotations
+
+import asyncio
+from datetime import datetime
+
+import numpy as np
+
+__all__ = ["power_analysis_handler"]
+
+
+async def power_analysis_handler(
+    test_type: str = "ttest",
+    effect_size: float | None = None,
+    alpha: float = 0.05,
+    power: float = 0.8,
+    n: int | None = None,
+    n_groups: int = 2,
+    ratio: float = 1.0,
+) -> dict:
+    """Calculate statistical power or required sample size."""
+    try:
+        loop = asyncio.get_event_loop()
+
+        def do_power():
+            from scitex.stats.power._power import power_ttest, sample_size_ttest
+
+            result = {}
+
+            if test_type == "ttest":
+                if n is not None and effect_size is not None:
+                    # Calculate power given n and effect size
+                    calculated_power = power_ttest(
+                        effect_size=effect_size,
+                        n1=n,
+                        n2=int(n * ratio),
+                        alpha=alpha,
+                        test_type="two-sample",
+                    )
+                    result = {
+                        "mode": "power_calculation",
+                        "power": calculated_power,
+                        "n1": n,
+                        "n2": int(n * ratio),
+                        "effect_size": effect_size,
+                        "alpha": alpha,
+                    }
+                elif effect_size is not None:
+                    # Calculate required sample size
+                    n1, n2 = sample_size_ttest(
+                        effect_size=effect_size,
+                        power=power,
+                        alpha=alpha,
+                        ratio=ratio,
+                    )
+                    result = {
+                        "mode": "sample_size_calculation",
+                        "required_n1": n1,
+                        "required_n2": n2,
+                        "total_n": n1 + n2,
+                        "effect_size": effect_size,
+                        "target_power": power,
+                        "alpha": alpha,
+                    }
+                else:
+                    raise ValueError("Either n or effect_size must be provided")
+
+            elif test_type == "anova":
+                result = _power_anova(effect_size, alpha, power, n, n_groups)
+
+            elif test_type == "correlation":
+                result = _power_correlation(effect_size, alpha, power, n)
+
+            elif test_type == "chi2":
+                result = _power_chi2(effect_size, alpha, power, n, n_groups)
+
+            else:
+                raise ValueError(f"Unknown test_type: {test_type}")
+
+            return result
+
+        result = await loop.run_in_executor(None, do_power)
+
+        return {
+            "success": True,
+            "test_type": test_type,
+            **result,
+            "timestamp": datetime.now().isoformat(),
+        }
+
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+def _power_anova(
+    effect_size: float | None,
+    alpha: float,
+    power: float,
+    n: int | None,
+    n_groups: int,
+) -> dict:
+    """ANOVA power calculation."""
+    from scipy import stats as scipy_stats
+
+    if effect_size is None:
+        raise ValueError("effect_size required for ANOVA power")
+
+    if n is not None:
+        df1 = n_groups - 1
+        df2 = n_groups * n - n_groups
+        nc = effect_size**2 * n * n_groups
+        f_crit = scipy_stats.f.ppf(1 - alpha, df1, df2)
+        power_val = 1 - scipy_stats.ncf.cdf(f_crit, df1, df2, nc)
+        return {
+            "mode": "power_calculation",
+            "power": power_val,
+            "n_per_group": n,
+            "n_groups": n_groups,
+            "effect_size_f": effect_size,
+            "alpha": alpha,
+        }
+    else:
+        # Binary search for n
+        n_min, n_max = 2, 1000
+        while n_max - n_min > 1:
+            n_mid = (n_min + n_max) // 2
+            df1 = n_groups - 1
+            df2 = n_groups * n_mid - n_groups
+            nc = effect_size**2 * n_mid * n_groups
+            f_crit = scipy_stats.f.ppf(1 - alpha, df1, df2)
+            power_val = 1 - scipy_stats.ncf.cdf(f_crit, df1, df2, nc)
+            if power_val < power:
+                n_min = n_mid
+            else:
+                n_max = n_mid
+
+        return {
+            "mode": "sample_size_calculation",
+            "required_n_per_group": n_max,
+            "total_n": n_max * n_groups,
+            "n_groups": n_groups,
+            "effect_size_f": effect_size,
+            "target_power": power,
+            "alpha": alpha,
+        }
+
+
+def _power_correlation(
+    effect_size: float | None,
+    alpha: float,
+    power: float,
+    n: int | None,
+) -> dict:
+    """Correlation power calculation."""
+    from scipy import stats as scipy_stats
+
+    if effect_size is None:
+        raise ValueError("effect_size (r) required for correlation power")
+
+    if n is not None:
+        # Calculate power
+        z = 0.5 * np.log((1 + effect_size) / (1 - effect_size))
+        se = 1 / np.sqrt(n - 3)
+        z_crit = scipy_stats.norm.ppf(1 - alpha / 2)
+        power_val = (
+            1
+            - scipy_stats.norm.cdf(z_crit - z / se)
+            + scipy_stats.norm.cdf(-z_crit - z / se)
+        )
+        return {
+            "mode": "power_calculation",
+            "power": power_val,
+            "n": n,
+            "effect_size_r": effect_size,
+            "alpha": alpha,
+        }
+    else:
+        # Calculate required n
+        z = 0.5 * np.log((1 + effect_size) / (1 - effect_size))
+        z_crit = scipy_stats.norm.ppf(1 - alpha / 2)
+        z_power = scipy_stats.norm.ppf(power)
+        required_n = int(np.ceil(((z_crit + z_power) / z) ** 2 + 3))
+        return {
+            "mode": "sample_size_calculation",
+            "required_n": required_n,
+            "effect_size_r": effect_size,
+            "target_power": power,
+            "alpha": alpha,
+        }
+
+
+def _power_chi2(
+    effect_size: float | None,
+    alpha: float,
+    power: float,
+    n: int | None,
+    n_groups: int,
+) -> dict:
+    """Chi-square power calculation."""
+    from scipy import stats as scipy_stats
+
+    if effect_size is None:
+        raise ValueError("effect_size (w) required for chi2 power")
+
+    df = n_groups - 1  # Simplified: using n_groups as number of cells
+
+    if n is not None:
+        nc = effect_size**2 * n
+        chi2_crit = scipy_stats.chi2.ppf(1 - alpha, df)
+        power_val = 1 - scipy_stats.ncx2.cdf(chi2_crit, df, nc)
+        return {
+            "mode": "power_calculation",
+            "power": power_val,
+            "n": n,
+            "df": df,
+            "effect_size_w": effect_size,
+            "alpha": alpha,
+        }
+    else:
+        # Binary search for n
+        n_min, n_max = 10, 10000
+        while n_max - n_min > 1:
+            n_mid = (n_min + n_max) // 2
+            nc = effect_size**2 * n_mid
+            chi2_crit = scipy_stats.chi2.ppf(1 - alpha, df)
+            power_val = 1 - scipy_stats.ncx2.cdf(chi2_crit, df, nc)
+            if power_val < power:
+                n_min = n_mid
+            else:
+                n_max = n_mid
+
+        return {
+            "mode": "sample_size_calculation",
+            "required_n": n_max,
+            "df": df,
+            "effect_size_w": effect_size,
+            "target_power": power,
+            "alpha": alpha,
+        }
+
+
+# EOF
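
Orientation sketch (not part of the diff): the new power handler works in two modes, as the hunk above shows: given an effect size alone it returns a required sample size, and given both n and an effect size it returns achieved power. The import path and numeric inputs below are hypothetical.

import asyncio

from scitex.stats._mcp._handlers._power import power_analysis_handler

async def main():
    # Sample size for a two-sample t-test at Cohen's d = 0.5, 80% power, alpha = 0.05.
    planning = await power_analysis_handler(
        test_type="ttest", effect_size=0.5, alpha=0.05, power=0.8
    )
    print(planning.get("required_n1"), planning.get("required_n2"))

    # Achieved power for n = 30 per group at the same effect size.
    achieved = await power_analysis_handler(test_type="ttest", effect_size=0.5, n=30)
    print(achieved.get("power"))

asyncio.run(main())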