scitex-2.15.1-py3-none-any.whl → scitex-2.15.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. scitex/__init__.py +68 -61
  2. scitex/_mcp_tools/introspect.py +42 -23
  3. scitex/_mcp_tools/template.py +24 -0
  4. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +30 -1550
  5. scitex/ai/classification/timeseries/_sliding_window_core.py +467 -0
  6. scitex/ai/classification/timeseries/_sliding_window_plotting.py +369 -0
  7. scitex/audio/__init__.py +2 -2
  8. scitex/audio/_tts.py +18 -10
  9. scitex/audio/engines/base.py +17 -10
  10. scitex/audio/engines/elevenlabs_engine.py +1 -1
  11. scitex/canvas/editor/flask_editor/_core/__init__.py +27 -0
  12. scitex/canvas/editor/flask_editor/_core/_bbox_extraction.py +200 -0
  13. scitex/canvas/editor/flask_editor/_core/_editor.py +173 -0
  14. scitex/canvas/editor/flask_editor/_core/_export_helpers.py +353 -0
  15. scitex/canvas/editor/flask_editor/_core/_routes_basic.py +190 -0
  16. scitex/canvas/editor/flask_editor/_core/_routes_export.py +332 -0
  17. scitex/canvas/editor/flask_editor/_core/_routes_panels.py +252 -0
  18. scitex/canvas/editor/flask_editor/_core/_routes_save.py +218 -0
  19. scitex/canvas/editor/flask_editor/_core.py +25 -1684
  20. scitex/cli/introspect.py +112 -74
  21. scitex/cli/main.py +2 -0
  22. scitex/cli/plt.py +357 -0
  23. scitex/cli/repro.py +15 -8
  24. scitex/cli/resource.py +15 -8
  25. scitex/cli/scholar/__init__.py +15 -8
  26. scitex/cli/social.py +6 -6
  27. scitex/cli/stats.py +15 -8
  28. scitex/cli/template.py +129 -12
  29. scitex/cli/tex.py +15 -8
  30. scitex/cli/writer.py +15 -8
  31. scitex/cloud/__init__.py +41 -2
  32. scitex/config/_env_registry.py +84 -19
  33. scitex/context/__init__.py +22 -0
  34. scitex/dev/__init__.py +20 -1
  35. scitex/gen/__init__.py +50 -14
  36. scitex/gen/_list_packages.py +4 -4
  37. scitex/introspect/__init__.py +16 -9
  38. scitex/introspect/_core.py +7 -8
  39. scitex/{gen/_inspect_module.py → introspect/_list_api.py} +43 -54
  40. scitex/introspect/_mcp/__init__.py +10 -6
  41. scitex/introspect/_mcp/handlers.py +37 -12
  42. scitex/introspect/_members.py +7 -3
  43. scitex/introspect/_signature.py +3 -3
  44. scitex/introspect/_source.py +2 -2
  45. scitex/io/_save.py +1 -2
  46. scitex/logging/_formatters.py +19 -9
  47. scitex/mcp_server.py +1 -1
  48. scitex/os/__init__.py +4 -0
  49. scitex/{gen → os}/_check_host.py +4 -5
  50. scitex/plt/__init__.py +11 -14
  51. scitex/session/__init__.py +26 -7
  52. scitex/session/_decorator.py +1 -1
  53. scitex/sh/__init__.py +7 -4
  54. scitex/social/__init__.py +10 -8
  55. scitex/stats/_mcp/_handlers/__init__.py +31 -0
  56. scitex/stats/_mcp/_handlers/_corrections.py +113 -0
  57. scitex/stats/_mcp/_handlers/_descriptive.py +78 -0
  58. scitex/stats/_mcp/_handlers/_effect_size.py +106 -0
  59. scitex/stats/_mcp/_handlers/_format.py +94 -0
  60. scitex/stats/_mcp/_handlers/_normality.py +110 -0
  61. scitex/stats/_mcp/_handlers/_posthoc.py +224 -0
  62. scitex/stats/_mcp/_handlers/_power.py +247 -0
  63. scitex/stats/_mcp/_handlers/_recommend.py +102 -0
  64. scitex/stats/_mcp/_handlers/_run_test.py +279 -0
  65. scitex/stats/_mcp/_handlers/_stars.py +48 -0
  66. scitex/stats/_mcp/handlers.py +19 -1171
  67. scitex/stats/auto/_stat_style.py +175 -0
  68. scitex/stats/auto/_style_definitions.py +411 -0
  69. scitex/stats/auto/_styles.py +22 -620
  70. scitex/stats/descriptive/__init__.py +11 -8
  71. scitex/stats/descriptive/_ci.py +39 -0
  72. scitex/stats/power/_power.py +15 -4
  73. scitex/str/__init__.py +2 -1
  74. scitex/str/_title_case.py +63 -0
  75. scitex/template/__init__.py +25 -10
  76. scitex/template/_code_templates.py +147 -0
  77. scitex/template/_mcp/handlers.py +81 -0
  78. scitex/template/_mcp/tool_schemas.py +55 -0
  79. scitex/template/_templates/__init__.py +51 -0
  80. scitex/template/_templates/audio.py +233 -0
  81. scitex/template/_templates/canvas.py +312 -0
  82. scitex/template/_templates/capture.py +268 -0
  83. scitex/template/_templates/config.py +43 -0
  84. scitex/template/_templates/diagram.py +294 -0
  85. scitex/template/_templates/io.py +107 -0
  86. scitex/template/_templates/module.py +53 -0
  87. scitex/template/_templates/plt.py +202 -0
  88. scitex/template/_templates/scholar.py +267 -0
  89. scitex/template/_templates/session.py +130 -0
  90. scitex/template/_templates/session_minimal.py +43 -0
  91. scitex/template/_templates/session_plot.py +67 -0
  92. scitex/template/_templates/session_stats.py +77 -0
  93. scitex/template/_templates/stats.py +323 -0
  94. scitex/template/_templates/writer.py +296 -0
  95. scitex/ui/_backends/_email.py +10 -2
  96. scitex/ui/_backends/_webhook.py +5 -1
  97. scitex/web/_search_pubmed.py +10 -6
  98. {scitex-2.15.1.dist-info → scitex-2.15.2.dist-info}/METADATA +1 -1
  99. {scitex-2.15.1.dist-info → scitex-2.15.2.dist-info}/RECORD +105 -64
  100. scitex/gen/_ci.py +0 -12
  101. scitex/gen/_title_case.py +0 -89
  102. /scitex/{gen → context}/_detect_environment.py +0 -0
  103. /scitex/{gen → context}/_get_notebook_path.py +0 -0
  104. /scitex/{gen/_shell.py → sh/_shell_legacy.py} +0 -0
  105. {scitex-2.15.1.dist-info → scitex-2.15.2.dist-info}/WHEEL +0 -0
  106. {scitex-2.15.1.dist-info → scitex-2.15.2.dist-info}/entry_points.txt +0 -0
  107. {scitex-2.15.1.dist-info → scitex-2.15.2.dist-info}/licenses/LICENSE +0 -0
scitex/stats/_mcp/handlers.py
@@ -1,16 +1,27 @@
 #!/usr/bin/env python3
-# Timestamp: 2026-01-08
-# File: src/scitex/stats/_mcp.handlers.py
-# ----------------------------------------
+# Timestamp: 2026-01-25
+# File: src/scitex/stats/_mcp/handlers.py
 
-"""Handler implementations for the scitex-stats MCP server."""
+"""Handler implementations for the scitex-stats MCP server.
 
-from __future__ import annotations
+This module re-exports handlers from the _handlers subpackage for
+backwards compatibility.
+"""
 
-import asyncio
-from datetime import datetime
+from __future__ import annotations
 
-import numpy as np
+from ._handlers import (
+    correct_pvalues_handler,
+    describe_handler,
+    effect_size_handler,
+    format_results_handler,
+    normality_test_handler,
+    p_to_stars_handler,
+    posthoc_test_handler,
+    power_analysis_handler,
+    recommend_tests_handler,
+    run_test_handler,
+)
 
 __all__ = [
     "recommend_tests_handler",
@@ -25,1167 +36,4 @@ __all__ = [
     "p_to_stars_handler",
 ]
 
-
-async def recommend_tests_handler(
-    n_groups: int = 2,
-    sample_sizes: list[int] | None = None,
-    outcome_type: str = "continuous",
-    design: str = "between",
-    paired: bool = False,
-    has_control_group: bool = False,
-    top_k: int = 3,
-) -> dict:
-    """Recommend appropriate statistical tests based on data characteristics."""
-    try:
-        from scitex.stats.auto import StatContext, recommend_tests
-
-        loop = asyncio.get_event_loop()
-
-        def do_recommend():
-            ctx = StatContext(
-                n_groups=n_groups,
-                sample_sizes=sample_sizes or [30] * n_groups,
-                outcome_type=outcome_type,
-                design=design,
-                paired=paired,
-                has_control_group=has_control_group,
-                n_factors=1,
-            )
-            tests = recommend_tests(ctx, top_k=top_k)
-
-            # Get details about each recommended test
-            from scitex.stats.auto._rules import TEST_RULES
-
-            recommendations = []
-            for test_name in tests:
-                rule = TEST_RULES.get(test_name)
-                if rule:
-                    recommendations.append(
-                        {
-                            "name": test_name,
-                            "family": rule.family,
-                            "priority": rule.priority,
-                            "needs_normality": rule.needs_normality,
-                            "needs_equal_variance": rule.needs_equal_variance,
-                            "rationale": _get_test_rationale(test_name),
-                        }
-                    )
-
-            return recommendations
-
-        recommendations = await loop.run_in_executor(None, do_recommend)
-
-        return {
-            "success": True,
-            "context": {
-                "n_groups": n_groups,
-                "sample_sizes": sample_sizes,
-                "outcome_type": outcome_type,
-                "design": design,
-                "paired": paired,
-                "has_control_group": has_control_group,
-            },
-            "recommendations": recommendations,
-            "timestamp": datetime.now().isoformat(),
-        }
-
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-
-
-def _get_test_rationale(test_name: str) -> str:
-    """Get rationale for recommending a specific test."""
-    rationales = {
-        "brunner_munzel": "Robust nonparametric test - no normality/equal variance assumptions",
-        "ttest_ind": "Classic parametric test for comparing two independent groups",
-        "ttest_paired": "Parametric test for paired/matched samples",
-        "ttest_1samp": "One-sample t-test for comparing to a population mean",
-        "mannwhitneyu": "Nonparametric alternative to independent t-test",
-        "wilcoxon": "Nonparametric alternative to paired t-test",
-        "anova": "Parametric test for comparing 3+ groups",
-        "kruskal": "Nonparametric alternative to one-way ANOVA",
-        "chi2": "Test for independence in contingency tables",
-        "fisher_exact": "Exact test for small sample contingency tables",
-        "pearson": "Parametric correlation coefficient",
-        "spearman": "Nonparametric rank correlation",
-        "kendall": "Robust nonparametric correlation for ordinal data",
-    }
-    return rationales.get(test_name, "Applicable to the given context")
-
-
-async def run_test_handler(
-    test_name: str,
-    data: list[list[float]],
-    alternative: str = "two-sided",
-) -> dict:
-    """Execute a statistical test on provided data."""
-    try:
-        from scipy import stats as scipy_stats
-
-        loop = asyncio.get_event_loop()
-
-        def do_test():
-            # Convert data to numpy arrays
-            groups = [np.array(g, dtype=float) for g in data]
-
-            result = {}
-
-            # Run the appropriate test
-            if test_name == "ttest_ind":
-                if len(groups) != 2:
-                    raise ValueError("t-test requires exactly 2 groups")
-                stat, p_value = scipy_stats.ttest_ind(
-                    groups[0], groups[1], alternative=alternative
-                )
-                df = len(groups[0]) + len(groups[1]) - 2
-                result = {
-                    "test": "Independent t-test",
-                    "statistic": float(stat),
-                    "statistic_name": "t",
-                    "p_value": float(p_value),
-                    "df": df,
-                }
-
-            elif test_name == "ttest_paired":
-                if len(groups) != 2:
-                    raise ValueError("Paired t-test requires exactly 2 groups")
-                stat, p_value = scipy_stats.ttest_rel(
-                    groups[0], groups[1], alternative=alternative
-                )
-                df = len(groups[0]) - 1
-                result = {
-                    "test": "Paired t-test",
-                    "statistic": float(stat),
-                    "statistic_name": "t",
-                    "p_value": float(p_value),
-                    "df": df,
-                }
-
-            elif test_name == "ttest_1samp":
-                if len(groups) != 1:
-                    raise ValueError("One-sample t-test requires exactly 1 group")
-                stat, p_value = scipy_stats.ttest_1samp(
-                    groups[0], 0, alternative=alternative
-                )
-                df = len(groups[0]) - 1
-                result = {
-                    "test": "One-sample t-test",
-                    "statistic": float(stat),
-                    "statistic_name": "t",
-                    "p_value": float(p_value),
-                    "df": df,
-                }
-
-            elif test_name == "brunner_munzel":
-                if len(groups) != 2:
-                    raise ValueError("Brunner-Munzel requires exactly 2 groups")
-                res = scipy_stats.brunnermunzel(
-                    groups[0], groups[1], alternative=alternative
-                )
-                result = {
-                    "test": "Brunner-Munzel test",
-                    "statistic": float(res.statistic),
-                    "statistic_name": "BM",
-                    "p_value": float(res.pvalue),
-                }
-
-            elif test_name == "mannwhitneyu":
-                if len(groups) != 2:
-                    raise ValueError("Mann-Whitney U requires exactly 2 groups")
-                stat, p_value = scipy_stats.mannwhitneyu(
-                    groups[0], groups[1], alternative=alternative
-                )
-                result = {
-                    "test": "Mann-Whitney U test",
-                    "statistic": float(stat),
-                    "statistic_name": "U",
-                    "p_value": float(p_value),
-                }
-
-            elif test_name == "wilcoxon":
-                if len(groups) != 2:
-                    raise ValueError("Wilcoxon requires exactly 2 paired groups")
-                stat, p_value = scipy_stats.wilcoxon(
-                    groups[0], groups[1], alternative=alternative
-                )
-                result = {
-                    "test": "Wilcoxon signed-rank test",
-                    "statistic": float(stat),
-                    "statistic_name": "W",
-                    "p_value": float(p_value),
-                }
-
-            elif test_name == "anova":
-                if len(groups) < 2:
-                    raise ValueError("ANOVA requires at least 2 groups")
-                stat, p_value = scipy_stats.f_oneway(*groups)
-                df_between = len(groups) - 1
-                df_within = sum(len(g) for g in groups) - len(groups)
-                result = {
-                    "test": "One-way ANOVA",
-                    "statistic": float(stat),
-                    "statistic_name": "F",
-                    "p_value": float(p_value),
-                    "df_between": df_between,
-                    "df_within": df_within,
-                }
-
-            elif test_name == "kruskal":
-                if len(groups) < 2:
-                    raise ValueError("Kruskal-Wallis requires at least 2 groups")
-                stat, p_value = scipy_stats.kruskal(*groups)
-                result = {
-                    "test": "Kruskal-Wallis H test",
-                    "statistic": float(stat),
-                    "statistic_name": "H",
-                    "p_value": float(p_value),
-                    "df": len(groups) - 1,
-                }
-
-            elif test_name == "chi2":
-                # Expects contingency table as data
-                table = np.array(data)
-                chi2, p_value, dof, expected = scipy_stats.chi2_contingency(table)
-                result = {
-                    "test": "Chi-square test of independence",
-                    "statistic": float(chi2),
-                    "statistic_name": "chi2",
-                    "p_value": float(p_value),
-                    "df": int(dof),
-                    "expected_frequencies": expected.tolist(),
-                }
-
-            elif test_name == "fisher_exact":
-                # Expects 2x2 contingency table
-                table = np.array(data)
-                if table.shape != (2, 2):
-                    raise ValueError("Fisher's exact test requires a 2x2 table")
-                odds_ratio, p_value = scipy_stats.fisher_exact(
-                    table, alternative=alternative
-                )
-                result = {
-                    "test": "Fisher's exact test",
-                    "statistic": float(odds_ratio),
-                    "statistic_name": "odds_ratio",
-                    "p_value": float(p_value),
-                }
-
-            elif test_name == "pearson":
-                if len(groups) != 2:
-                    raise ValueError("Pearson correlation requires exactly 2 variables")
-                r, p_value = scipy_stats.pearsonr(groups[0], groups[1])
-                result = {
-                    "test": "Pearson correlation",
-                    "statistic": float(r),
-                    "statistic_name": "r",
-                    "p_value": float(p_value),
-                }
-
-            elif test_name == "spearman":
-                if len(groups) != 2:
-                    raise ValueError(
-                        "Spearman correlation requires exactly 2 variables"
-                    )
-                r, p_value = scipy_stats.spearmanr(groups[0], groups[1])
-                result = {
-                    "test": "Spearman correlation",
-                    "statistic": float(r),
-                    "statistic_name": "rho",
-                    "p_value": float(p_value),
-                }
-
-            elif test_name == "kendall":
-                if len(groups) != 2:
-                    raise ValueError("Kendall correlation requires exactly 2 variables")
-                tau, p_value = scipy_stats.kendalltau(groups[0], groups[1])
-                result = {
-                    "test": "Kendall tau correlation",
-                    "statistic": float(tau),
-                    "statistic_name": "tau",
-                    "p_value": float(p_value),
-                }
-
-            else:
-                raise ValueError(f"Unknown test: {test_name}")
-
-            # Calculate effect size if applicable
-            if test_name in [
-                "ttest_ind",
-                "ttest_paired",
-                "brunner_munzel",
-                "mannwhitneyu",
-            ]:
-                from scitex.stats.effect_sizes import cliffs_delta, cohens_d
-
-                if len(groups) == 2:
-                    d = cohens_d(groups[0], groups[1])
-                    delta = cliffs_delta(groups[0], groups[1])
-                    result["effect_size"] = {
-                        "cohens_d": float(d),
-                        "cliffs_delta": float(delta),
-                    }
-
-            # Add significance determination
-            alpha = 0.05
-            result["significant"] = result["p_value"] < alpha
-            result["alpha"] = alpha
-
-            return result
-
-        result = await loop.run_in_executor(None, do_test)
-
-        return {
-            "success": True,
-            "test_name": test_name,
-            "alternative": alternative,
-            **result,
-            "timestamp": datetime.now().isoformat(),
-        }
-
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-
-
-async def format_results_handler(
-    test_name: str,
-    statistic: float,
-    p_value: float,
-    df: float | None = None,
-    effect_size: float | None = None,
-    effect_size_name: str | None = None,
-    style: str = "apa",
-    ci_lower: float | None = None,
-    ci_upper: float | None = None,
-) -> dict:
-    """Format statistical results in journal style."""
-    try:
-        loop = asyncio.get_event_loop()
-
-        def do_format():
-            from scitex.stats.auto import format_test_line, p_to_stars
-            from scitex.stats.auto._formatting import EffectResultDict, TestResultDict
-
-            # Build test result dict
-            test_result: TestResultDict = {
-                "test_name": test_name,
-                "stat": statistic,
-                "p_raw": p_value,
-            }
-            if df is not None:
-                test_result["df"] = df
-
-            # Build effect result if provided
-            effects = None
-            if effect_size is not None:
-                effects = [
-                    EffectResultDict(
-                        name=effect_size_name or "d",
-                        label=effect_size_name or "Cohen's d",
-                        value=effect_size,
-                        ci_lower=ci_lower,
-                        ci_upper=ci_upper,
-                    )
-                ]
-
-            # Map style names
-            style_map = {
-                "apa": "apa_latex",
-                "nature": "nature",
-                "science": "science",
-                "brief": "brief",
-            }
-            style_id = style_map.get(style, "apa_latex")
-
-            # Format the line
-            formatted = format_test_line(
-                test_result,
-                effects=effects,
-                style=style_id,
-                include_n=False,
-            )
-
-            # Get stars representation
-            stars = p_to_stars(p_value)
-
-            return {
-                "formatted": formatted,
-                "stars": stars,
-            }
-
-        result = await loop.run_in_executor(None, do_format)
-
-        return {
-            "success": True,
-            "style": style,
-            **result,
-            "timestamp": datetime.now().isoformat(),
-        }
-
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-
-
-async def power_analysis_handler(
-    test_type: str = "ttest",
-    effect_size: float | None = None,
-    alpha: float = 0.05,
-    power: float = 0.8,
-    n: int | None = None,
-    n_groups: int = 2,
-    ratio: float = 1.0,
-) -> dict:
-    """Calculate statistical power or required sample size."""
-    try:
-        loop = asyncio.get_event_loop()
-
-        def do_power():
-            from scitex.stats.power._power import power_ttest, sample_size_ttest
-
-            result = {}
-
-            if test_type == "ttest":
-                if n is not None and effect_size is not None:
-                    # Calculate power given n and effect size
-                    calculated_power = power_ttest(
-                        effect_size=effect_size,
-                        n1=n,
-                        n2=int(n * ratio),
-                        alpha=alpha,
-                        test_type="two-sample",
-                    )
-                    result = {
-                        "mode": "power_calculation",
-                        "power": calculated_power,
-                        "n1": n,
-                        "n2": int(n * ratio),
-                        "effect_size": effect_size,
-                        "alpha": alpha,
-                    }
-                elif effect_size is not None:
-                    # Calculate required sample size
-                    n1, n2 = sample_size_ttest(
-                        effect_size=effect_size,
-                        power=power,
-                        alpha=alpha,
-                        ratio=ratio,
-                    )
-                    result = {
-                        "mode": "sample_size_calculation",
-                        "required_n1": n1,
-                        "required_n2": n2,
-                        "total_n": n1 + n2,
-                        "effect_size": effect_size,
-                        "target_power": power,
-                        "alpha": alpha,
-                    }
-                else:
-                    raise ValueError("Either n or effect_size must be provided")
-
-            elif test_type == "anova":
-                # Simplified ANOVA power (using f = d * sqrt(k-1) / sqrt(2k))
-                if effect_size is None:
-                    raise ValueError("effect_size required for ANOVA power")
-
-                # Convert Cohen's f to d for approximation
-                # This is a simplified calculation
-                from scipy import stats as scipy_stats
-
-                if n is not None:
-                    df1 = n_groups - 1
-                    df2 = n_groups * n - n_groups
-                    nc = effect_size**2 * n * n_groups
-                    f_crit = scipy_stats.f.ppf(1 - alpha, df1, df2)
-                    power_val = 1 - scipy_stats.ncf.cdf(f_crit, df1, df2, nc)
-                    result = {
-                        "mode": "power_calculation",
-                        "power": power_val,
-                        "n_per_group": n,
-                        "n_groups": n_groups,
-                        "effect_size_f": effect_size,
-                        "alpha": alpha,
-                    }
-                else:
-                    # Binary search for n
-                    n_min, n_max = 2, 1000
-                    while n_max - n_min > 1:
-                        n_mid = (n_min + n_max) // 2
-                        df1 = n_groups - 1
-                        df2 = n_groups * n_mid - n_groups
-                        nc = effect_size**2 * n_mid * n_groups
-                        f_crit = scipy_stats.f.ppf(1 - alpha, df1, df2)
-                        power_val = 1 - scipy_stats.ncf.cdf(f_crit, df1, df2, nc)
-                        if power_val < power:
-                            n_min = n_mid
-                        else:
-                            n_max = n_mid
-
-                    result = {
-                        "mode": "sample_size_calculation",
-                        "required_n_per_group": n_max,
-                        "total_n": n_max * n_groups,
-                        "n_groups": n_groups,
-                        "effect_size_f": effect_size,
-                        "target_power": power,
-                        "alpha": alpha,
-                    }
-
-            elif test_type == "correlation":
-                # Power for correlation coefficient
-                from scipy import stats as scipy_stats
-
-                if effect_size is None:
-                    raise ValueError("effect_size (r) required for correlation power")
-
-                if n is not None:
-                    # Calculate power
-                    z = 0.5 * np.log((1 + effect_size) / (1 - effect_size))
-                    se = 1 / np.sqrt(n - 3)
-                    z_crit = scipy_stats.norm.ppf(1 - alpha / 2)
-                    power_val = (
-                        1
-                        - scipy_stats.norm.cdf(z_crit - z / se)
-                        + scipy_stats.norm.cdf(-z_crit - z / se)
-                    )
-                    result = {
-                        "mode": "power_calculation",
-                        "power": power_val,
-                        "n": n,
-                        "effect_size_r": effect_size,
-                        "alpha": alpha,
-                    }
-                else:
-                    # Calculate required n (binary search)
-                    z = 0.5 * np.log((1 + effect_size) / (1 - effect_size))
-                    z_crit = scipy_stats.norm.ppf(1 - alpha / 2)
-                    z_power = scipy_stats.norm.ppf(power)
-                    required_n = int(np.ceil(((z_crit + z_power) / z) ** 2 + 3))
-                    result = {
-                        "mode": "sample_size_calculation",
-                        "required_n": required_n,
-                        "effect_size_r": effect_size,
-                        "target_power": power,
-                        "alpha": alpha,
-                    }
-
-            elif test_type == "chi2":
-                # Chi-square power (simplified)
-                from scipy import stats as scipy_stats
-
-                if effect_size is None:
-                    raise ValueError("effect_size (w) required for chi2 power")
-
-                df = n_groups - 1  # Simplified: using n_groups as number of cells
-
-                if n is not None:
-                    nc = effect_size**2 * n
-                    chi2_crit = scipy_stats.chi2.ppf(1 - alpha, df)
-                    power_val = 1 - scipy_stats.ncx2.cdf(chi2_crit, df, nc)
-                    result = {
-                        "mode": "power_calculation",
-                        "power": power_val,
-                        "n": n,
-                        "df": df,
-                        "effect_size_w": effect_size,
-                        "alpha": alpha,
-                    }
-                else:
-                    # Binary search for n
-                    n_min, n_max = 10, 10000
-                    while n_max - n_min > 1:
-                        n_mid = (n_min + n_max) // 2
-                        nc = effect_size**2 * n_mid
-                        chi2_crit = scipy_stats.chi2.ppf(1 - alpha, df)
-                        power_val = 1 - scipy_stats.ncx2.cdf(chi2_crit, df, nc)
-                        if power_val < power:
-                            n_min = n_mid
-                        else:
-                            n_max = n_mid
-
-                    result = {
-                        "mode": "sample_size_calculation",
-                        "required_n": n_max,
-                        "df": df,
-                        "effect_size_w": effect_size,
-                        "target_power": power,
-                        "alpha": alpha,
-                    }
-
-            else:
-                raise ValueError(f"Unknown test_type: {test_type}")
-
-            return result
-
-        result = await loop.run_in_executor(None, do_power)
-
-        return {
-            "success": True,
-            "test_type": test_type,
-            **result,
-            "timestamp": datetime.now().isoformat(),
-        }
-
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-
-
-async def correct_pvalues_handler(
-    pvalues: list[float],
-    method: str = "fdr_bh",
-    alpha: float = 0.05,
-) -> dict:
-    """Apply multiple comparison correction to p-values."""
-    try:
-        loop = asyncio.get_event_loop()
-
-        def do_correct():
-            from statsmodels.stats.multitest import multipletests
-
-            # Map method names
-            method_map = {
-                "bonferroni": "bonferroni",
-                "fdr_bh": "fdr_bh",
-                "fdr_by": "fdr_by",
-                "holm": "holm",
-                "sidak": "sidak",
-            }
-            sm_method = method_map.get(method, "fdr_bh")
-
-            pvals = np.array(pvalues)
-            reject, pvals_corrected, _, _ = multipletests(
-                pvals, alpha=alpha, method=sm_method
-            )
-
-            return {
-                "original_pvalues": pvalues,
-                "corrected_pvalues": pvals_corrected.tolist(),
-                "reject_null": reject.tolist(),
-                "n_significant": int(reject.sum()),
-                "n_tests": len(pvalues),
-            }
-
-        result = await loop.run_in_executor(None, do_correct)
-
-        return {
-            "success": True,
-            "method": method,
-            "alpha": alpha,
-            **result,
-            "timestamp": datetime.now().isoformat(),
-        }
-
-    except ImportError:
-        # Fallback implementation without statsmodels
-        try:
-            n = len(pvalues)
-            pvals = np.array(pvalues)
-
-            if method == "bonferroni":
-                corrected = np.minimum(pvals * n, 1.0)
-            elif method == "holm":
-                sorted_idx = np.argsort(pvals)
-                corrected = np.empty(n)
-                cummax = 0.0
-                for rank, idx in enumerate(sorted_idx, start=1):
-                    adj = min((n - rank + 1) * pvals[idx], 1.0)
-                    adj = max(adj, cummax)
-                    corrected[idx] = adj
-                    cummax = adj
-            elif method == "fdr_bh":
-                sorted_idx = np.argsort(pvals)
-                corrected = np.empty(n)
-                prev = 1.0
-                for rank in range(n, 0, -1):
-                    idx = sorted_idx[rank - 1]
-                    bh = pvals[idx] * n / rank
-                    val = min(bh, prev, 1.0)
-                    corrected[idx] = val
-                    prev = val
-            elif method == "sidak":
-                corrected = 1 - (1 - pvals) ** n
-            else:
-                corrected = pvals
-
-            return {
-                "success": True,
-                "method": method,
-                "alpha": alpha,
-                "original_pvalues": pvalues,
-                "corrected_pvalues": corrected.tolist(),
-                "reject_null": (corrected < alpha).tolist(),
-                "n_significant": int((corrected < alpha).sum()),
-                "n_tests": n,
-                "timestamp": datetime.now().isoformat(),
-            }
-
-        except Exception as e:
-            return {"success": False, "error": str(e)}
-
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-
-
-async def describe_handler(
-    data: list[float],
-    percentiles: list[float] | None = None,
-) -> dict:
-    """Calculate descriptive statistics for data."""
-    try:
-        loop = asyncio.get_event_loop()
-
-        def do_describe():
-            arr = np.array(data, dtype=float)
-            arr = arr[~np.isnan(arr)]  # Remove NaN
-
-            if len(arr) == 0:
-                return {"error": "No valid data points"}
-
-            percs = percentiles or [25, 50, 75]
-            percentile_values = np.percentile(arr, percs)
-
-            result = {
-                "n": int(len(arr)),
-                "mean": float(np.mean(arr)),
-                "std": float(np.std(arr, ddof=1)) if len(arr) > 1 else 0.0,
-                "var": float(np.var(arr, ddof=1)) if len(arr) > 1 else 0.0,
-                "sem": (
-                    float(np.std(arr, ddof=1) / np.sqrt(len(arr)))
-                    if len(arr) > 1
-                    else 0.0
-                ),
-                "min": float(np.min(arr)),
-                "max": float(np.max(arr)),
-                "range": float(np.max(arr) - np.min(arr)),
-                "median": float(np.median(arr)),
-                "percentiles": {
-                    str(int(p)): float(v) for p, v in zip(percs, percentile_values)
-                },
-                "iqr": float(np.percentile(arr, 75) - np.percentile(arr, 25)),
-            }
-
-            # Add skewness and kurtosis if scipy available
-            try:
-                from scipy import stats as scipy_stats
-
-                result["skewness"] = float(scipy_stats.skew(arr))
-                result["kurtosis"] = float(scipy_stats.kurtosis(arr))
-            except ImportError:
-                pass
-
-            return result
-
-        result = await loop.run_in_executor(None, do_describe)
-
-        return {
-            "success": True,
-            **result,
-            "timestamp": datetime.now().isoformat(),
-        }
-
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-
-
-async def effect_size_handler(
-    group1: list[float],
-    group2: list[float],
-    measure: str = "cohens_d",
-    pooled: bool = True,
-) -> dict:
-    """Calculate effect size between groups."""
-    try:
-        from scitex.stats.effect_sizes import (
-            cliffs_delta,
-            cohens_d,
-            interpret_cliffs_delta,
-            interpret_cohens_d,
-        )
-
-        loop = asyncio.get_event_loop()
-
-        def do_effect_size():
-            g1 = np.array(group1, dtype=float)
-            g2 = np.array(group2, dtype=float)
-
-            result = {}
-
-            if measure == "cohens_d":
-                d = cohens_d(g1, g2)
-                result = {
-                    "measure": "Cohen's d",
-                    "value": float(d),
-                    "interpretation": interpret_cohens_d(d),
-                }
-
-            elif measure == "hedges_g":
-                # Hedges' g is Cohen's d with bias correction
-                d = cohens_d(g1, g2)
-                n1, n2 = len(g1), len(g2)
-                correction = 1 - (3 / (4 * (n1 + n2) - 9))
-                g = d * correction
-                result = {
-                    "measure": "Hedges' g",
-                    "value": float(g),
-                    "interpretation": interpret_cohens_d(g),  # Same thresholds
-                }
-
-            elif measure == "glass_delta":
-                # Glass's delta uses only control group std
-                mean_diff = np.mean(g1) - np.mean(g2)
-                delta = mean_diff / np.std(g2, ddof=1)
-                result = {
-                    "measure": "Glass's delta",
-                    "value": float(delta),
-                    "interpretation": interpret_cohens_d(delta),
-                }
-
-            elif measure == "cliffs_delta":
-                delta = cliffs_delta(g1, g2)
-                result = {
-                    "measure": "Cliff's delta",
-                    "value": float(delta),
-                    "interpretation": interpret_cliffs_delta(delta),
-                }
-
-            else:
-                raise ValueError(f"Unknown measure: {measure}")
-
-            # Add confidence interval approximation for Cohen's d
-            if measure in ["cohens_d", "hedges_g", "glass_delta"]:
-                n1, n2 = len(g1), len(g2)
-                se = np.sqrt(
-                    (n1 + n2) / (n1 * n2) + result["value"] ** 2 / (2 * (n1 + n2))
-                )
-                result["ci_lower"] = float(result["value"] - 1.96 * se)
-                result["ci_upper"] = float(result["value"] + 1.96 * se)
-
-            return result
-
-        result = await loop.run_in_executor(None, do_effect_size)
-
-        return {
-            "success": True,
-            "group1_n": len(group1),
-            "group2_n": len(group2),
-            **result,
-            "timestamp": datetime.now().isoformat(),
-        }
-
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-
-
-async def normality_test_handler(
-    data: list[float],
-    method: str = "shapiro",
-) -> dict:
-    """Test whether data follows a normal distribution."""
-    try:
-        from scipy import stats as scipy_stats
-
-        loop = asyncio.get_event_loop()
-
-        def do_normality():
-            arr = np.array(data, dtype=float)
-            arr = arr[~np.isnan(arr)]
-
-            if len(arr) < 3:
-                return {"error": "Need at least 3 data points"}
-
-            result = {}
-
-            if method == "shapiro":
-                stat, p_value = scipy_stats.shapiro(arr)
-                result = {
-                    "test": "Shapiro-Wilk",
-                    "statistic": float(stat),
-                    "statistic_name": "W",
-                    "p_value": float(p_value),
-                }
-
-            elif method == "dagostino":
-                if len(arr) < 8:
-                    return {"error": "D'Agostino test requires at least 8 samples"}
-                stat, p_value = scipy_stats.normaltest(arr)
-                result = {
-                    "test": "D'Agostino-Pearson",
-                    "statistic": float(stat),
-                    "statistic_name": "K2",
-                    "p_value": float(p_value),
-                }
-
-            elif method == "anderson":
-                res = scipy_stats.anderson(arr, dist="norm")
-                # Use 5% significance level
-                idx = 2  # Index for 5% level
-                result = {
-                    "test": "Anderson-Darling",
-                    "statistic": float(res.statistic),
-                    "statistic_name": "A2",
-                    "critical_value_5pct": float(res.critical_values[idx]),
-                    "normal": bool(res.statistic < res.critical_values[idx]),
-                }
-
-            elif method == "lilliefors":
-                try:
-                    from statsmodels.stats.diagnostic import lilliefors
-
-                    stat, p_value = lilliefors(arr, dist="norm")
-                    result = {
-                        "test": "Lilliefors",
-                        "statistic": float(stat),
-                        "statistic_name": "D",
-                        "p_value": float(p_value),
-                    }
-                except ImportError:
-                    return {"error": "statsmodels required for Lilliefors test"}
-
-            else:
-                raise ValueError(f"Unknown method: {method}")
-
-            # Add interpretation
-            if "p_value" in result:
-                result["is_normal"] = result["p_value"] >= 0.05
-                result["interpretation"] = (
-                    "Data appears normally distributed (p >= 0.05)"
-                    if result["is_normal"]
-                    else "Data deviates from normal distribution (p < 0.05)"
-                )
-
-            return result
-
-        result = await loop.run_in_executor(None, do_normality)
-
-        return {
-            "success": True,
-            "method": method,
-            "n": len(data),
-            **result,
-            "timestamp": datetime.now().isoformat(),
-        }
-
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-
-
-async def posthoc_test_handler(
-    groups: list[list[float]],
-    group_names: list[str] | None = None,
-    method: str = "tukey",
-    control_group: int = 0,
-) -> dict:
-    """Run post-hoc pairwise comparisons."""
-    try:
-        loop = asyncio.get_event_loop()
-
-        def do_posthoc():
-            group_arrays = [np.array(g, dtype=float) for g in groups]
-            names = group_names or [f"Group_{i + 1}" for i in range(len(groups))]
-
-            comparisons = []
-
-            if method == "tukey":
-                from scipy import stats as scipy_stats
-
-                # All pairwise comparisons with Tukey HSD approximation
-                all_data = np.concatenate(group_arrays)
-                group_labels = np.concatenate(
-                    [[names[i]] * len(g) for i, g in enumerate(group_arrays)]
-                )
-
-                # Use statsmodels if available, otherwise manual calculation
-                try:
-                    from statsmodels.stats.multicomp import pairwise_tukeyhsd
-
-                    tukey = pairwise_tukeyhsd(all_data, group_labels)
-
-                    for i in range(len(tukey.summary().data) - 1):
-                        row = tukey.summary().data[i + 1]
-                        comparisons.append(
-                            {
-                                "group1": str(row[0]),
-                                "group2": str(row[1]),
-                                "mean_diff": float(row[2]),
-                                "p_adj": float(row[3]),
-                                "ci_lower": float(row[4]),
-                                "ci_upper": float(row[5]),
-                                "reject": bool(row[6]),
-                            }
-                        )
-                except ImportError:
-                    # Fallback: Bonferroni-corrected t-tests
-                    n_comparisons = len(groups) * (len(groups) - 1) // 2
-                    for i in range(len(groups)):
-                        for j in range(i + 1, len(groups)):
-                            stat, p = scipy_stats.ttest_ind(
-                                group_arrays[i], group_arrays[j]
-                            )
-                            p_adj = min(p * n_comparisons, 1.0)
-                            comparisons.append(
-                                {
-                                    "group1": names[i],
-                                    "group2": names[j],
-                                    "mean_diff": float(
-                                        np.mean(group_arrays[i])
-                                        - np.mean(group_arrays[j])
-                                    ),
-                                    "t_statistic": float(stat),
-                                    "p_value": float(p),
-                                    "p_adj": float(p_adj),
-                                    "reject": p_adj < 0.05,
-                                }
-                            )
-
-            elif method == "dunnett":
-                from scipy import stats as scipy_stats
-
-                # Compare all groups to control
-                control = group_arrays[control_group]
-                n_comparisons = len(groups) - 1
-
-                for i, (name, group) in enumerate(zip(names, group_arrays)):
-                    if i == control_group:
-                        continue
-                    stat, p = scipy_stats.ttest_ind(group, control)
-                    p_adj = min(p * n_comparisons, 1.0)
-                    comparisons.append(
-                        {
-                            "group": name,
-                            "vs_control": names[control_group],
-                            "mean_diff": float(np.mean(group) - np.mean(control)),
-                            "t_statistic": float(stat),
-                            "p_value": float(p),
-                            "p_adj": float(p_adj),
-                            "reject": p_adj < 0.05,
-                        }
-                    )
-
-            elif method == "games_howell":
-                from scipy import stats as scipy_stats
-
-                # Games-Howell doesn't assume equal variances
-                for i in range(len(groups)):
-                    for j in range(i + 1, len(groups)):
-                        g1, g2 = group_arrays[i], group_arrays[j]
-                        n1, n2 = len(g1), len(g2)
-                        m1, m2 = np.mean(g1), np.mean(g2)
-                        v1, v2 = np.var(g1, ddof=1), np.var(g2, ddof=1)
-
-                        se = np.sqrt(v1 / n1 + v2 / n2)
-                        t_stat = (m1 - m2) / se
-
-                        # Welch-Satterthwaite df
-                        df = (v1 / n1 + v2 / n2) ** 2 / (
-                            (v1 / n1) ** 2 / (n1 - 1) + (v2 / n2) ** 2 / (n2 - 1)
-                        )
-
-                        p = 2 * (1 - scipy_stats.t.cdf(abs(t_stat), df))
-                        n_comparisons = len(groups) * (len(groups) - 1) // 2
-                        p_adj = min(p * n_comparisons, 1.0)
-
-                        comparisons.append(
-                            {
-                                "group1": names[i],
-                                "group2": names[j],
-                                "mean_diff": float(m1 - m2),
-                                "t_statistic": float(t_stat),
-                                "df": float(df),
-                                "p_value": float(p),
-                                "p_adj": float(p_adj),
-                                "reject": p_adj < 0.05,
-                            }
-                        )
-
-            elif method == "dunn":
-                from scipy import stats as scipy_stats
-
-                # Dunn's test for Kruskal-Wallis post-hoc
-                all_data = np.concatenate(group_arrays)
-                ranks = scipy_stats.rankdata(all_data)
-
-                # Assign ranks to groups
-                idx = 0
-                group_ranks = []
-                for g in group_arrays:
-                    group_ranks.append(ranks[idx : idx + len(g)])
-                    idx += len(g)
-
-                n_total = len(all_data)
-                n_comparisons = len(groups) * (len(groups) - 1) // 2
-
-                for i in range(len(groups)):
-                    for j in range(i + 1, len(groups)):
-                        n_i, n_j = len(group_arrays[i]), len(group_arrays[j])
-                        r_i, r_j = np.mean(group_ranks[i]), np.mean(group_ranks[j])
-
-                        se = np.sqrt(n_total * (n_total + 1) / 12 * (1 / n_i + 1 / n_j))
-                        z = (r_i - r_j) / se
-                        p = 2 * (1 - scipy_stats.norm.cdf(abs(z)))
-                        p_adj = min(p * n_comparisons, 1.0)
-
-                        comparisons.append(
-                            {
-                                "group1": names[i],
-                                "group2": names[j],
-                                "mean_rank_diff": float(r_i - r_j),
-                                "z_statistic": float(z),
-                                "p_value": float(p),
-                                "p_adj": float(p_adj),
-                                "reject": p_adj < 0.05,
-                            }
-                        )
-
-            else:
-                raise ValueError(f"Unknown method: {method}")
-
-            return comparisons
-
-        comparisons = await loop.run_in_executor(None, do_posthoc)
-
-        return {
-            "success": True,
-            "method": method,
-            "n_groups": len(groups),
-            "n_comparisons": len(comparisons),
-            "comparisons": comparisons,
-            "timestamp": datetime.now().isoformat(),
-        }
-
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-
-
-async def p_to_stars_handler(
-    p_value: float,
-    thresholds: list[float] | None = None,
-) -> dict:
-    """Convert p-value to significance stars."""
-    try:
-        thresh = thresholds or [0.001, 0.01, 0.05]
-
-        if p_value < thresh[0]:
-            stars = "***"
-            significance = f"p < {thresh[0]}"
-        elif p_value < thresh[1]:
-            stars = "**"
-            significance = f"p < {thresh[1]}"
-        elif p_value < thresh[2]:
-            stars = "*"
-            significance = f"p < {thresh[2]}"
-        else:
-            stars = "ns"
-            significance = f"p >= {thresh[2]} (not significant)"
-
-        return {
-            "success": True,
-            "p_value": p_value,
-            "stars": stars,
-            "significance": significance,
-            "thresholds": thresh,
-            "timestamp": datetime.now().isoformat(),
-        }
-
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-
-
 # EOF
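
Since handlers.py in 2.15.2 is reduced to the re-export shim shown above (the implementations now live in the scitex/stats/_mcp/_handlers/ modules listed in the file table, roughly one module per tool), existing callers should not need changes. A minimal sketch of the preserved import path, assuming the split-out implementations keep the 2.15.1 handler signatures; the sample data is illustrative only:

    import asyncio

    # Pre-2.15.2 import path, still valid via the re-exports in handlers.py
    from scitex.stats._mcp.handlers import run_test_handler

    # Handlers are async and return plain dicts with a "success" flag,
    # matching the 2.15.1 implementation removed above.
    result = asyncio.run(
        run_test_handler(
            test_name="ttest_ind",
            data=[[1.2, 1.4, 1.1], [2.0, 2.3, 1.9]],
        )
    )
    print(result["p_value"] if result["success"] else result["error"])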