openstat-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstat/__init__.py +3 -0
- openstat/__main__.py +4 -0
- openstat/backends/__init__.py +16 -0
- openstat/backends/duckdb_backend.py +70 -0
- openstat/backends/polars_backend.py +52 -0
- openstat/cli.py +92 -0
- openstat/commands/__init__.py +82 -0
- openstat/commands/adv_stat_cmds.py +1255 -0
- openstat/commands/advanced_ml_cmds.py +576 -0
- openstat/commands/advreg_cmds.py +207 -0
- openstat/commands/alias_cmds.py +135 -0
- openstat/commands/arch_cmds.py +82 -0
- openstat/commands/arules_cmds.py +111 -0
- openstat/commands/automodel_cmds.py +212 -0
- openstat/commands/backend_cmds.py +82 -0
- openstat/commands/base.py +170 -0
- openstat/commands/bayes_cmds.py +71 -0
- openstat/commands/causal_cmds.py +269 -0
- openstat/commands/cluster_cmds.py +152 -0
- openstat/commands/data_cmds.py +996 -0
- openstat/commands/datamanip_cmds.py +672 -0
- openstat/commands/dataquality_cmds.py +174 -0
- openstat/commands/datetime_cmds.py +176 -0
- openstat/commands/dimreduce_cmds.py +184 -0
- openstat/commands/discrete_cmds.py +149 -0
- openstat/commands/dsl_cmds.py +143 -0
- openstat/commands/epi_cmds.py +93 -0
- openstat/commands/equiv_tobit_cmds.py +94 -0
- openstat/commands/esttab_cmds.py +196 -0
- openstat/commands/export_beamer_cmds.py +142 -0
- openstat/commands/export_cmds.py +201 -0
- openstat/commands/export_extra_cmds.py +240 -0
- openstat/commands/factor_cmds.py +180 -0
- openstat/commands/groupby_cmds.py +155 -0
- openstat/commands/help_cmds.py +237 -0
- openstat/commands/i18n_cmds.py +43 -0
- openstat/commands/import_extra_cmds.py +561 -0
- openstat/commands/influence_cmds.py +134 -0
- openstat/commands/iv_cmds.py +106 -0
- openstat/commands/manova_cmds.py +105 -0
- openstat/commands/mediate_cmds.py +233 -0
- openstat/commands/meta_cmds.py +284 -0
- openstat/commands/mi_cmds.py +228 -0
- openstat/commands/mixed_cmds.py +79 -0
- openstat/commands/mixture_changepoint_cmds.py +166 -0
- openstat/commands/ml_adv_cmds.py +147 -0
- openstat/commands/ml_cmds.py +178 -0
- openstat/commands/model_eval_cmds.py +142 -0
- openstat/commands/network_cmds.py +288 -0
- openstat/commands/nlquery_cmds.py +161 -0
- openstat/commands/nonparam_cmds.py +149 -0
- openstat/commands/outreg_cmds.py +247 -0
- openstat/commands/panel_cmds.py +141 -0
- openstat/commands/pdf_cmds.py +226 -0
- openstat/commands/pipeline_cmds.py +319 -0
- openstat/commands/plot_cmds.py +189 -0
- openstat/commands/plugin_cmds.py +79 -0
- openstat/commands/posthoc_cmds.py +153 -0
- openstat/commands/power_cmds.py +172 -0
- openstat/commands/profile_cmds.py +246 -0
- openstat/commands/rbridge_cmds.py +81 -0
- openstat/commands/regex_cmds.py +104 -0
- openstat/commands/report_cmds.py +48 -0
- openstat/commands/repro_cmds.py +129 -0
- openstat/commands/resampling_cmds.py +109 -0
- openstat/commands/reshape_cmds.py +223 -0
- openstat/commands/sem_cmds.py +177 -0
- openstat/commands/stat_cmds.py +1040 -0
- openstat/commands/stata_import_cmds.py +215 -0
- openstat/commands/string_cmds.py +124 -0
- openstat/commands/surv_cmds.py +145 -0
- openstat/commands/survey_cmds.py +153 -0
- openstat/commands/textanalysis_cmds.py +192 -0
- openstat/commands/ts_adv_cmds.py +136 -0
- openstat/commands/ts_cmds.py +195 -0
- openstat/commands/tui_cmds.py +111 -0
- openstat/commands/ux_cmds.py +191 -0
- openstat/commands/validate_cmds.py +270 -0
- openstat/commands/viz_adv_cmds.py +312 -0
- openstat/commands/viz_extra_cmds.py +251 -0
- openstat/commands/watch_cmds.py +69 -0
- openstat/config.py +106 -0
- openstat/dsl/__init__.py +0 -0
- openstat/dsl/parser.py +332 -0
- openstat/dsl/tokenizer.py +105 -0
- openstat/i18n.py +120 -0
- openstat/io/__init__.py +0 -0
- openstat/io/loader.py +187 -0
- openstat/jupyter/__init__.py +18 -0
- openstat/jupyter/display.py +18 -0
- openstat/jupyter/magic.py +60 -0
- openstat/logging_config.py +59 -0
- openstat/plots/__init__.py +0 -0
- openstat/plots/plotter.py +437 -0
- openstat/plots/surv_plots.py +32 -0
- openstat/plots/ts_plots.py +59 -0
- openstat/plugins/__init__.py +5 -0
- openstat/plugins/manager.py +69 -0
- openstat/repl.py +457 -0
- openstat/reporting/__init__.py +0 -0
- openstat/reporting/eda.py +208 -0
- openstat/reporting/report.py +67 -0
- openstat/script_runner.py +319 -0
- openstat/session.py +133 -0
- openstat/stats/__init__.py +0 -0
- openstat/stats/advanced_regression.py +269 -0
- openstat/stats/arch_garch.py +84 -0
- openstat/stats/bayesian.py +103 -0
- openstat/stats/causal.py +258 -0
- openstat/stats/clustering.py +206 -0
- openstat/stats/discrete.py +311 -0
- openstat/stats/epidemiology.py +119 -0
- openstat/stats/equiv_tobit.py +163 -0
- openstat/stats/factor.py +174 -0
- openstat/stats/imputation.py +282 -0
- openstat/stats/influence.py +78 -0
- openstat/stats/iv.py +131 -0
- openstat/stats/manova.py +124 -0
- openstat/stats/mixed.py +128 -0
- openstat/stats/ml.py +275 -0
- openstat/stats/ml_advanced.py +117 -0
- openstat/stats/model_eval.py +183 -0
- openstat/stats/models.py +1342 -0
- openstat/stats/nonparametric.py +130 -0
- openstat/stats/panel.py +179 -0
- openstat/stats/power.py +295 -0
- openstat/stats/resampling.py +203 -0
- openstat/stats/survey.py +213 -0
- openstat/stats/survival.py +196 -0
- openstat/stats/timeseries.py +142 -0
- openstat/stats/ts_advanced.py +114 -0
- openstat/types.py +11 -0
- openstat/web/__init__.py +1 -0
- openstat/web/app.py +117 -0
- openstat/web/session_manager.py +73 -0
- openstat/web/static/app.js +117 -0
- openstat/web/static/index.html +38 -0
- openstat/web/static/style.css +103 -0
- openstat_cli-1.0.0.dist-info/METADATA +748 -0
- openstat_cli-1.0.0.dist-info/RECORD +143 -0
- openstat_cli-1.0.0.dist-info/WHEEL +4 -0
- openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
- openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Instrumental variable commands: ivregress."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from openstat.session import Session, ModelResult
|
|
8
|
+
from openstat.commands.base import command, CommandArgs, friendly_error
|
|
9
|
+
from openstat.dsl.parser import parse_formula, ParseError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _parse_iv_formula(raw: str) -> tuple[str, list[str], list[str], list[str]]:
    """Parse an IV formula of the form ``y ~ x1 (x_endog = z1 z2)``.

    The instrument block is the first parenthesised group that contains an
    ``=`` sign, so parenthesised terms appearing earlier in the formula
    (for example ``log(x1)``) are left in place as part of the main formula.
    Whatever remains after removing the instrument block is handed to
    ``parse_formula`` when it contains ``~``; otherwise it is split on
    whitespace, the first word being the dependent variable.

    Args:
        raw: Formula text with the method keyword already removed.

    Returns:
        ``(dep, exog_vars, endog_vars, instruments)``.

    Raises:
        ParseError: If there is no parenthesised group, no group contains
            ``=``, either side of ``=`` is empty, or no dependent variable
            is left once the instrument block is removed.
    """
    groups = list(re.finditer(r'\((.+?)\)', raw))
    if not groups:
        raise ParseError("IV formula requires parenthesized instruments: (endogenous = instruments)")

    # Pick the group that actually looks like "(endog = instruments)" rather
    # than blindly taking the first parenthesis in the string.
    m = next((grp for grp in groups if '=' in grp.group(1)), None)
    if m is None:
        raise ParseError("Instrument block must use '=': (x_endog = z1 z2)")

    endog_part, instr_part = m.group(1).split('=', 1)
    endog_vars = endog_part.split()
    instruments = instr_part.split()

    if not endog_vars:
        raise ParseError("No endogenous variables specified")
    if not instruments:
        raise ParseError("No instruments specified")

    # Cut the instrument block out and parse what is left as the main formula.
    clean = (raw[:m.start()] + raw[m.end():]).strip()
    if '~' in clean:
        dep, exog_vars = parse_formula(clean)
    else:
        parts = clean.split()
        if not parts:
            raise ParseError("No dependent variable specified")
        dep, exog_vars = parts[0], parts[1:]

    return dep, exog_vars, endog_vars, instruments
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@command("ivregress", usage="ivregress 2sls y ~ x1 (x_endog = z1 z2) [--robust]")
def cmd_ivregress(session: Session, args: str) -> str:
    """Run an instrumental-variable regression using Two-Stage Least Squares.

    The first positional word must be ``2sls`` (case-insensitive); the rest,
    minus flags and options, is parsed by ``_parse_iv_formula``. On success
    the raw model, the fit result and the fit keyword arguments are stored on
    the session, a ``ModelResult`` named "IV-2SLS" is appended to
    ``session.results``, and the summary table is returned, followed by any
    warnings and, when they can be produced, first-stage diagnostics.

    Returns:
        The rendered output, the usage string when the method keyword is
        missing, or an error message when parsing or fitting fails.
    """
    df = session.require_data()
    ca = CommandArgs(args)

    # The estimator keyword has to lead the positional arguments.
    if not (ca.positional and ca.positional[0].lower() == "2sls"):
        return "Usage: ivregress 2sls y ~ x1 (x_endog = z1 z2) [--robust]"

    robust = ca.has_flag("--robust")
    without_flags = ca.strip_flags_and_options()
    # Peel the leading "2sls" keyword off so only the formula remains.
    formula_str = re.sub(r'^\s*2sls\s+', '', without_flags, flags=re.IGNORECASE).strip()

    try:
        dep, exog, endog, instruments = _parse_iv_formula(formula_str)
    except ParseError as err:
        return f"Formula error: {err}"

    try:
        from openstat.stats.iv import fit_iv_2sls

        result, raw = fit_iv_2sls(df, dep, exog, endog, instruments, robust=robust)

        # Remember the fit on the session.
        session._last_model = raw
        session._last_model_vars = (dep, exog + endog)
        session._last_fit_result = result
        session._last_fit_kwargs = {"method": "2sls", "endog": endog, "instruments": instruments}

        all_vars = result.indep_vars  # read here but not used further below
        if hasattr(result, "to_markdown"):
            md = result.to_markdown()
        else:
            md = ""
        session.results.append(
            ModelResult(
                name="IV-2SLS",
                formula=result.formula,
                table=md,
                details={
                    "n_obs": result.n_obs,
                    "params": dict(result.params),
                    "r_squared": result.r_squared,
                },
            )
        )

        output = result.summary_table()
        if result.warnings:
            output += "\n" + "\n".join(result.warnings)

        # First-stage diagnostics are best-effort: any failure leaves the
        # main output untouched.
        try:
            from openstat.stats.iv import first_stage_diagnostics

            output += "\n\n" + first_stage_diagnostics(raw)
        except Exception:
            pass

        return output
    except ImportError as err:
        return str(err)
    except Exception as err:
        return friendly_error(err, "ivregress")
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""MANOVA and two-way ANOVA commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from openstat.commands.base import command
|
|
8
|
+
from openstat.session import Session
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _stata_opts(raw: str) -> tuple[list[str], dict[str, str], set[str]]:
    """Split a Stata-style argument string into positionals, options and flags.

    ``name(value)`` groups become options keyed by the lower-cased name (a
    repeated name keeps its last value). After those groups are removed, the
    remaining whitespace-separated words have surrounding commas stripped;
    words starting with ``--`` become lower-cased flags with the leading
    dashes removed, and every other non-empty word is a positional argument.

    Returns:
        ``(positional, opts, flags)``.
    """
    option_re = re.compile(r'(\w+)\(([^)]*)\)')

    opts: dict[str, str] = {
        found.group(1).lower(): found.group(2) for found in option_re.finditer(raw)
    }
    remainder = option_re.sub('', raw)

    flags: set[str] = set()
    positional: list[str] = []
    for piece in remainder.split():
        word = piece.strip(',')
        if word.startswith('--'):
            flags.add(word.lstrip('-').lower())
        elif word:
            # A word made only of commas strips to "" and is skipped.
            positional.append(word)
    return positional, opts, flags
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@command("anova2", usage="anova2 depvar factor1 factor2 [, --nointeraction]")
def cmd_anova2(session: Session, args: str) -> str:
    """Run a two-way ANOVA and render the result as a text table.

    The first three positional words are the dependent variable and the two
    factors. The interaction term is requested unless ``--nointeraction`` is
    given. Table cells whose value is NaN are shown as a dot (F, p-value) or
    left blank (SS, MS). The heading always reads ``dep ~ f1 + f2``.

    Returns:
        The formatted table, the usage string when fewer than three
        positionals are supplied, or ``anova2 error: ...`` when the
        computation raises.
    """
    df = session.require_data()
    positional, _opts, flags = _stata_opts(args)
    if len(positional) < 3:
        return "Usage: anova2 depvar factor1 factor2 [, --nointeraction]"

    dep, f1, f2 = positional[0], positional[1], positional[2]
    interaction = "nointeraction" not in flags

    try:
        from openstat.stats.manova import twoway_anova

        result = twoway_anova(df, dep, f1, f2, interaction=interaction)
    except Exception as exc:
        return f"anova2 error: {exc}"

    def _is_nan(value) -> bool:
        # NaN is the only float that compares unequal to itself.
        return value != value

    lines = [
        f"\nTwo-way ANOVA: {dep} ~ {f1} + {f2}",
        "=" * 70,
        f" {'Source':<35} {'df':>4} {'SS':>12} {'MS':>12} {'F':>8} {'p-value':>8}",
        " " + "-" * 66,
    ]
    for row in result["table"]:
        src = row["source"][:35]
        f_val = row["F"]
        f_str = " ." if _is_nan(f_val) else f"{f_val:>8.3f}"
        p_val = row["p_value"]
        p_str = " ." if _is_nan(p_val) else f"{p_val:>8.4f}"
        ss = row["SS"]
        ms = row["MS"]
        ss_s = f"{ss:>12.4f}" if ss == ss else f"{'':>12}"
        ms_s = f"{ms:>12.4f}" if ms == ms else f"{'':>12}"
        lines.append(f" {src:<35} {row['df']:>4} {ss_s} {ms_s} {f_str} {p_str}")

    lines.append("=" * 70)
    lines.append(f" R² = {result['r_squared']:.4f} N = {result['n_obs']}")
    return "\n".join(lines)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@command("manova", usage="manova depvar1 depvar2 ... = groupvar")
def cmd_manova(session: Session, args: str) -> str:
    """Run a one-way MANOVA and render the multivariate tests as text.

    The argument string is split at the first ``=``: the words on the left
    are the dependent variables, the text on the right is the grouping
    variable. Words on the left that are not column names are dropped
    without any message.

    Returns:
        The formatted table, the usage string when ``=`` is missing, a
        message when no dependent variable or the group column cannot be
        found, or ``manova error: ...`` when the computation raises.
    """
    df = session.require_data()
    # Expected shape: "y1 y2 y3 = groupvar"
    lhs, sep, rhs = args.partition("=")
    if not sep:
        return "Usage: manova depvar1 depvar2 ... = groupvar"

    dep_vars = []
    for name in lhs.split():
        # Unknown names are silently skipped.
        if name in df.columns:
            dep_vars.append(name)
    group = rhs.strip()

    if not dep_vars:
        return "No valid dependent variables found."
    if group not in df.columns:
        return f"Group variable '{group}' not found."

    try:
        from openstat.stats.manova import fit_manova

        result = fit_manova(df, dep_vars, group)
    except Exception as exc:
        return f"manova error: {exc}"

    lines = [f"\nMANOVA: {', '.join(dep_vars)} ~ {group}"]
    lines.append(f" N = {result['n_obs']}, Groups = {result['n_groups']}")
    lines.append("=" * 75)
    lines.append(
        f" {'Effect':<20} {'Test':<20} {'Stat':>8} {'F':>8} {'Num df':>6} {'Den df':>6} {'p':>8}"
    )
    lines.append(" " + "-" * 71)

    for eff in result["effects"]:
        # A NaN p-value (unequal to itself) is shown as a dot.
        if eff['p_value'] == eff['p_value']:
            p_str = f"{eff['p_value']:>8.4f}"
        else:
            p_str = " ."
        name_cols = f" {eff['effect'][:20]:<20} {eff['test'][:20]:<20}"
        stat_cols = f" {eff['statistic']:>8.4f} {eff['F']:>8.3f}"
        df_cols = f" {eff['num_df']:>6.1f} {eff['den_df']:>6.1f} {p_str}"
        lines.append(name_cols + stat_cols + df_cols)

    lines.append("=" * 75)
    return "\n".join(lines)
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""Mediation and moderated-mediation analysis commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from openstat.commands.base import command, CommandArgs, friendly_error
|
|
8
|
+
from openstat.session import Session
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _bootstrap_indirect(x, m, y, n_boot: int = 1000, seed: int | None = None):
|
|
12
|
+
"""Return bootstrap distribution of indirect effect a*b."""
|
|
13
|
+
rng = np.random.default_rng(seed)
|
|
14
|
+
n = len(x)
|
|
15
|
+
ab_boots = []
|
|
16
|
+
for _ in range(n_boot):
|
|
17
|
+
idx = rng.integers(0, n, size=n)
|
|
18
|
+
xb, mb, yb = x[idx], m[idx], y[idx]
|
|
19
|
+
# a path: m ~ x
|
|
20
|
+
xb_ = np.column_stack([np.ones(n), xb])
|
|
21
|
+
try:
|
|
22
|
+
a = np.linalg.lstsq(xb_, mb, rcond=None)[0][1]
|
|
23
|
+
# b path: y ~ x + m
|
|
24
|
+
xmb = np.column_stack([np.ones(n), xb, mb])
|
|
25
|
+
b = np.linalg.lstsq(xmb, yb, rcond=None)[0][2]
|
|
26
|
+
ab_boots.append(a * b)
|
|
27
|
+
except Exception:
|
|
28
|
+
continue
|
|
29
|
+
return np.array(ab_boots)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@command("mediate", usage="mediate <y> <m> <x> [--boot=1000] [--seed=N]")
def cmd_mediate(session: Session, args: str) -> str:
    """Baron-Kenny mediation analysis with bootstrap CI for indirect effect.

    Tests whether m mediates the effect of x on y.

    Paths:
      a:  x → m
      b:  m → y  (controlling x)
      c:  x → y  (total)
      c': x → y  (direct, controlling m)
      indirect = a × b

    Options:
      --boot=B  number of bootstrap resamples (positive integer, default 1000)
      --seed=N  seed for the resampling; when omitted, the session's
                ``_repro_seed`` attribute is used if it exists

    Path p-values are two-sided t-tests using each regression's own residual
    degrees of freedom (n - 2 for a and c, n - 3 for b and c').

    Examples:
      mediate income educ age
      mediate y mediator x --boot=5000 --seed=42
    """
    ca = CommandArgs(args)
    if len(ca.positional) < 3:
        return "Usage: mediate <y> <m> <x> [--boot=1000] [--seed=N]"

    y_col, m_col, x_col = ca.positional[0], ca.positional[1], ca.positional[2]

    try:
        # Option parsing lives inside the try so a malformed value such as
        # --boot=abc is reported like any other command error.
        n_boot = int(ca.options.get("boot", 1000))
        seed = int(ca.options["seed"]) if "seed" in ca.options else getattr(session, "_repro_seed", None)
        if n_boot < 1:
            return "--boot must be a positive integer."

        df = session.require_data()
        sub = df.select([y_col, m_col, x_col]).drop_nulls()
        if sub.height < 10:
            return "Need at least 10 complete cases."

        y = sub[y_col].to_numpy().astype(float)
        m = sub[m_col].to_numpy().astype(float)
        x = sub[x_col].to_numpy().astype(float)
        n = len(y)
        ones = np.ones(n)

        def ols(X, y_):
            """Return (coefficients, standard errors, residual df) for y_ ~ X."""
            coef, _, _, _ = np.linalg.lstsq(X, y_, rcond=None)
            resid = y_ - X @ coef
            resid_df = max(n - X.shape[1], 1)
            sigma2 = np.dot(resid, resid) / resid_df
            cov = sigma2 * np.linalg.pinv(X.T @ X)
            return coef, np.sqrt(np.diag(cov)), resid_df

        # a path: m ~ x
        a_coef, a_se, df_a = ols(np.column_stack([ones, x]), m)
        a, se_a = a_coef[1], a_se[1]

        # b & c' paths: y ~ x + m
        bc_coef, bc_se, df_bc = ols(np.column_stack([ones, x, m]), y)
        c_prime, se_cp = bc_coef[1], bc_se[1]
        b, se_b = bc_coef[2], bc_se[2]

        # c path: y ~ x (total)
        c_coef, c_se, df_c = ols(np.column_stack([ones, x]), y)
        c, se_c = c_coef[1], c_se[1]

        indirect = a * b

        # Bootstrap CI for indirect
        ab_dist = _bootstrap_indirect(x, m, y, n_boot=n_boot, seed=seed)
        if ab_dist.size == 0:
            return "Bootstrap produced no valid replications."
        ci_lo = float(np.percentile(ab_dist, 2.5))
        ci_hi = float(np.percentile(ab_dist, 97.5))

        from scipy import stats as _st

        def _p(coef, se, resid_df):
            # Two-sided t-test; the df must match the regression the
            # coefficient was estimated in.
            if se > 0:
                return 2 * _st.t.sf(abs(coef / se), df=resid_df)
            return float("nan")

        mediated_pct = 100 * abs(indirect / c) if abs(c) > 1e-12 else float("nan")

        lines = [
            f"Mediation Analysis: {y_col} ~ {x_col} → {m_col} → {y_col}",
            f"N = {n}",
            "=" * 56,
            f" {'Path':<20} {'Coef':>9} {'SE':>9} {'p':>9}",
            "-" * 56,
            f" {'a (x->m)':<20} {a:9.4f} {se_a:9.4f} {_p(a, se_a, df_a):9.4f}",
            f" {'b (m->y|x)':<20} {b:9.4f} {se_b:9.4f} {_p(b, se_b, df_bc):9.4f}",
            f" {'c total (x->y)':<20} {c:9.4f} {se_c:9.4f} {_p(c, se_c, df_c):9.4f}",
            " {:<20} {:9.4f} {:9.4f} {:9.4f}".format(
                "c' direct(x->y|m)", c_prime, se_cp, _p(c_prime, se_cp, df_bc)
            ),
            "=" * 56,
            f" Indirect (a×b): {indirect:9.4f}",
            f" Bootstrap 95% CI: [{ci_lo:.4f}, {ci_hi:.4f}] (B={n_boot})",
            f" % Mediated: {mediated_pct:.1f}%" if not np.isnan(mediated_pct) else " % Mediated: N/A",
            "",
            "Mediation: " + ("YES — CI excludes 0" if ci_lo * ci_hi > 0 else "NOT significant (CI includes 0)"),
        ]
        return "\n".join(lines)

    except Exception as e:
        return friendly_error(e, "mediate")
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@command("modmediate", usage="modmediate <y> <m> <x> <w> [--boot=1000] [--seed=N]")
def cmd_modmediate(session: Session, args: str) -> str:
    """Moderated mediation (Hayes PROCESS Model 7 style).

    Tests whether the indirect effect of x on y through m
    is moderated by w (moderator of the a-path: x->m).

    Index of Moderated Mediation (IMM) with bootstrap CI.

    Options:
      --boot=B  number of bootstrap resamples (positive integer, default 1000)
      --seed=N  seed for the resampling; when omitted, the session's
                ``_repro_seed`` attribute is used if it exists

    Examples:
      modmediate outcome mediator predictor moderator --boot=2000
    """
    ca = CommandArgs(args)
    if len(ca.positional) < 4:
        return "Usage: modmediate <y> <m> <x> <w> [--boot=1000] [--seed=N]"

    y_col, m_col, x_col, w_col = (ca.positional[i] for i in range(4))

    try:
        # Option parsing lives inside the try so a malformed value such as
        # --boot=abc is reported like any other command error.
        n_boot = int(ca.options.get("boot", 1000))
        seed = int(ca.options["seed"]) if "seed" in ca.options else getattr(session, "_repro_seed", None)
        if n_boot < 1:
            return "--boot must be a positive integer."

        df = session.require_data()
        sub = df.select([y_col, m_col, x_col, w_col]).drop_nulls()
        if sub.height < 20:
            return "Need at least 20 complete cases."

        y = sub[y_col].to_numpy().astype(float)
        m = sub[m_col].to_numpy().astype(float)
        x = sub[x_col].to_numpy().astype(float)
        w = sub[w_col].to_numpy().astype(float)
        n = len(y)
        ones = np.ones(n)

        # Mean-centre w (no rescaling) before forming the interaction.
        w_c = w - w.mean()
        xw = x * w_c

        def ols(X, y_):
            """Return the least-squares coefficients for y_ ~ X."""
            coef, _, _, _ = np.linalg.lstsq(X, y_, rcond=None)
            return coef

        # a-path moderated: m ~ x + w + x*w
        a_coef = ols(np.column_stack([ones, x, w_c, xw]), m)
        a1, a3 = a_coef[1], a_coef[3]  # a1 = x coefficient, a3 = interaction

        # b-path: y ~ x + m
        b = ols(np.column_stack([ones, x, m]), y)[2]

        # Conditional indirect at w = mean ± 1SD (on the centred scale)
        w_sd = w_c.std()
        w_vals = {"Low (−1SD)": -w_sd, "Mean": 0.0, "High (+1SD)": w_sd}

        def _boot_imm(rng):
            """Return a3*b for one resample, or NaN when the fit raises."""
            idx = rng.integers(0, n, size=n)
            xb, mb, yb, wb = x[idx], m[idx], y[idx], w_c[idx]
            Xab = np.column_stack([ones, xb, wb, xb * wb])
            try:
                ac = ols(Xab, mb)
                bc = ols(np.column_stack([ones, xb, mb]), yb)
                return ac[3] * bc[2]  # IMM = a3 * b
            except Exception:
                return np.nan

        rng = np.random.default_rng(seed)
        imm_boots = np.array([_boot_imm(rng) for _ in range(n_boot)])
        imm_boots = imm_boots[~np.isnan(imm_boots)]
        if imm_boots.size == 0:
            return "Bootstrap produced no valid replications."
        imm = a3 * b
        ci_lo = float(np.percentile(imm_boots, 2.5))
        ci_hi = float(np.percentile(imm_boots, 97.5))

        lines = [
            f"Moderated Mediation: {y_col} ~ {x_col}→{m_col}→{y_col}, moderated by {w_col}",
            f"N = {n}",
            "=" * 60,
            "Conditional Indirect Effects (a×b at levels of moderator):",
            f" {'Level':<15} {'a+a3*w':>10} {'Indirect':>10}",
            "-" * 60,
        ]
        for label, wv in w_vals.items():
            cond_a = a1 + a3 * wv
            indirect = cond_a * b
            lines.append(f" {label:<15} {cond_a:10.4f} {indirect:10.4f}")

        lines += [
            "=" * 60,
            f" Index of Moderated Mediation (a3×b): {imm:.4f}",
            f" Bootstrap 95% CI: [{ci_lo:.4f}, {ci_hi:.4f}] (B={n_boot})",
            "",
            "Moderated mediation: " + (
                "YES — IMM CI excludes 0" if ci_lo * ci_hi > 0
                else "NOT significant (CI includes 0)"
            ),
        ]
        return "\n".join(lines)

    except Exception as e:
        return friendly_error(e, "modmediate")
|