openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. openstat/__init__.py +3 -0
  2. openstat/__main__.py +4 -0
  3. openstat/backends/__init__.py +16 -0
  4. openstat/backends/duckdb_backend.py +70 -0
  5. openstat/backends/polars_backend.py +52 -0
  6. openstat/cli.py +92 -0
  7. openstat/commands/__init__.py +82 -0
  8. openstat/commands/adv_stat_cmds.py +1255 -0
  9. openstat/commands/advanced_ml_cmds.py +576 -0
  10. openstat/commands/advreg_cmds.py +207 -0
  11. openstat/commands/alias_cmds.py +135 -0
  12. openstat/commands/arch_cmds.py +82 -0
  13. openstat/commands/arules_cmds.py +111 -0
  14. openstat/commands/automodel_cmds.py +212 -0
  15. openstat/commands/backend_cmds.py +82 -0
  16. openstat/commands/base.py +170 -0
  17. openstat/commands/bayes_cmds.py +71 -0
  18. openstat/commands/causal_cmds.py +269 -0
  19. openstat/commands/cluster_cmds.py +152 -0
  20. openstat/commands/data_cmds.py +996 -0
  21. openstat/commands/datamanip_cmds.py +672 -0
  22. openstat/commands/dataquality_cmds.py +174 -0
  23. openstat/commands/datetime_cmds.py +176 -0
  24. openstat/commands/dimreduce_cmds.py +184 -0
  25. openstat/commands/discrete_cmds.py +149 -0
  26. openstat/commands/dsl_cmds.py +143 -0
  27. openstat/commands/epi_cmds.py +93 -0
  28. openstat/commands/equiv_tobit_cmds.py +94 -0
  29. openstat/commands/esttab_cmds.py +196 -0
  30. openstat/commands/export_beamer_cmds.py +142 -0
  31. openstat/commands/export_cmds.py +201 -0
  32. openstat/commands/export_extra_cmds.py +240 -0
  33. openstat/commands/factor_cmds.py +180 -0
  34. openstat/commands/groupby_cmds.py +155 -0
  35. openstat/commands/help_cmds.py +237 -0
  36. openstat/commands/i18n_cmds.py +43 -0
  37. openstat/commands/import_extra_cmds.py +561 -0
  38. openstat/commands/influence_cmds.py +134 -0
  39. openstat/commands/iv_cmds.py +106 -0
  40. openstat/commands/manova_cmds.py +105 -0
  41. openstat/commands/mediate_cmds.py +233 -0
  42. openstat/commands/meta_cmds.py +284 -0
  43. openstat/commands/mi_cmds.py +228 -0
  44. openstat/commands/mixed_cmds.py +79 -0
  45. openstat/commands/mixture_changepoint_cmds.py +166 -0
  46. openstat/commands/ml_adv_cmds.py +147 -0
  47. openstat/commands/ml_cmds.py +178 -0
  48. openstat/commands/model_eval_cmds.py +142 -0
  49. openstat/commands/network_cmds.py +288 -0
  50. openstat/commands/nlquery_cmds.py +161 -0
  51. openstat/commands/nonparam_cmds.py +149 -0
  52. openstat/commands/outreg_cmds.py +247 -0
  53. openstat/commands/panel_cmds.py +141 -0
  54. openstat/commands/pdf_cmds.py +226 -0
  55. openstat/commands/pipeline_cmds.py +319 -0
  56. openstat/commands/plot_cmds.py +189 -0
  57. openstat/commands/plugin_cmds.py +79 -0
  58. openstat/commands/posthoc_cmds.py +153 -0
  59. openstat/commands/power_cmds.py +172 -0
  60. openstat/commands/profile_cmds.py +246 -0
  61. openstat/commands/rbridge_cmds.py +81 -0
  62. openstat/commands/regex_cmds.py +104 -0
  63. openstat/commands/report_cmds.py +48 -0
  64. openstat/commands/repro_cmds.py +129 -0
  65. openstat/commands/resampling_cmds.py +109 -0
  66. openstat/commands/reshape_cmds.py +223 -0
  67. openstat/commands/sem_cmds.py +177 -0
  68. openstat/commands/stat_cmds.py +1040 -0
  69. openstat/commands/stata_import_cmds.py +215 -0
  70. openstat/commands/string_cmds.py +124 -0
  71. openstat/commands/surv_cmds.py +145 -0
  72. openstat/commands/survey_cmds.py +153 -0
  73. openstat/commands/textanalysis_cmds.py +192 -0
  74. openstat/commands/ts_adv_cmds.py +136 -0
  75. openstat/commands/ts_cmds.py +195 -0
  76. openstat/commands/tui_cmds.py +111 -0
  77. openstat/commands/ux_cmds.py +191 -0
  78. openstat/commands/validate_cmds.py +270 -0
  79. openstat/commands/viz_adv_cmds.py +312 -0
  80. openstat/commands/viz_extra_cmds.py +251 -0
  81. openstat/commands/watch_cmds.py +69 -0
  82. openstat/config.py +106 -0
  83. openstat/dsl/__init__.py +0 -0
  84. openstat/dsl/parser.py +332 -0
  85. openstat/dsl/tokenizer.py +105 -0
  86. openstat/i18n.py +120 -0
  87. openstat/io/__init__.py +0 -0
  88. openstat/io/loader.py +187 -0
  89. openstat/jupyter/__init__.py +18 -0
  90. openstat/jupyter/display.py +18 -0
  91. openstat/jupyter/magic.py +60 -0
  92. openstat/logging_config.py +59 -0
  93. openstat/plots/__init__.py +0 -0
  94. openstat/plots/plotter.py +437 -0
  95. openstat/plots/surv_plots.py +32 -0
  96. openstat/plots/ts_plots.py +59 -0
  97. openstat/plugins/__init__.py +5 -0
  98. openstat/plugins/manager.py +69 -0
  99. openstat/repl.py +457 -0
  100. openstat/reporting/__init__.py +0 -0
  101. openstat/reporting/eda.py +208 -0
  102. openstat/reporting/report.py +67 -0
  103. openstat/script_runner.py +319 -0
  104. openstat/session.py +133 -0
  105. openstat/stats/__init__.py +0 -0
  106. openstat/stats/advanced_regression.py +269 -0
  107. openstat/stats/arch_garch.py +84 -0
  108. openstat/stats/bayesian.py +103 -0
  109. openstat/stats/causal.py +258 -0
  110. openstat/stats/clustering.py +206 -0
  111. openstat/stats/discrete.py +311 -0
  112. openstat/stats/epidemiology.py +119 -0
  113. openstat/stats/equiv_tobit.py +163 -0
  114. openstat/stats/factor.py +174 -0
  115. openstat/stats/imputation.py +282 -0
  116. openstat/stats/influence.py +78 -0
  117. openstat/stats/iv.py +131 -0
  118. openstat/stats/manova.py +124 -0
  119. openstat/stats/mixed.py +128 -0
  120. openstat/stats/ml.py +275 -0
  121. openstat/stats/ml_advanced.py +117 -0
  122. openstat/stats/model_eval.py +183 -0
  123. openstat/stats/models.py +1342 -0
  124. openstat/stats/nonparametric.py +130 -0
  125. openstat/stats/panel.py +179 -0
  126. openstat/stats/power.py +295 -0
  127. openstat/stats/resampling.py +203 -0
  128. openstat/stats/survey.py +213 -0
  129. openstat/stats/survival.py +196 -0
  130. openstat/stats/timeseries.py +142 -0
  131. openstat/stats/ts_advanced.py +114 -0
  132. openstat/types.py +11 -0
  133. openstat/web/__init__.py +1 -0
  134. openstat/web/app.py +117 -0
  135. openstat/web/session_manager.py +73 -0
  136. openstat/web/static/app.js +117 -0
  137. openstat/web/static/index.html +38 -0
  138. openstat/web/static/style.css +103 -0
  139. openstat_cli-1.0.0.dist-info/METADATA +748 -0
  140. openstat_cli-1.0.0.dist-info/RECORD +143 -0
  141. openstat_cli-1.0.0.dist-info/WHEEL +4 -0
  142. openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
  143. openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,106 @@
1
+ """Instrumental variable commands: ivregress."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from openstat.session import Session, ModelResult
8
+ from openstat.commands.base import command, CommandArgs, friendly_error
9
+ from openstat.dsl.parser import parse_formula, ParseError
10
+
11
+
12
def _parse_iv_formula(raw: str) -> tuple[str, list[str], list[str], list[str]]:
    """Split an IV specification such as ``y ~ x1 (x_endog = z1 z2)``.

    The first parenthesised group is taken as the instrument block; the text
    left of its ``=`` lists the endogenous regressors and the text right of
    it lists the instruments.  Whatever remains once that group is removed is
    handed to ``parse_formula`` when it contains ``~``; otherwise it is read
    as whitespace-separated names with the dependent variable first.

    Returns:
        ``(dep, exog_vars, endog_vars, instruments)``.

    Raises:
        ParseError: if the parenthesised block is missing, has no ``=``, has
            an empty side, or no dependent variable is left over.
    """
    block_match = re.search(r'\((.+?)\)', raw)
    if block_match is None:
        raise ParseError("IV formula requires parenthesized instruments: (endogenous = instruments)")

    lhs, equals, rhs = block_match.group(1).partition('=')
    if not equals:
        raise ParseError("Instrument block must use '=': (x_endog = z1 z2)")

    endog_vars = lhs.split()
    instruments = rhs.split()
    if not endog_vars:
        raise ParseError("No endogenous variables specified")
    if not instruments:
        raise ParseError("No instruments specified")

    # Cut the instrument block out; what is left is the structural equation.
    remainder = (raw[:block_match.start()] + raw[block_match.end():]).strip()
    if '~' in remainder:
        dep, exog_vars = parse_formula(remainder)
        return dep, exog_vars, endog_vars, instruments

    tokens = remainder.split()
    if not tokens:
        raise ParseError("No dependent variable specified")
    dep, *exog_vars = tokens
    return dep, exog_vars, endog_vars, instruments
47
+
48
+
49
@command("ivregress", usage="ivregress 2sls y ~ x1 (x_endog = z1 z2) [--robust]")
def cmd_ivregress(session: Session, args: str) -> str:
    """Fit instrumental variable regression via Two-Stage Least Squares.

    ``args`` must start with the method keyword ``2sls`` followed by an IV
    formula of the form ``y ~ x1 (x_endog = z1 z2)``; ``--robust`` is passed
    through to the estimator as ``robust=True``.

    On success the raw model, its variables, the fit result and the fit
    keyword arguments are cached on the session, a ``ModelResult`` is
    appended to ``session.results``, and the summary table is returned,
    followed by any warnings and, when they can be computed, the first-stage
    diagnostics.  Usage, formula and fitting problems are returned as a
    message string instead of being raised.
    """
    df = session.require_data()
    ca = CommandArgs(args)

    # The first positional token selects the estimator; anything but 2sls
    # gets the usage text.
    if not ca.positional or ca.positional[0].lower() != "2sls":
        return "Usage: ivregress 2sls y ~ x1 (x_endog = z1 z2) [--robust]"

    robust = ca.has_flag("--robust")
    formula_str = ca.strip_flags_and_options()
    # Drop the leading "2sls" keyword so only the formula is left.
    formula_str = re.sub(r'^\s*2sls\s+', '', formula_str, flags=re.IGNORECASE).strip()

    try:
        dep, exog, endog, instruments = _parse_iv_formula(formula_str)
    except ParseError as e:
        return f"Formula error: {e}"

    try:
        from openstat.stats.iv import fit_iv_2sls

        result, raw = fit_iv_2sls(df, dep, exog, endog, instruments, robust=robust)

        # Cache the fit on the session.
        session._last_model = raw
        session._last_model_vars = (dep, exog + endog)
        session._last_fit_result = result
        session._last_fit_kwargs = {"method": "2sls", "endog": endog, "instruments": instruments}

        md = result.to_markdown() if hasattr(result, "to_markdown") else ""
        session.results.append(ModelResult(
            name="IV-2SLS", formula=result.formula,
            table=md, details={
                "n_obs": result.n_obs,
                "params": dict(result.params),
                "r_squared": result.r_squared,
            },
        ))

        output = result.summary_table()
        if result.warnings:
            output += "\n" + "\n".join(result.warnings)

        # First-stage diagnostics are a best-effort extra: a failure here
        # must not hide the main regression output, so it is ignored.
        try:
            from openstat.stats.iv import first_stage_diagnostics
            output += "\n\n" + first_stage_diagnostics(raw)
        except Exception:
            pass

        return output
    except ImportError as e:
        return str(e)
    except Exception as e:
        return friendly_error(e, "ivregress")
@@ -0,0 +1,105 @@
1
+ """MANOVA and two-way ANOVA commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from openstat.commands.base import command
8
+ from openstat.session import Session
9
+
10
+
11
def _stata_opts(raw: str) -> tuple[list[str], dict[str, str], set[str]]:
    """Split a Stata-style argument string into its three kinds of token.

    ``name(value)`` groups become options (name lower-cased, value kept as
    written).  In what remains, whitespace-separated words have surrounding
    commas removed; words starting with ``--`` become lower-cased flags with
    the leading dashes dropped, and every other non-empty word is positional.

    Returns:
        ``(positional, opts, flags)``.
    """
    option_re = re.compile(r'(\w+)\(([^)]*)\)')
    opts: dict[str, str] = {
        hit.group(1).lower(): hit.group(2) for hit in option_re.finditer(raw)
    }

    positional: list[str] = []
    flags: set[str] = set()
    # With the option groups blanked out only bare words and flags remain.
    for piece in option_re.sub('', raw).split():
        word = piece.strip(',')
        if not word:
            continue
        if word.startswith('--'):
            flags.add(word.lstrip('-').lower())
        else:
            positional.append(word)
    return positional, opts, flags
27
+
28
+
29
@command("anova2", usage="anova2 depvar factor1 factor2 [, --nointeraction]")
def cmd_anova2(session: Session, args: str) -> str:
    """Two-way ANOVA with optional interaction term.

    The first three positional arguments are the dependent variable and the
    two factors.  The interaction term is requested unless the
    ``--nointeraction`` flag is present.  Returns the formatted ANOVA table,
    the usage text when fewer than three names are given, or an
    ``anova2 error: ...`` message when the computation fails.
    """
    df = session.require_data()
    positional, _opts, flags = _stata_opts(args)
    if len(positional) < 3:
        return "Usage: anova2 depvar factor1 factor2 [, --nointeraction]"

    dep, f1, f2 = positional[:3]
    interaction = "nointeraction" not in flags

    try:
        from openstat.stats.manova import twoway_anova
        result = twoway_anova(df, dep, f1, f2, interaction=interaction)
    except Exception as exc:
        return f"anova2 error: {exc}"

    def _cell(value, width: int, precision: int, blank: str) -> str:
        """Right-align a number in ``width`` columns, or ``blank`` for NaN."""
        # NaN is the only value that compares unequal to itself.
        if value != value:
            # Pad the placeholder to the column width so rows stay aligned.
            return f"{blank:>{width}}"
        return f"{value:>{width}.{precision}f}"

    lines = [f"\nTwo-way ANOVA: {dep} ~ {f1} + {f2}", "=" * 70]
    lines.append(
        f" {'Source':<35} {'df':>4} {'SS':>12} {'MS':>12} {'F':>8} {'p-value':>8}"
    )
    lines.append(" " + "-" * 66)
    for row in result["table"]:
        src = row["source"][:35]
        ss_s = _cell(row["SS"], 12, 4, "")
        ms_s = _cell(row["MS"], 12, 4, "")
        f_str = _cell(row["F"], 8, 3, ".")
        p_str = _cell(row["p_value"], 8, 4, ".")
        lines.append(f" {src:<35} {row['df']:>4} {ss_s} {ms_s} {f_str} {p_str}")
    lines.append("=" * 70)
    lines.append(f" R² = {result['r_squared']:.4f} N = {result['n_obs']}")
    return "\n".join(lines)
65
+
66
+
67
@command("manova", usage="manova depvar1 depvar2 ... = groupvar")
def cmd_manova(session: Session, args: str) -> str:
    """One-way MANOVA: test group differences on multiple outcomes.

    ``args`` has the form ``y1 y2 y3 = groupvar``.  Names on the left that
    are columns of the data are used as outcomes; names that are not columns
    are left out of the model and listed in a note at the end of the output,
    so a mistyped variable does not go unnoticed.  Returns the formatted
    table, or a message string when the input is unusable or the fit fails.
    """
    df = session.require_data()
    # parse: "y1 y2 y3 = groupvar"
    if "=" not in args:
        return "Usage: manova depvar1 depvar2 ... = groupvar"

    lhs, _, rhs = args.partition("=")
    requested = lhs.split()
    columns = df.columns
    dep_vars = [name for name in requested if name in columns]
    ignored = [name for name in requested if name not in columns]
    group = rhs.strip()

    if not dep_vars:
        return "No valid dependent variables found."
    if group not in columns:
        return f"Group variable '{group}' not found."

    try:
        from openstat.stats.manova import fit_manova
        result = fit_manova(df, dep_vars, group)
    except Exception as exc:
        return f"manova error: {exc}"

    lines = [
        f"\nMANOVA: {', '.join(dep_vars)} ~ {group}",
        f" N = {result['n_obs']}, Groups = {result['n_groups']}",
        "=" * 75,
        f" {'Effect':<20} {'Test':<20} {'Stat':>8} {'F':>8} {'Num df':>6} {'Den df':>6} {'p':>8}",
        " " + "-" * 71,
    ]
    for eff in result["effects"]:
        p_value = eff['p_value']
        # NaN compares unequal to itself; show it as a dot padded to the
        # width of the p column so the row stays aligned.
        p_str = f"{p_value:>8.4f}" if p_value == p_value else f"{'.':>8}"
        lines.append(
            f" {eff['effect'][:20]:<20} {eff['test'][:20]:<20}"
            f" {eff['statistic']:>8.4f} {eff['F']:>8.3f}"
            f" {eff['num_df']:>6.1f} {eff['den_df']:>6.1f} {p_str}"
        )
    lines.append("=" * 75)
    if ignored:
        lines.append(f"Note: ignored unknown variable(s): {', '.join(ignored)}")
    return "\n".join(lines)
@@ -0,0 +1,233 @@
1
+ """Mediation and moderated-mediation analysis commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import numpy as np
6
+
7
+ from openstat.commands.base import command, CommandArgs, friendly_error
8
+ from openstat.session import Session
9
+
10
+
11
def _bootstrap_indirect(x, m, y, n_boot: int = 1000, seed: int | None = None):
    """Return bootstrap distribution of indirect effect a*b.

    Each of the ``n_boot`` replicates resamples ``len(x)`` row indices with
    replacement, then fits ``m ~ x`` (slope ``a``) and ``y ~ x + m``
    (coefficient ``b`` on ``m``) by least squares.  A replicate whose fit
    raises is skipped, so the returned array can be shorter than ``n_boot``.
    """
    generator = np.random.default_rng(seed)
    size = len(x)
    intercept = np.ones(size)

    def _product_of_paths(design_a, xs, ms, ys):
        # a path: m ~ x
        slope_a = np.linalg.lstsq(design_a, ms, rcond=None)[0][1]
        # b path: y ~ x + m
        design_b = np.column_stack([intercept, xs, ms])
        slope_b = np.linalg.lstsq(design_b, ys, rcond=None)[0][2]
        return slope_a * slope_b

    estimates = []
    for _ in range(n_boot):
        rows = generator.integers(0, size, size=size)
        xs, ms, ys = x[rows], m[rows], y[rows]
        design_a = np.column_stack([intercept, xs])
        try:
            estimates.append(_product_of_paths(design_a, xs, ms, ys))
        except Exception:
            continue
    return np.array(estimates)
30
+
31
+
32
@command("mediate", usage="mediate <y> <m> <x> [--boot=1000] [--seed=N]")
def cmd_mediate(session: Session, args: str) -> str:
    """Baron-Kenny mediation analysis with bootstrap CI for indirect effect.

    Tests whether m mediates the effect of x on y.

    Paths:
      a:  x → m
      b:  m → y  (controlling x)
      c:  x → y  (total)
      c': x → y  (direct, controlling m)
      indirect = a × b

    Path p-values are two-sided t-tests using the residual degrees of
    freedom of the regression each coefficient comes from.  The confidence
    interval for the indirect effect is the 2.5/97.5 percentile interval of
    the bootstrap distribution.  When ``--seed`` is absent the session's
    ``_repro_seed`` attribute is used if it exists.

    Examples:
      mediate income educ age
      mediate y mediator x --boot=5000 --seed=42
    """
    ca = CommandArgs(args)
    if len(ca.positional) < 3:
        return "Usage: mediate <y> <m> <x> [--boot=1000] [--seed=N]"

    y_col, m_col, x_col = ca.positional[0], ca.positional[1], ca.positional[2]

    try:
        # Options are parsed inside the try so that a malformed value is
        # reported through friendly_error instead of escaping the command.
        n_boot = int(ca.options.get("boot", 1000))
        seed = int(ca.options["seed"]) if "seed" in ca.options else getattr(session, "_repro_seed", None)
        if n_boot < 1:
            return "--boot must be a positive integer."

        df = session.require_data()
        sub = df.select([y_col, m_col, x_col]).drop_nulls()
        if sub.height < 10:
            return "Need at least 10 complete cases."

        y = sub[y_col].to_numpy().astype(float)
        m = sub[m_col].to_numpy().astype(float)
        x = sub[x_col].to_numpy().astype(float)
        n = len(y)
        ones = np.ones(n)

        def ols(X, y_):
            """Least-squares fit: coefficients, standard errors, residual df."""
            coef, _, _, _ = np.linalg.lstsq(X, y_, rcond=None)
            resid = y_ - X @ coef
            dof = max(n - X.shape[1], 1)
            sigma2 = np.dot(resid, resid) / dof
            cov = sigma2 * np.linalg.pinv(X.T @ X)
            se = np.sqrt(np.diag(cov))
            return coef, se, dof

        # a path: m ~ x
        a_coef, a_se, dof_a = ols(np.column_stack([ones, x]), m)
        a, se_a = a_coef[1], a_se[1]

        # b & c' paths: y ~ x + m
        bc_coef, bc_se, dof_bc = ols(np.column_stack([ones, x, m]), y)
        c_prime, se_cp = bc_coef[1], bc_se[1]
        b, se_b = bc_coef[2], bc_se[2]

        # c path: y ~ x (total)
        c_coef, c_se, dof_c = ols(np.column_stack([ones, x]), y)
        c, se_c = c_coef[1], c_se[1]

        indirect = a * b

        # Bootstrap CI for indirect
        ab_dist = _bootstrap_indirect(x, m, y, n_boot=n_boot, seed=seed)
        if ab_dist.size == 0:
            return "Bootstrap failed: no valid resamples."
        ci_lo = float(np.percentile(ab_dist, 2.5))
        ci_hi = float(np.percentile(ab_dist, 97.5))

        # Two-sided t-test p-values (paths a, b, c, c')
        from scipy import stats as _st

        def _p(coef, se, dof):
            # The model with both x and m estimates three parameters, so its
            # coefficients have one fewer degree of freedom than the
            # two-parameter models; each path passes its own dof.
            if se > 0:
                return 2 * _st.t.sf(abs(coef / se), df=dof)
            return float("nan")

        mediated_pct = 100 * abs(indirect / c) if abs(c) > 1e-12 else float("nan")

        lines = [
            f"Mediation Analysis: {y_col} ~ {x_col} → {m_col} → {y_col}",
            f"N = {n}",
            "=" * 56,
            f" {'Path':<20} {'Coef':>9} {'SE':>9} {'p':>9}",
            "-" * 56,
            f" {'a (x->m)':<20} {a:9.4f} {se_a:9.4f} {_p(a, se_a, dof_a):9.4f}",
            f" {'b (m->y|x)':<20} {b:9.4f} {se_b:9.4f} {_p(b, se_b, dof_bc):9.4f}",
            f" {'c total (x->y)':<20} {c:9.4f} {se_c:9.4f} {_p(c, se_c, dof_c):9.4f}",
            " {:<20} {:9.4f} {:9.4f} {:9.4f}".format("c' direct(x->y|m)", c_prime, se_cp, _p(c_prime, se_cp, dof_bc)),
            "=" * 56,
            f" Indirect (a×b): {indirect:9.4f}",
            f" Bootstrap 95% CI: [{ci_lo:.4f}, {ci_hi:.4f}] (B={n_boot})",
            f" % Mediated: {mediated_pct:.1f}%" if not np.isnan(mediated_pct) else " % Mediated: N/A",
            "",
            "Mediation: " + ("YES — CI excludes 0" if ci_lo * ci_hi > 0 else "NOT significant (CI includes 0)"),
        ]
        return "\n".join(lines)

    except Exception as e:
        return friendly_error(e, "mediate")
129
+
130
+
131
@command("modmediate", usage="modmediate <y> <m> <x> <w> [--boot=1000] [--seed=N]")
def cmd_modmediate(session: Session, args: str) -> str:
    """Moderated mediation (Hayes PROCESS Model 7 style).

    Tests whether the indirect effect of x on y through m
    is moderated by w (moderator of the a-path: x->m).

    Index of Moderated Mediation (IMM) with bootstrap CI.

    The moderator is mean-centred before the interaction term is formed.
    Conditional indirect effects are reported at the mean of w and one
    standard deviation either side, where the standard deviation is NumPy's
    default ``std()`` of the centred moderator.  When ``--seed`` is absent
    the session's ``_repro_seed`` attribute is used if it exists.

    Examples:
      modmediate outcome mediator predictor moderator --boot=2000
    """
    ca = CommandArgs(args)
    if len(ca.positional) < 4:
        return "Usage: modmediate <y> <m> <x> <w> [--boot=1000] [--seed=N]"

    y_col, m_col, x_col, w_col = (ca.positional[i] for i in range(4))

    try:
        # Options are parsed inside the try so that a malformed value is
        # reported through friendly_error instead of escaping the command.
        n_boot = int(ca.options.get("boot", 1000))
        seed = int(ca.options["seed"]) if "seed" in ca.options else getattr(session, "_repro_seed", None)
        if n_boot < 1:
            return "--boot must be a positive integer."

        df = session.require_data()
        sub = df.select([y_col, m_col, x_col, w_col]).drop_nulls()
        if sub.height < 20:
            return "Need at least 20 complete cases."

        y = sub[y_col].to_numpy().astype(float)
        m = sub[m_col].to_numpy().astype(float)
        x = sub[x_col].to_numpy().astype(float)
        w = sub[w_col].to_numpy().astype(float)
        n = len(y)
        ones = np.ones(n)

        # Mean-centre w (no rescaling) before building the interaction
        w_c = w - w.mean()
        xw = x * w_c

        def ols(X, y_):
            coef, _, _, _ = np.linalg.lstsq(X, y_, rcond=None)
            return coef

        # a-path moderated: m ~ x + w + x*w
        Xa = np.column_stack([ones, x, w_c, xw])
        a_coef = ols(Xa, m)
        a1, a3 = a_coef[1], a_coef[3]  # a1=x coef, a3=interaction

        # b-path: y ~ x + m
        Xb = np.column_stack([ones, x, m])
        b_coef = ols(Xb, y)
        b = b_coef[2]

        # Conditional indirect at w = mean ± 1SD
        w_sd = w_c.std()
        w_vals = {"Low (−1SD)": -w_sd, "Mean": 0.0, "High (+1SD)": w_sd}

        def _boot_imm(rng):
            """One bootstrap replicate of a3*b; NaN when a fit raises."""
            idx = rng.integers(0, n, size=n)
            xb, mb, yb, wb = x[idx], m[idx], y[idx], w_c[idx]
            xwb = xb * wb
            Xab = np.column_stack([ones, xb, wb, xwb])
            try:
                ac = ols(Xab, mb)
                Xbb = np.column_stack([ones, xb, mb])
                bc = ols(Xbb, yb)
                return ac[3] * bc[2]  # IMM = a3 * b
            except Exception:
                return np.nan

        rng = np.random.default_rng(seed)
        imm_boots = np.array([_boot_imm(rng) for _ in range(n_boot)])
        imm_boots = imm_boots[~np.isnan(imm_boots)]
        if imm_boots.size == 0:
            return "Bootstrap failed: no valid resamples."
        imm = a3 * b
        ci_lo = float(np.percentile(imm_boots, 2.5))
        ci_hi = float(np.percentile(imm_boots, 97.5))

        lines = [
            f"Moderated Mediation: {y_col} ~ {x_col}→{m_col}→{y_col}, moderated by {w_col}",
            f"N = {n}",
            "=" * 60,
            "Conditional Indirect Effects (a×b at levels of moderator):",
            f" {'Level':<15} {'a+a3*w':>10} {'Indirect':>10}",
            "-" * 60,
        ]
        for label, wv in w_vals.items():
            cond_a = a1 + a3 * wv
            indirect = cond_a * b
            lines.append(f" {label:<15} {cond_a:10.4f} {indirect:10.4f}")

        lines += [
            "=" * 60,
            f" Index of Moderated Mediation (a3×b): {imm:.4f}",
            f" Bootstrap 95% CI: [{ci_lo:.4f}, {ci_hi:.4f}] (B={n_boot})",
            "",
            "Moderated mediation: " + (
                "YES — IMM CI excludes 0" if ci_lo * ci_hi > 0
                else "NOT significant (CI includes 0)"
            ),
        ]
        return "\n".join(lines)

    except Exception as e:
        return friendly_error(e, "modmediate")