openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. openstat/__init__.py +3 -0
  2. openstat/__main__.py +4 -0
  3. openstat/backends/__init__.py +16 -0
  4. openstat/backends/duckdb_backend.py +70 -0
  5. openstat/backends/polars_backend.py +52 -0
  6. openstat/cli.py +92 -0
  7. openstat/commands/__init__.py +82 -0
  8. openstat/commands/adv_stat_cmds.py +1255 -0
  9. openstat/commands/advanced_ml_cmds.py +576 -0
  10. openstat/commands/advreg_cmds.py +207 -0
  11. openstat/commands/alias_cmds.py +135 -0
  12. openstat/commands/arch_cmds.py +82 -0
  13. openstat/commands/arules_cmds.py +111 -0
  14. openstat/commands/automodel_cmds.py +212 -0
  15. openstat/commands/backend_cmds.py +82 -0
  16. openstat/commands/base.py +170 -0
  17. openstat/commands/bayes_cmds.py +71 -0
  18. openstat/commands/causal_cmds.py +269 -0
  19. openstat/commands/cluster_cmds.py +152 -0
  20. openstat/commands/data_cmds.py +996 -0
  21. openstat/commands/datamanip_cmds.py +672 -0
  22. openstat/commands/dataquality_cmds.py +174 -0
  23. openstat/commands/datetime_cmds.py +176 -0
  24. openstat/commands/dimreduce_cmds.py +184 -0
  25. openstat/commands/discrete_cmds.py +149 -0
  26. openstat/commands/dsl_cmds.py +143 -0
  27. openstat/commands/epi_cmds.py +93 -0
  28. openstat/commands/equiv_tobit_cmds.py +94 -0
  29. openstat/commands/esttab_cmds.py +196 -0
  30. openstat/commands/export_beamer_cmds.py +142 -0
  31. openstat/commands/export_cmds.py +201 -0
  32. openstat/commands/export_extra_cmds.py +240 -0
  33. openstat/commands/factor_cmds.py +180 -0
  34. openstat/commands/groupby_cmds.py +155 -0
  35. openstat/commands/help_cmds.py +237 -0
  36. openstat/commands/i18n_cmds.py +43 -0
  37. openstat/commands/import_extra_cmds.py +561 -0
  38. openstat/commands/influence_cmds.py +134 -0
  39. openstat/commands/iv_cmds.py +106 -0
  40. openstat/commands/manova_cmds.py +105 -0
  41. openstat/commands/mediate_cmds.py +233 -0
  42. openstat/commands/meta_cmds.py +284 -0
  43. openstat/commands/mi_cmds.py +228 -0
  44. openstat/commands/mixed_cmds.py +79 -0
  45. openstat/commands/mixture_changepoint_cmds.py +166 -0
  46. openstat/commands/ml_adv_cmds.py +147 -0
  47. openstat/commands/ml_cmds.py +178 -0
  48. openstat/commands/model_eval_cmds.py +142 -0
  49. openstat/commands/network_cmds.py +288 -0
  50. openstat/commands/nlquery_cmds.py +161 -0
  51. openstat/commands/nonparam_cmds.py +149 -0
  52. openstat/commands/outreg_cmds.py +247 -0
  53. openstat/commands/panel_cmds.py +141 -0
  54. openstat/commands/pdf_cmds.py +226 -0
  55. openstat/commands/pipeline_cmds.py +319 -0
  56. openstat/commands/plot_cmds.py +189 -0
  57. openstat/commands/plugin_cmds.py +79 -0
  58. openstat/commands/posthoc_cmds.py +153 -0
  59. openstat/commands/power_cmds.py +172 -0
  60. openstat/commands/profile_cmds.py +246 -0
  61. openstat/commands/rbridge_cmds.py +81 -0
  62. openstat/commands/regex_cmds.py +104 -0
  63. openstat/commands/report_cmds.py +48 -0
  64. openstat/commands/repro_cmds.py +129 -0
  65. openstat/commands/resampling_cmds.py +109 -0
  66. openstat/commands/reshape_cmds.py +223 -0
  67. openstat/commands/sem_cmds.py +177 -0
  68. openstat/commands/stat_cmds.py +1040 -0
  69. openstat/commands/stata_import_cmds.py +215 -0
  70. openstat/commands/string_cmds.py +124 -0
  71. openstat/commands/surv_cmds.py +145 -0
  72. openstat/commands/survey_cmds.py +153 -0
  73. openstat/commands/textanalysis_cmds.py +192 -0
  74. openstat/commands/ts_adv_cmds.py +136 -0
  75. openstat/commands/ts_cmds.py +195 -0
  76. openstat/commands/tui_cmds.py +111 -0
  77. openstat/commands/ux_cmds.py +191 -0
  78. openstat/commands/validate_cmds.py +270 -0
  79. openstat/commands/viz_adv_cmds.py +312 -0
  80. openstat/commands/viz_extra_cmds.py +251 -0
  81. openstat/commands/watch_cmds.py +69 -0
  82. openstat/config.py +106 -0
  83. openstat/dsl/__init__.py +0 -0
  84. openstat/dsl/parser.py +332 -0
  85. openstat/dsl/tokenizer.py +105 -0
  86. openstat/i18n.py +120 -0
  87. openstat/io/__init__.py +0 -0
  88. openstat/io/loader.py +187 -0
  89. openstat/jupyter/__init__.py +18 -0
  90. openstat/jupyter/display.py +18 -0
  91. openstat/jupyter/magic.py +60 -0
  92. openstat/logging_config.py +59 -0
  93. openstat/plots/__init__.py +0 -0
  94. openstat/plots/plotter.py +437 -0
  95. openstat/plots/surv_plots.py +32 -0
  96. openstat/plots/ts_plots.py +59 -0
  97. openstat/plugins/__init__.py +5 -0
  98. openstat/plugins/manager.py +69 -0
  99. openstat/repl.py +457 -0
  100. openstat/reporting/__init__.py +0 -0
  101. openstat/reporting/eda.py +208 -0
  102. openstat/reporting/report.py +67 -0
  103. openstat/script_runner.py +319 -0
  104. openstat/session.py +133 -0
  105. openstat/stats/__init__.py +0 -0
  106. openstat/stats/advanced_regression.py +269 -0
  107. openstat/stats/arch_garch.py +84 -0
  108. openstat/stats/bayesian.py +103 -0
  109. openstat/stats/causal.py +258 -0
  110. openstat/stats/clustering.py +206 -0
  111. openstat/stats/discrete.py +311 -0
  112. openstat/stats/epidemiology.py +119 -0
  113. openstat/stats/equiv_tobit.py +163 -0
  114. openstat/stats/factor.py +174 -0
  115. openstat/stats/imputation.py +282 -0
  116. openstat/stats/influence.py +78 -0
  117. openstat/stats/iv.py +131 -0
  118. openstat/stats/manova.py +124 -0
  119. openstat/stats/mixed.py +128 -0
  120. openstat/stats/ml.py +275 -0
  121. openstat/stats/ml_advanced.py +117 -0
  122. openstat/stats/model_eval.py +183 -0
  123. openstat/stats/models.py +1342 -0
  124. openstat/stats/nonparametric.py +130 -0
  125. openstat/stats/panel.py +179 -0
  126. openstat/stats/power.py +295 -0
  127. openstat/stats/resampling.py +203 -0
  128. openstat/stats/survey.py +213 -0
  129. openstat/stats/survival.py +196 -0
  130. openstat/stats/timeseries.py +142 -0
  131. openstat/stats/ts_advanced.py +114 -0
  132. openstat/types.py +11 -0
  133. openstat/web/__init__.py +1 -0
  134. openstat/web/app.py +117 -0
  135. openstat/web/session_manager.py +73 -0
  136. openstat/web/static/app.js +117 -0
  137. openstat/web/static/index.html +38 -0
  138. openstat/web/static/style.css +103 -0
  139. openstat_cli-1.0.0.dist-info/METADATA +748 -0
  140. openstat_cli-1.0.0.dist-info/RECORD +143 -0
  141. openstat_cli-1.0.0.dist-info/WHEEL +4 -0
  142. openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
  143. openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,143 @@
1
+ """DSL commands: local, global, forval, foreach, assert, display."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from openstat.commands.base import command
8
+ from openstat.session import Session
9
+
10
+
11
+ def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
12
+ opts: dict[str, str] = {}
13
+ for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
14
+ opts[m.group(1).lower()] = m.group(2)
15
+ rest = re.sub(r'\w+\([^)]*\)', '', raw)
16
+ positional = [t.strip(',') for t in rest.split() if t.strip(',')]
17
+ return positional, opts
18
+
19
+
20
@command("local", usage="local name value")
def cmd_local(session: Session, args: str) -> str:
    """Define a local macro variable.

    The first whitespace-delimited token of ``args`` is the macro name and
    the rest of the line is its value, stored as a string in
    ``session._locals`` (created here on first use).  Returns a confirmation
    line, or the usage string when fewer than two tokens are given.
    """
    pieces = args.strip().split(None, 1)
    if len(pieces) < 2:
        return "Usage: local name value"
    macro_name, macro_value = pieces
    if not hasattr(session, "_locals"):
        session._locals = {}
    session._locals[macro_name] = macro_value
    return f"local `{macro_name}' = {macro_value}"
31
+
32
+
33
@command("global", usage="global name value")
def cmd_global(session: Session, args: str) -> str:
    """Define a global macro variable.

    Same parsing as a local macro: the first token is the name, the rest of
    the line is the value.  The pair is stored in ``session._globals``
    (created here on first use) and a confirmation line is returned.
    """
    tokens = args.strip().split(None, 1)
    if len(tokens) < 2:
        return "Usage: global name value"
    key = tokens[0]
    text = tokens[1]
    if not hasattr(session, "_globals"):
        session._globals = {}
    session._globals[key] = text
    return f"global ${key} = {text}"
44
+
45
+
46
@command("display", usage="display expression_or_text")
def cmd_display(session: Session, args: str) -> str:
    """Display text or evaluate a simple numeric expression.

    Surrounding quotes are stripped, then local (`` `name' ``) and global
    (``$name``) macros stored on the session are substituted.  If the result
    is pure arithmetic or a comparison over numeric literals, its value is
    returned as a string; otherwise the substituted text is returned as is.

    The expression is evaluated by walking its AST instead of calling
    ``eval``: ``eval`` with an empty ``__builtins__`` can still be escaped
    through attribute access on literals, and ``args`` is raw user input.
    """
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add, ast.Sub: operator.sub,
        ast.Mult: operator.mul, ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv, ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    compare_ops = {
        ast.Eq: operator.eq, ast.NotEq: operator.ne,
        ast.Lt: operator.lt, ast.LtE: operator.le,
        ast.Gt: operator.gt, ast.GtE: operator.ge,
    }

    def _evaluate(node):
        """Evaluate a whitelisted arithmetic AST node; raise ValueError otherwise."""
        if isinstance(node, ast.Expression):
            return _evaluate(node.body)
        if isinstance(node, ast.Constant):
            value = node.value
            # bool is an int subclass; keep True/False as plain text.
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                return value
            raise ValueError("unsupported literal")
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](_evaluate(node.operand))
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = _evaluate(node.left)
            right = _evaluate(node.right)
            # Bound exponents so "9**9**9" cannot stall the session.
            if isinstance(node.op, ast.Pow) and abs(right) > 1000:
                raise ValueError("exponent too large")
            return binary_ops[type(node.op)](left, right)
        if isinstance(node, ast.Compare):
            left = _evaluate(node.left)
            for op, comparator in zip(node.ops, node.comparators):
                if type(op) not in compare_ops:
                    raise ValueError("unsupported comparison")
                right = _evaluate(comparator)
                if not compare_ops[type(op)](left, right):
                    return False
                left = right
            return True
        raise ValueError("unsupported expression")

    text = args.strip().strip('"').strip("'")
    # Substitute local macros `name'
    if hasattr(session, "_locals"):
        for k, v in session._locals.items():
            text = text.replace(f"`{k}'", str(v))
    # Substitute global macros $name, longest names first so that "$a"
    # cannot clobber the prefix of "$ab".
    if hasattr(session, "_globals"):
        for k in sorted(session._globals, key=len, reverse=True):
            text = text.replace(f"${k}", str(session._globals[k]))
    # Try simple arithmetic evaluation; anything else is displayed verbatim.
    try:
        return str(_evaluate(ast.parse(text.strip(), mode="eval")))
    except (SyntaxError, ValueError, TypeError, ArithmeticError,
            RecursionError, MemoryError):
        return text
64
+
65
+
66
@command("assert", usage="assert condition_description [var op value]")
def cmd_assert(session: Session, args: str) -> str:
    """Assert that a condition holds in the data. Returns pass/fail.

    Accepts the simple form ``var op value`` (e.g. ``assert age > 0``) where
    ``op`` is one of ``== != >= <= > <`` and ``value`` parses as a float.
    The comparison is built as a polars expression; rows for which its
    negation is true are counted as violations.

    Returns a pass/fail sentence, a syntax hint when the input does not match
    the form, a "not found" message for an unknown column, or
    ``assert error: ...`` when the value cannot be parsed or the comparison
    fails.
    """
    import operator

    df = session.require_data()
    m = re.match(r'(\w+)\s*(==|!=|>=|<=|>|<)\s*(.+)', args.strip())
    if not m:
        return "assert syntax: varname op value (e.g., assert age > 0)"
    var, op, val_str = m.group(1), m.group(2), m.group(3).strip()
    if var not in df.columns:
        return f"Column '{var}' not found."
    # Dispatch on the operator so only the requested expression is built.
    comparators = {
        "==": operator.eq, "!=": operator.ne,
        ">": operator.gt, "<": operator.lt,
        ">=": operator.ge, "<=": operator.le,
    }
    try:
        val = float(val_str)
        import polars as pl
        mask = comparators[op](pl.col(var), val)
        n_fail = int(df.filter(~mask).height)
        if n_fail == 0:
            return f"Assertion passed: {var} {op} {val} holds for all {df.height} observations."
        return f"Assertion FAILED: {n_fail} of {df.height} observations violate {var} {op} {val}."
    except Exception as exc:
        return f"assert error: {exc}"
95
+
96
+
97
@command("forval", usage="forval i=start/end : command args")
def cmd_forval(session: Session, args: str) -> str:
    """Execute a command for each value in a range. forval i=1/5 : display `i'

    ``start`` and ``end`` are non-negative integers and the range is
    inclusive.  On each pass the loop variable is stored in
    ``session._locals`` and every `` `var' `` placeholder in the command text
    is replaced by the current value before the command is run through
    ``run_command``.  The first failing iteration records an error line and
    stops the loop.

    Returns the non-empty command outputs joined by newlines, or a completion
    message when no iteration produced output.
    """
    m = re.match(r'(\w+)\s*=\s*(\d+)\s*/\s*(\d+)\s*:\s*(.+)', args.strip())
    if not m:
        return "Usage: forval i=start/end : command args"
    var, start, end, cmd_str = m.group(1), int(m.group(2)), int(m.group(3)), m.group(4).strip()
    from openstat.commands.base import run_command

    values = range(start, end + 1)
    placeholder = f"`{var}'"
    outputs = []
    for i in values:
        if not hasattr(session, "_locals"):
            session._locals = {}
        session._locals[var] = str(i)
        expanded = cmd_str.replace(placeholder, str(i))
        try:
            out = run_command(session, expanded)
            if out:
                outputs.append(out)
        except Exception as exc:
            outputs.append(f"forval error at i={i}: {exc}")
            break
    if outputs:
        return "\n".join(outputs)
    # len(range) is 0 for a descending range such as 5/3, where the former
    # "end - start + 1" reported a negative iteration count.
    return f"forval completed {len(values)} iterations."
119
+
120
+
121
@command("foreach", usage="foreach var in list : command")
def cmd_foreach(session: Session, args: str) -> str:
    """Execute a command for each item in a list.

    Input has the form ``var in item1 item2 ... : command``.  For every
    whitespace-separated item the loop variable is stored in
    ``session._locals`` and each `` `var' `` placeholder in the command text
    is replaced by the item before the command is run via ``run_command``.
    The first failing item records an error line and ends the loop.
    """
    parsed = re.match(r'(\w+)\s+in\s+(.+?)\s*:\s*(.+)', args.strip())
    if parsed is None:
        return "Usage: foreach var in item1 item2 ... : command"
    macro = parsed.group(1)
    values = parsed.group(2).strip().split()
    template = parsed.group(3).strip()
    from openstat.commands.base import run_command

    collected = []
    for value in values:
        if not hasattr(session, "_locals"):
            session._locals = {}
        session._locals[macro] = value
        command_line = template.replace(f"`{macro}'", value)
        try:
            result = run_command(session, command_line)
            if result:
                collected.append(result)
        except Exception as exc:
            collected.append(f"foreach error at {macro}={value}: {exc}")
            break
    if collected:
        return "\n".join(collected)
    return f"foreach completed {len(values)} iterations."
@@ -0,0 +1,93 @@
1
+ """Epidemiology commands: cs (cohort study), cc (case-control), ir (incidence rate)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from openstat.commands.base import command
8
+ from openstat.session import Session
9
+
10
+
11
+ def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
12
+ opts: dict[str, str] = {}
13
+ for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
14
+ opts[m.group(1).lower()] = m.group(2)
15
+ rest = re.sub(r'\w+\([^)]*\)', '', raw)
16
+ positional = [t.strip(',') for t in rest.split() if t.strip(',')]
17
+ return positional, opts
18
+
19
+
20
+ def _fmt_epi(r: dict) -> str:
21
+ lines = [f"\n{r.get('test', 'Result')}", "=" * 50]
22
+ skip = {"test", "table_2x2", "_model"}
23
+ for k, v in r.items():
24
+ if k in skip:
25
+ continue
26
+ if isinstance(v, float):
27
+ lines.append(f" {k:<30} {v:.4f}")
28
+ else:
29
+ lines.append(f" {k:<30} {v}")
30
+ t = r.get("table_2x2")
31
+ if t:
32
+ lines.append("\n 2x2 Table:")
33
+ lines.append(f" {'':15} Exposed Unexposed")
34
+ lines.append(f" {'Cases':15} {t['a']:>8} {t['b']:>8}")
35
+ lines.append(f" {'Non-cases':15} {t['c']:>8} {t['d']:>8}")
36
+ return "\n".join(lines)
37
+
38
+
39
@command("cs", usage="cs outcome exposure")
def cmd_cs(session: Session, args: str) -> str:
    """Cohort study analysis: risk ratio, ARR, NNT.

    Takes two positional column names (outcome, exposure), checks that both
    exist in the loaded data, and formats the result of ``cohort_study``.
    Any exception from the analysis is returned as a ``cs error: ...`` line.
    """
    from openstat.stats.epidemiology import cohort_study

    df = session.require_data()
    names, _options = _stata_opts(args)
    if len(names) < 2:
        return "Usage: cs outcome exposure"
    outcome, exposure = names[:2]
    missing = next((col for col in (outcome, exposure) if col not in df.columns), None)
    if missing is not None:
        return f"Column '{missing}' not found."
    try:
        return _fmt_epi(cohort_study(df, outcome, exposure))
    except Exception as exc:
        return f"cs error: {exc}"
56
+
57
+
58
@command("cc", usage="cc outcome exposure")
def cmd_cc(session: Session, args: str) -> str:
    """Case-control analysis: odds ratio with 95% CI.

    Expects an outcome column followed by an exposure column.  Both must be
    present in the loaded data; the result of ``case_control`` is then
    rendered as text.  Failures are reported as ``cc error: ...``.
    """
    from openstat.stats.epidemiology import case_control

    df = session.require_data()
    parsed = _stata_opts(args)
    columns = parsed[0]
    if len(columns) < 2:
        return "Usage: cc outcome exposure"
    outcome = columns[0]
    exposure = columns[1]
    available = df.columns
    if outcome not in available:
        return f"Column '{outcome}' not found."
    if exposure not in available:
        return f"Column '{exposure}' not found."
    try:
        analysis = case_control(df, outcome, exposure)
        return _fmt_epi(analysis)
    except Exception as exc:
        return f"cc error: {exc}"
75
+
76
+
77
@command("ir", usage="ir outcome person_time_var")
def cmd_ir(session: Session, args: str) -> str:
    """Incidence rate analysis.

    The two positional arguments name the outcome column and the person-time
    column.  After both are confirmed to exist, ``incidence_rate`` is called
    and its result formatted; exceptions become an ``ir error: ...`` line.
    """
    from openstat.stats.epidemiology import incidence_rate

    df = session.require_data()
    positional, _ = _stata_opts(args)
    if len(positional) < 2:
        return "Usage: ir outcome person_time_var"
    outcome, pt_var = positional[0], positional[1]
    absent = [name for name in (outcome, pt_var) if name not in df.columns]
    if absent:
        # Report only the first missing column.
        return f"Column '{absent[0]}' not found."
    try:
        rates = incidence_rate(df, outcome, pt_var)
        return _fmt_epi(rates)
    except Exception as exc:
        return f"ir error: {exc}"
@@ -0,0 +1,94 @@
1
+ """Equivalence test and Tobit commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from openstat.commands.base import command
8
+ from openstat.session import Session
9
+
10
+
11
+ def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
12
+ opts: dict[str, str] = {}
13
+ for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
14
+ opts[m.group(1).lower()] = m.group(2)
15
+ rest = re.sub(r'\w+\([^)]*\)', '', raw)
16
+ positional = [t.strip(',') for t in rest.split() if t.strip(',')]
17
+ return positional, opts
18
+
19
+
20
+ def _fmt(r: dict) -> str:
21
+ lines = [f"\n{r.get('test', 'Result')}", "=" * 55]
22
+ skip = {"test", "groups", "_model"}
23
+ for k, v in r.items():
24
+ if k in skip:
25
+ continue
26
+ if isinstance(v, float):
27
+ lines.append(f" {k:<35} {v:.6f}")
28
+ elif isinstance(v, list):
29
+ lines.append(f" {k:<35} {v}")
30
+ else:
31
+ lines.append(f" {k:<35} {v}")
32
+ lines.append("=" * 55)
33
+ return "\n".join(lines)
34
+
35
+
36
@command("tost", usage="tost var [by(group)] [mu(0) delta(0.5) alpha(0.05)]")
def cmd_tost(session: Session, args: str) -> str:
    """Two One-Sided Tests (TOST) for equivalence.

    The first positional argument is the column to test.  With ``by(group)``
    a two-sample test is run via ``tost_twomeans``; otherwise a one-sample
    test against ``mu`` (default 0) via ``tost_onemean``.  ``delta``
    (default 0.5) and ``alpha`` (default 0.05) are passed through to both.

    Returns the formatted result, a usage / "not found" message, or
    ``tost error: ...``.  Non-numeric option values are reported that way
    too instead of raising out of the command.
    """
    from openstat.stats.equiv_tobit import tost_onemean, tost_twomeans

    df = session.require_data()
    positional, opts = _stata_opts(args)
    if not positional:
        return "Usage: tost var [by(group)] [mu(0) delta(0.5) alpha(0.05)]"
    col = positional[0]
    if col not in df.columns:
        return f"Column '{col}' not found."
    # Tolerate padding inside the parentheses, e.g. by( group ).
    by = opts.get("by", "").strip()
    if by and by not in df.columns:
        return f"Group column '{by}' not found."
    try:
        # Parsed inside the try so a bad number becomes a message, as every
        # other failure of this command already does.
        delta = float(opts.get("delta", 0.5))
        alpha = float(opts.get("alpha", 0.05))
        if by:
            r = tost_twomeans(df, col, by, delta=delta, alpha=alpha)
        else:
            mu = float(opts.get("mu", 0.0))
            r = tost_onemean(df, col, mu=mu, delta=delta, alpha=alpha)
        return _fmt(r)
    except Exception as exc:
        return f"tost error: {exc}"
61
+
62
+
63
@command("tobit", usage="tobit dep var1 var2 ... [ll(0) ul(none)]")
def cmd_tobit(session: Session, args: str) -> str:
    """Tobit regression for censored outcomes.

    The first positional argument is the dependent column; the remaining
    positionals that name existing columns are the regressors.  ``ll(x)``
    sets the left-censoring limit (default 0) and ``ul(x)`` the right limit;
    ``ul(none)`` or an omitted ``ul`` means no right censoring.

    The fit returned by ``fit_tobit`` is stored on ``session._last_model``
    and summarised as text.  Positional tokens that are not columns are
    ignored but listed in a trailing note.  Errors are returned as messages.
    """
    from openstat.stats.equiv_tobit import fit_tobit

    df = session.require_data()
    positional, opts = _stata_opts(args)
    if len(positional) < 2:
        return "Usage: tobit dep var1 [var2 ...] [ll(0) ul(none)]"
    dep = positional[0]
    if dep not in df.columns:
        return f"Column '{dep}' not found."
    indeps = [c for c in positional[1:] if c in df.columns]
    ignored = [c for c in positional[1:] if c not in df.columns]
    if not indeps:
        return "No valid independent variables found: " + ", ".join(ignored)
    try:
        left = float(opts["ll"]) if "ll" in opts else 0.0
        # The usage string advertises ul(none); float("none") would raise.
        ul_text = opts.get("ul", "").strip()
        right = None if ul_text.lower() in ("", "none") else float(ul_text)
    except ValueError as exc:
        return f"tobit error: {exc}"
    try:
        r = fit_tobit(df, dep, indeps, left=left, right=right)
        session._last_model = r
        lines = ["\nTobit Regression", "=" * 55]
        lines.append(f" {'Dep. Variable':<30} {dep}")
        lines.append(f" {'N obs':<30} {r['n_obs']}")
        lines.append(f" {'Left censoring':<30} {r['left_censoring']} (n={r['n_censored_left']})")
        lines.append(f" {'Right censoring':<30} {r['right_censoring']} (n={r['n_censored_right']})")
        lines.append(f" {'Log-likelihood':<30} {r['log_likelihood']:.4f}")
        lines.append(f" {'AIC':<30} {r['aic']:.4f}")
        lines.append(f" {'Sigma':<30} {r['sigma']:.4f}")
        lines.append(f"\n {'Variable':<25} {'Coef':>10}")
        lines.append(" " + "-" * 37)
        for nm, coef in r["params"].items():
            lines.append(f" {nm:<25} {coef:>10.4f}")
        if ignored:
            # Surface dropped tokens so a typo does not silently change the model.
            lines.append("\n Note: ignored unknown variable(s): " + ", ".join(ignored))
        return "\n".join(lines)
    except Exception as exc:
        return f"tobit error: {exc}"
@@ -0,0 +1,196 @@
1
+ """esttab and tabstat commands for multi-model comparison tables."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ import polars as pl
8
+
9
+ from openstat.commands.base import command
10
+ from openstat.session import Session
11
+
12
+
13
+ def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
14
+ opts: dict[str, str] = {}
15
+ for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
16
+ opts[m.group(1).lower()] = m.group(2)
17
+ rest = re.sub(r'\w+\([^)]*\)', '', raw)
18
+ positional = [t.strip(',') for t in rest.split() if t.strip(',')]
19
+ return positional, opts
20
+
21
+
22
@command("esttab", usage="esttab [stats(coef,se,pval)] [stars]")
def cmd_esttab(session: Session, args: str) -> str:
    """Display a publication-style comparison table of all stored regression results.

    Each entry of ``session.results`` that is a dict, or that exposes a
    ``details`` attribute, contributes one column if it carries ``params``
    or ``coefficients``.  Rows are the union of parameter names in
    first-seen order.  Coefficients are always shown; ``stats(...)`` may add
    ``se`` (standard errors, in parentheses) and ``pval`` (p-values, in
    brackets).  Stat names may be separated by commas and/or spaces.  If
    ``stars`` appears in ``args``, significance stars follow each
    coefficient.

    Returns the table as text, or a message when nothing can be tabulated.
    """
    _positional, opts = _stata_opts(args)
    # Split on commas and/or whitespace so "stats(coef, se)" still finds "se".
    stats_req = [s.lower() for s in re.split(r"[,\s]+", opts.get("stats", "coef,se")) if s]
    show_stars = "stars" in args
    nan = float("nan")

    raw_results = session.results
    if not raw_results:
        return "No stored results. Run regression commands first."

    # Normalize: ModelResult objects -> dict via .details
    results = []
    for res in raw_results:
        if isinstance(res, dict):
            results.append(res)
        elif hasattr(res, "details"):
            results.append(res.details)

    # Collect all parameter names across models, preserving first-seen order.
    all_params: list[str] = []
    for res in results:
        if "params" in res:
            names = res["params"]
        elif "coefficients" in res:
            names = res["coefficients"]
        else:
            continue
        for p in names:
            if p not in all_params:
                all_params.append(p)

    if not all_params:
        return "No regression results with coefficients found."

    col_w = 14

    def _is_nan(x) -> bool:
        # NaN is the only value that differs from itself.
        return x != x

    def _get_coef(res, param):
        if "coefficients" in res:
            return res["coefficients"].get(param, {}).get("mean", nan)
        if "params" in res:
            return res["params"].get(param, nan)
        return nan

    def _get_se(res, param):
        if "coefficients" in res:
            return res["coefficients"].get(param, {}).get("std", nan)
        if "std_errors" in res:
            return res["std_errors"].get(param, nan)
        return nan

    def _get_pval(res, param):
        if "p_values" in res:
            return res["p_values"].get(param, nan)
        if "coefficients" in res:
            return res["coefficients"].get(param, {}).get("prob_positive", nan)
        return nan

    def _stars(p):
        if _is_nan(p):
            return ""
        if p < 0.001:
            return "***"
        if p < 0.01:
            return "**"
        if p < 0.05:
            return "*"
        return ""

    model_list = [r for r in results if "params" in r or "coefficients" in r]
    header = f"{'':25}" + "".join(f" {'('+str(i+1)+')':>{col_w}}" for i in range(len(model_list)))
    sep = "-" * (25 + (col_w + 2) * len(model_list))
    lines = ["\nesttab — Regression Comparison", sep, header, sep]

    blank_cell = f" {'':>{col_w}}"
    for param in all_params:
        coef_row = f"{param:<25}"
        se_row = f"{'':25}"
        pval_row = f"{'':25}"
        for res in model_list:
            coef = _get_coef(res, param)
            se = _get_se(res, param)
            pval = _get_pval(res, param)
            if _is_nan(coef):
                # Parameter absent from this model: leave the column empty.
                coef_row += blank_cell
                se_row += blank_cell
                pval_row += blank_cell
                continue
            stars = _stars(pval) if show_stars else ""
            coef_str = f"{coef:.4f}{stars}"
            se_str = "" if _is_nan(se) else f"({se:.4f})"
            pval_str = "" if _is_nan(pval) else f"[{pval:.4f}]"
            coef_row += f" {coef_str:>{col_w}}"
            se_row += f" {se_str:>{col_w}}"
            pval_row += f" {pval_str:>{col_w}}"
        lines.append(coef_row)
        if "se" in stats_req:
            lines.append(se_row)
        if "pval" in stats_req:
            lines.append(pval_row)

    lines.append(sep)
    # Model-level stats
    n_row = f"{'N':25}"
    r2_row = f"{'R-squared':25}"
    for res in model_list:
        n = res.get("n_obs", "")
        r2 = res.get("r_squared", res.get("pseudo_r2", ""))
        n_row += f" {str(n):>{col_w}}"
        r2_row += f" {(f'{r2:.4f}' if isinstance(r2, float) else ''):>{col_w}}"
    lines.append(n_row)
    lines.append(r2_row)
    lines.append(sep)
    if show_stars:
        lines.append("* p<0.05 ** p<0.01 *** p<0.001")
    return "\n".join(lines)
129
+
130
+
131
@command("tabstat", usage="tabstat var1 [var2 ...] [, stats(mean sd min max n) by(groupvar)]")
def cmd_tabstat(session: Session, args: str) -> str:
    """Display summary statistics table (enhanced version of summarize).

    Positional arguments that name existing columns are summarised.
    ``stats(...)`` selects the statistics (``n mean sd min max median p50
    sum var``), separated by spaces and/or commas; the default is
    ``mean sd min max n``.  ``by(col)`` repeats the table for each distinct
    value of ``col``.

    Columns that cannot be cast to Float64 are skipped.  Statistics are
    computed on non-null values; a statistic that is undefined (no values,
    or fewer than two for sd/var) is left blank.
    """
    df = session.require_data()
    positional, opts = _stata_opts(args)
    cols = [c for c in positional if c in df.columns]
    if not cols:
        return "No valid numeric variables found."
    # The usage string shows space-separated names; accept commas and spaces.
    stats_req = [
        s.lower()
        for s in re.split(r"[,\s]+", opts.get("stats", "mean,sd,min,max,n"))
        if s
    ] or ["mean", "sd", "min", "max", "n"]
    by_raw = opts.get("by", "")
    by_var = by_raw.strip() if by_raw.strip() in df.columns else None
    nan = float("nan")

    def _compute_stats(series: pl.Series, stats: list[str]) -> dict:
        # Guard on the non-null count so an all-null column yields blanks
        # instead of float(None).
        values = series.drop_nulls()
        n = values.len()
        res = {}
        if "n" in stats:
            res["N"] = n
        if "mean" in stats:
            res["Mean"] = float(values.mean()) if n else nan
        if "sd" in stats:
            res["Std Dev"] = float(values.std()) if n > 1 else nan
        if "min" in stats:
            res["Min"] = float(values.min()) if n else nan
        if "max" in stats:
            res["Max"] = float(values.max()) if n else nan
        if "median" in stats or "p50" in stats:
            res["Median"] = float(values.median()) if n else nan
        if "sum" in stats:
            res["Sum"] = float(values.sum()) if n else nan
        if "var" in stats:
            res["Variance"] = float(values.var()) if n > 1 else nan
        return res

    label_for = {"n": "N", "mean": "Mean", "sd": "Std Dev", "min": "Min", "max": "Max",
                 "median": "Median", "p50": "Median", "sum": "Sum", "var": "Variance"}
    stat_labels = []
    for s in stats_req:
        lbl = label_for.get(s, s)
        if lbl not in stat_labels:
            stat_labels.append(lbl)

    col_w = 12
    rule_w = 22 + col_w * len(stat_labels)
    lines = ["\ntabstat", "=" * rule_w]
    header = f"{'Variable':<20}" + "".join(f" {s:>{col_w}}" for s in stat_labels)
    lines.append(header)
    lines.append("-" * rule_w)

    def _add_rows(data: pl.DataFrame, prefix: str = ""):
        for col in cols:
            try:
                s = data[col].cast(pl.Float64)
            except Exception:
                # Column cannot be treated as numeric; leave it out.
                continue
            stat_vals = _compute_stats(s, stats_req)
            row = f"{prefix + col:<20}"
            for lbl in stat_labels:
                val = stat_vals.get(lbl, nan)
                if isinstance(val, float) and val != val:
                    row += f" {'':>{col_w}}"
                elif isinstance(val, int):
                    row += f" {val:>{col_w}}"
                else:
                    row += f" {val:>{col_w}.4f}"
            lines.append(row)

    if by_var:
        groups = df[by_var].unique().sort().to_list()
        for g in groups:
            lines.append(f"\n {by_var} = {g}")
            lines.append("-" * rule_w)
            # An equality test against null never matches, so select the
            # null group explicitly.
            in_group = pl.col(by_var).is_null() if g is None else pl.col(by_var) == g
            _add_rows(df.filter(in_group), " ")
    else:
        _add_rows(df)

    lines.append("=" * rule_w)
    return "\n".join(lines)
@@ -0,0 +1,142 @@
1
+ """Export results as LaTeX Beamer presentation."""
2
+
3
+ from __future__ import annotations
4
+ import os
5
+ from datetime import date
6
+ from pathlib import Path
7
+
8
+ from openstat.commands.base import command, CommandArgs, friendly_error
9
+ from openstat.session import Session
10
+
11
+
12
@command("export beamer", usage="export beamer [path] [--title=...] [--author=...] [--theme=Madrid]")
def cmd_export_beamer(session: Session, args: str) -> str:
    """Export analysis results as a LaTeX Beamer presentation (.tex).

    Generates a slide deck with dataset summary, model results,
    and references to saved plots.

    Options:
      --title=<txt>       presentation title (default: 'OpenStat Analysis')
      --author=<txt>      author name
      --theme=<t>         Beamer theme (default: Madrid)
      --colortheme=<t>    Beamer colour theme (default: beaver)
      --out=<path>        output .tex path

    Examples:
      export beamer
      export beamer results/slides.tex --title="Income Analysis" --author="J. Smith"
      export beamer --theme=Berlin --colortheme=whale

    The output path is taken from ``--out``, else the first positional
    argument, else ``outputs/presentation.tex``; parent directories are
    created.  Returns a summary with the absolute path and slide count, or
    the result of ``friendly_error`` if anything fails.
    """
    import re

    def _tex(text: str) -> str:
        # Escape the text-mode characters that break compilation (_ & % #),
        # skipping any the caller already escaped with a backslash.
        return re.sub(r"(?<!\\)([_&%#])", r"\\\1", text)

    ca = CommandArgs(args)
    out_path = (
        ca.options.get("out")
        or (ca.positional[0] if ca.positional else None)
        or "outputs/presentation.tex"
    )
    title = ca.options.get("title", "OpenStat Analysis")
    author = ca.options.get("author", "OpenStat")
    theme = ca.options.get("theme", "Madrid")
    color_theme = ca.options.get("colortheme", "beaver")

    try:
        lines = []

        def L(s=""):
            lines.append(s)

        today = date.today().isoformat()

        L(r"\documentclass{beamer}")
        L(r"\usetheme{" + theme + "}")
        L(r"\usecolortheme{" + color_theme + "}")
        L(r"\usepackage{booktabs}")
        L(r"\usepackage{graphicx}")
        L(r"\usepackage{amsmath}")
        L()
        L(r"\title{" + _tex(title) + "}")
        L(r"\author{" + _tex(author) + "}")
        L(r"\date{" + today + "}")
        L()
        L(r"\begin{document}")
        L()
        L(r"\begin{frame}")
        L(r" \titlepage")
        L(r"\end{frame}")
        L()

        # Dataset overview slide
        ds_name = _tex(session.dataset_name or "Unknown")
        shape_str = session.shape_str
        L(r"\begin{frame}{Dataset Overview}")
        L(r" \begin{itemize}")
        L(r" \item \textbf{Dataset:} " + ds_name)
        L(r" \item \textbf{Shape:} " + shape_str)
        L(r" \item \textbf{Date:} " + today)
        L(r" \end{itemize}")
        L(r"\end{frame}")
        L()

        # Model result slides
        for mr in session.results:
            # Truncate before escaping so an escape sequence is never cut in half.
            model_title = _tex(f"{mr.name} — {mr.formula}"[:60])
            # A frame containing verbatim must be declared [fragile],
            # exactly as the command-history frame is.
            L(r"\begin{frame}[fragile]{" + model_title + "}")
            L(r" \scriptsize")
            L(r" \begin{verbatim}")
            # Truncate table to fit slide
            for tl in mr.table.split("\n")[:25]:
                L(" " + tl[:80])
            L(r" \end{verbatim}")
            # Model stats
            d = mr.details
            stats_parts = []
            if d.get("n"):
                stats_parts.append(f"N={d['n']}")
            if d.get("r2") is not None:
                stats_parts.append(f"R²={d['r2']:.3f}")
            if d.get("aic") is not None:
                stats_parts.append(f"AIC={d['aic']:.1f}")
            if stats_parts:
                L(r" \medskip")
                L(r" \normalsize " + " \\quad ".join(stats_parts))
            L(r"\end{frame}")
            L()

        # Plot slides (only for files that still exist on disk)
        for plot_path in session.plot_paths:
            if os.path.exists(plot_path):
                base = _tex(os.path.basename(plot_path))
                # The graphics path is passed unescaped, with forward slashes.
                raw_path = plot_path.replace("\\", "/")
                L(r"\begin{frame}{" + base + "}")
                L(r" \centering")
                L(r" \includegraphics[width=0.85\textwidth]{" + raw_path + "}")
                L(r"\end{frame}")
                L()

        # Commands history slide (last 15 commands)
        if session.history:
            L(r"\begin{frame}[fragile]{Command History}")
            L(r" \scriptsize")
            L(r" \begin{verbatim}")
            for h in session.history[-15:]:
                L(" " + h[:80])
            L(r" \end{verbatim}")
            L(r"\end{frame}")
            L()

        L(r"\end{document}")
        L()

        tex_content = "\n".join(lines)
        Path(out_path).parent.mkdir(parents=True, exist_ok=True)
        Path(out_path).write_text(tex_content, encoding="utf-8")

        abs_path = os.path.abspath(out_path)
        n_slides = tex_content.count(r"\begin{frame}")
        return (
            f"LaTeX Beamer presentation saved: {abs_path}\n"
            f" Slides: {n_slides} Theme: {theme}/{color_theme}\n"
            f" Compile: pdflatex {out_path}"
        )
    except Exception as e:
        return friendly_error(e, "export beamer")