openstat-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstat/__init__.py +3 -0
- openstat/__main__.py +4 -0
- openstat/backends/__init__.py +16 -0
- openstat/backends/duckdb_backend.py +70 -0
- openstat/backends/polars_backend.py +52 -0
- openstat/cli.py +92 -0
- openstat/commands/__init__.py +82 -0
- openstat/commands/adv_stat_cmds.py +1255 -0
- openstat/commands/advanced_ml_cmds.py +576 -0
- openstat/commands/advreg_cmds.py +207 -0
- openstat/commands/alias_cmds.py +135 -0
- openstat/commands/arch_cmds.py +82 -0
- openstat/commands/arules_cmds.py +111 -0
- openstat/commands/automodel_cmds.py +212 -0
- openstat/commands/backend_cmds.py +82 -0
- openstat/commands/base.py +170 -0
- openstat/commands/bayes_cmds.py +71 -0
- openstat/commands/causal_cmds.py +269 -0
- openstat/commands/cluster_cmds.py +152 -0
- openstat/commands/data_cmds.py +996 -0
- openstat/commands/datamanip_cmds.py +672 -0
- openstat/commands/dataquality_cmds.py +174 -0
- openstat/commands/datetime_cmds.py +176 -0
- openstat/commands/dimreduce_cmds.py +184 -0
- openstat/commands/discrete_cmds.py +149 -0
- openstat/commands/dsl_cmds.py +143 -0
- openstat/commands/epi_cmds.py +93 -0
- openstat/commands/equiv_tobit_cmds.py +94 -0
- openstat/commands/esttab_cmds.py +196 -0
- openstat/commands/export_beamer_cmds.py +142 -0
- openstat/commands/export_cmds.py +201 -0
- openstat/commands/export_extra_cmds.py +240 -0
- openstat/commands/factor_cmds.py +180 -0
- openstat/commands/groupby_cmds.py +155 -0
- openstat/commands/help_cmds.py +237 -0
- openstat/commands/i18n_cmds.py +43 -0
- openstat/commands/import_extra_cmds.py +561 -0
- openstat/commands/influence_cmds.py +134 -0
- openstat/commands/iv_cmds.py +106 -0
- openstat/commands/manova_cmds.py +105 -0
- openstat/commands/mediate_cmds.py +233 -0
- openstat/commands/meta_cmds.py +284 -0
- openstat/commands/mi_cmds.py +228 -0
- openstat/commands/mixed_cmds.py +79 -0
- openstat/commands/mixture_changepoint_cmds.py +166 -0
- openstat/commands/ml_adv_cmds.py +147 -0
- openstat/commands/ml_cmds.py +178 -0
- openstat/commands/model_eval_cmds.py +142 -0
- openstat/commands/network_cmds.py +288 -0
- openstat/commands/nlquery_cmds.py +161 -0
- openstat/commands/nonparam_cmds.py +149 -0
- openstat/commands/outreg_cmds.py +247 -0
- openstat/commands/panel_cmds.py +141 -0
- openstat/commands/pdf_cmds.py +226 -0
- openstat/commands/pipeline_cmds.py +319 -0
- openstat/commands/plot_cmds.py +189 -0
- openstat/commands/plugin_cmds.py +79 -0
- openstat/commands/posthoc_cmds.py +153 -0
- openstat/commands/power_cmds.py +172 -0
- openstat/commands/profile_cmds.py +246 -0
- openstat/commands/rbridge_cmds.py +81 -0
- openstat/commands/regex_cmds.py +104 -0
- openstat/commands/report_cmds.py +48 -0
- openstat/commands/repro_cmds.py +129 -0
- openstat/commands/resampling_cmds.py +109 -0
- openstat/commands/reshape_cmds.py +223 -0
- openstat/commands/sem_cmds.py +177 -0
- openstat/commands/stat_cmds.py +1040 -0
- openstat/commands/stata_import_cmds.py +215 -0
- openstat/commands/string_cmds.py +124 -0
- openstat/commands/surv_cmds.py +145 -0
- openstat/commands/survey_cmds.py +153 -0
- openstat/commands/textanalysis_cmds.py +192 -0
- openstat/commands/ts_adv_cmds.py +136 -0
- openstat/commands/ts_cmds.py +195 -0
- openstat/commands/tui_cmds.py +111 -0
- openstat/commands/ux_cmds.py +191 -0
- openstat/commands/validate_cmds.py +270 -0
- openstat/commands/viz_adv_cmds.py +312 -0
- openstat/commands/viz_extra_cmds.py +251 -0
- openstat/commands/watch_cmds.py +69 -0
- openstat/config.py +106 -0
- openstat/dsl/__init__.py +0 -0
- openstat/dsl/parser.py +332 -0
- openstat/dsl/tokenizer.py +105 -0
- openstat/i18n.py +120 -0
- openstat/io/__init__.py +0 -0
- openstat/io/loader.py +187 -0
- openstat/jupyter/__init__.py +18 -0
- openstat/jupyter/display.py +18 -0
- openstat/jupyter/magic.py +60 -0
- openstat/logging_config.py +59 -0
- openstat/plots/__init__.py +0 -0
- openstat/plots/plotter.py +437 -0
- openstat/plots/surv_plots.py +32 -0
- openstat/plots/ts_plots.py +59 -0
- openstat/plugins/__init__.py +5 -0
- openstat/plugins/manager.py +69 -0
- openstat/repl.py +457 -0
- openstat/reporting/__init__.py +0 -0
- openstat/reporting/eda.py +208 -0
- openstat/reporting/report.py +67 -0
- openstat/script_runner.py +319 -0
- openstat/session.py +133 -0
- openstat/stats/__init__.py +0 -0
- openstat/stats/advanced_regression.py +269 -0
- openstat/stats/arch_garch.py +84 -0
- openstat/stats/bayesian.py +103 -0
- openstat/stats/causal.py +258 -0
- openstat/stats/clustering.py +206 -0
- openstat/stats/discrete.py +311 -0
- openstat/stats/epidemiology.py +119 -0
- openstat/stats/equiv_tobit.py +163 -0
- openstat/stats/factor.py +174 -0
- openstat/stats/imputation.py +282 -0
- openstat/stats/influence.py +78 -0
- openstat/stats/iv.py +131 -0
- openstat/stats/manova.py +124 -0
- openstat/stats/mixed.py +128 -0
- openstat/stats/ml.py +275 -0
- openstat/stats/ml_advanced.py +117 -0
- openstat/stats/model_eval.py +183 -0
- openstat/stats/models.py +1342 -0
- openstat/stats/nonparametric.py +130 -0
- openstat/stats/panel.py +179 -0
- openstat/stats/power.py +295 -0
- openstat/stats/resampling.py +203 -0
- openstat/stats/survey.py +213 -0
- openstat/stats/survival.py +196 -0
- openstat/stats/timeseries.py +142 -0
- openstat/stats/ts_advanced.py +114 -0
- openstat/types.py +11 -0
- openstat/web/__init__.py +1 -0
- openstat/web/app.py +117 -0
- openstat/web/session_manager.py +73 -0
- openstat/web/static/app.js +117 -0
- openstat/web/static/index.html +38 -0
- openstat/web/static/style.css +103 -0
- openstat_cli-1.0.0.dist-info/METADATA +748 -0
- openstat_cli-1.0.0.dist-info/RECORD +143 -0
- openstat_cli-1.0.0.dist-info/WHEEL +4 -0
- openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
- openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""DSL commands: local, global, forval, foreach, assert, display."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from openstat.commands.base import command
|
|
8
|
+
from openstat.session import Session
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
|
|
12
|
+
opts: dict[str, str] = {}
|
|
13
|
+
for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
|
|
14
|
+
opts[m.group(1).lower()] = m.group(2)
|
|
15
|
+
rest = re.sub(r'\w+\([^)]*\)', '', raw)
|
|
16
|
+
positional = [t.strip(',') for t in rest.split() if t.strip(',')]
|
|
17
|
+
return positional, opts
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@command("local", usage="local name value")
def cmd_local(session: Session, args: str) -> str:
    """Define a local macro variable.

    The first whitespace-delimited token of ``args`` is the macro name and
    everything after it is the value. Macros are stored in
    ``session._locals``, which is created the first time it is needed.

    Returns:
        A confirmation line, or the usage string when a name or value is
        missing.
    """
    pieces = args.strip().split(None, 1)
    if len(pieces) < 2:
        return "Usage: local name value"
    name, value = pieces
    try:
        store = session._locals
    except AttributeError:
        # First macro of the session: create the backing dict lazily.
        store = session._locals = {}
    store[name] = value
    return f"local `{name}' = {value}"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@command("global", usage="global name value")
def cmd_global(session: Session, args: str) -> str:
    """Define a global macro variable.

    ``args`` is split once on whitespace into a macro name and its value.
    The macro is written to ``session._globals`` (created on demand).

    Returns:
        A confirmation line, or the usage string when fewer than two parts
        were supplied.
    """
    tokens = args.strip().split(None, 1)
    if len(tokens) >= 2:
        name, value = tokens
        if not hasattr(session, "_globals"):
            session._globals = {}
        session._globals[name] = value
        return f"global ${name} = {value}"
    return "Usage: global name value"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@command("display", usage="display expression_or_text")
def cmd_display(session: Session, args: str) -> str:
    """Display text or evaluate a simple numeric expression.

    Surrounding double and single quotes are stripped, then local macros
    (written `name') and global macros (written $name) stored on the session
    are substituted. The resulting text is evaluated as a Python expression;
    if evaluation fails for any reason the substituted text is returned
    unchanged.

    Returns:
        ``str()`` of the evaluated expression, or the substituted text.
    """
    text = args.strip().strip('"').strip("'")

    # Local macros are delimited on both sides, so expansion order is irrelevant.
    if hasattr(session, "_locals"):
        for k, v in session._locals.items():
            text = text.replace(f"`{k}'", v)

    # Global macros have no closing delimiter: expanding `$a` before `$ab`
    # would corrupt the latter, so longer names are substituted first.
    if hasattr(session, "_globals"):
        by_length = sorted(
            session._globals.items(), key=lambda kv: len(kv[0]), reverse=True
        )
        for k, v in by_length:
            text = text.replace(f"${k}", v)

    # SECURITY NOTE(review): emptying ``__builtins__`` does not sandbox eval();
    # attribute access on literals can still reach arbitrary objects. This is
    # only acceptable while the input is typed by the local user. If display
    # text can come from untrusted scripts or the web front end, replace this
    # with an AST-based arithmetic evaluator.
    try:
        result = eval(text, {"__builtins__": {}})  # noqa: S307
        return str(result)
    except Exception:
        # Not an expression (plain text, unknown names, ...): echo it back.
        return text
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@command("assert", usage="assert condition_description [var op value]")
def cmd_assert(session: Session, args: str) -> str:
    """Assert that a condition holds in the data. Returns pass/fail.

    Only the simple form ``var op value`` is supported, where ``op`` is one
    of ``== != >= <= > <`` and ``value`` must parse as a float
    (e.g. ``assert age > 0``).

    Returns:
        A pass message, a failure message with the number of violating rows,
        a syntax/column hint, or ``"assert error: ..."`` when the value is
        not numeric or the comparison itself fails.
    """
    import operator

    df = session.require_data()
    m = re.match(r'(\w+)\s*(==|!=|>=|<=|>|<)\s*(.+)', args.strip())
    if not m:
        return "assert syntax: varname op value (e.g., assert age > 0)"
    var, op, val_str = m.group(1), m.group(2), m.group(3).strip()
    if var not in df.columns:
        return f"Column '{var}' not found."

    # The regex alternation guarantees ``op`` is one of these keys.
    compare = {
        "==": operator.eq,
        "!=": operator.ne,
        ">": operator.gt,
        "<": operator.lt,
        ">=": operator.ge,
        "<=": operator.le,
    }
    try:
        val = float(val_str)
        import polars as pl

        mask = compare[op](pl.col(var), val)
        # NOTE(review): rows where the comparison is null (missing values) are
        # presumably dropped by filter() and so not counted as violations;
        # confirm this is the intended treatment of missing data.
        n_fail = int(df.filter(~mask).height)
        if n_fail == 0:
            return f"Assertion passed: {var} {op} {val} holds for all {df.height} observations."
        return f"Assertion FAILED: {n_fail} of {df.height} observations violate {var} {op} {val}."
    except Exception as exc:
        return f"assert error: {exc}"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
@command("forval", usage="forval i=start/end : command args")
def cmd_forval(session: Session, args: str) -> str:
    """Execute a command for each value in a range. forval i=1/5 : display `i'

    ``start`` and ``end`` are non-negative integers and the range is
    inclusive. On each iteration the loop variable is stored as a local
    macro in ``session._locals`` and every `var' placeholder in the command
    is replaced by the current value before the command is run. The loop
    stops at the first command that raises.

    Returns:
        The non-empty command outputs joined by newlines, or a completion
        message when no command produced output. A reversed range runs zero
        iterations and is reported as such.
    """
    m = re.match(r'(\w+)\s*=\s*(\d+)\s*/\s*(\d+)\s*:\s*(.+)', args.strip())
    if not m:
        return "Usage: forval i=start/end : command args"
    var, cmd_str = m.group(1), m.group(4).strip()
    start, end = int(m.group(2)), int(m.group(3))
    from openstat.commands.base import run_command

    if not hasattr(session, "_locals"):
        session._locals = {}
    placeholder = f"`{var}'"  # loop-invariant, built once

    outputs = []
    for i in range(start, end + 1):
        session._locals[var] = str(i)
        expanded = cmd_str.replace(placeholder, str(i))
        try:
            out = run_command(session, expanded)
            if out:
                outputs.append(out)
        except Exception as exc:
            outputs.append(f"forval error at i={i}: {exc}")
            break

    if outputs:
        return "\n".join(outputs)
    # range() is empty when end < start; never report a negative count.
    n_iterations = max(0, end - start + 1)
    return f"forval completed {n_iterations} iterations."
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@command("foreach", usage="foreach var in list : command")
def cmd_foreach(session: Session, args: str) -> str:
    """Execute a command for each item in a list.

    Expected form: ``var in item1 item2 ... : command``. The items are split
    on whitespace. For every item the loop variable is stored in
    ``session._locals`` and each `var' placeholder in the command is
    replaced by the item before the command is run. The loop stops at the
    first command that raises.

    Returns:
        The non-empty command outputs joined by newlines, or a completion
        message when nothing was produced.
    """
    parsed = re.match(r'(\w+)\s+in\s+(.+?)\s*:\s*(.+)', args.strip())
    if parsed is None:
        return "Usage: foreach var in item1 item2 ... : command"

    var = parsed.group(1)
    items = parsed.group(2).strip().split()
    cmd_str = parsed.group(3).strip()
    from openstat.commands.base import run_command

    collected = []
    for item in items:
        if not hasattr(session, "_locals"):
            session._locals = {}
        session._locals[var] = item
        try:
            result = run_command(session, cmd_str.replace(f"`{var}'", item))
            if result:
                collected.append(result)
        except Exception as exc:
            collected.append(f"foreach error at {var}={item}: {exc}")
            break

    if not collected:
        return f"foreach completed {len(items)} iterations."
    return "\n".join(collected)
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Epidemiology commands: cs (cohort study), cc (case-control), ir (incidence rate)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from openstat.commands.base import command
|
|
8
|
+
from openstat.session import Session
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
|
|
12
|
+
opts: dict[str, str] = {}
|
|
13
|
+
for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
|
|
14
|
+
opts[m.group(1).lower()] = m.group(2)
|
|
15
|
+
rest = re.sub(r'\w+\([^)]*\)', '', raw)
|
|
16
|
+
positional = [t.strip(',') for t in rest.split() if t.strip(',')]
|
|
17
|
+
return positional, opts
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _fmt_epi(r: dict) -> str:
|
|
21
|
+
lines = [f"\n{r.get('test', 'Result')}", "=" * 50]
|
|
22
|
+
skip = {"test", "table_2x2", "_model"}
|
|
23
|
+
for k, v in r.items():
|
|
24
|
+
if k in skip:
|
|
25
|
+
continue
|
|
26
|
+
if isinstance(v, float):
|
|
27
|
+
lines.append(f" {k:<30} {v:.4f}")
|
|
28
|
+
else:
|
|
29
|
+
lines.append(f" {k:<30} {v}")
|
|
30
|
+
t = r.get("table_2x2")
|
|
31
|
+
if t:
|
|
32
|
+
lines.append("\n 2x2 Table:")
|
|
33
|
+
lines.append(f" {'':15} Exposed Unexposed")
|
|
34
|
+
lines.append(f" {'Cases':15} {t['a']:>8} {t['b']:>8}")
|
|
35
|
+
lines.append(f" {'Non-cases':15} {t['c']:>8} {t['d']:>8}")
|
|
36
|
+
return "\n".join(lines)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@command("cs", usage="cs outcome exposure")
def cmd_cs(session: Session, args: str) -> str:
    """Cohort study analysis: risk ratio, ARR, NNT.

    Takes two column names (outcome, then exposure), checks that both exist
    in the session data and hands them to ``cohort_study``.

    Returns:
        The formatted result, a usage/column hint, or ``"cs error: ..."``
        when the analysis raises.
    """
    from openstat.stats.epidemiology import cohort_study

    frame = session.require_data()
    names, _ = _stata_opts(args)
    if len(names) < 2:
        return "Usage: cs outcome exposure"

    outcome, exposure = names[:2]
    absent = [col for col in (outcome, exposure) if col not in frame.columns]
    if absent:
        # Report only the first missing column, outcome before exposure.
        return f"Column '{absent[0]}' not found."

    try:
        return _fmt_epi(cohort_study(frame, outcome, exposure))
    except Exception as exc:
        return f"cs error: {exc}"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@command("cc", usage="cc outcome exposure")
def cmd_cc(session: Session, args: str) -> str:
    """Case-control analysis: odds ratio with 95% CI.

    Takes two column names (outcome, then exposure), verifies both are
    present in the session data and passes them to ``case_control``.

    Returns:
        The formatted result, a usage/column hint, or ``"cc error: ..."``
        when the analysis raises.
    """
    from openstat.stats.epidemiology import case_control

    data = session.require_data()
    tokens, _unused_opts = _stata_opts(args)
    if len(tokens) < 2:
        return "Usage: cc outcome exposure"

    outcome = tokens[0]
    exposure = tokens[1]
    missing = next((c for c in (outcome, exposure) if c not in data.columns), None)
    if missing is not None:
        return f"Column '{missing}' not found."

    try:
        result = case_control(data, outcome, exposure)
    except Exception as exc:
        return f"cc error: {exc}"
    try:
        return _fmt_epi(result)
    except Exception as exc:
        # Formatting failures are reported the same way as analysis failures.
        return f"cc error: {exc}"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@command("ir", usage="ir outcome person_time_var")
def cmd_ir(session: Session, args: str) -> str:
    """Incidence rate analysis.

    Takes two column names (the outcome and the person-time variable),
    confirms both exist in the session data and calls ``incidence_rate``.

    Returns:
        The formatted result, a usage/column hint, or ``"ir error: ..."``
        when the analysis raises.
    """
    from openstat.stats.epidemiology import incidence_rate

    df = session.require_data()
    parts = _stata_opts(args)[0]
    if len(parts) < 2:
        return "Usage: ir outcome person_time_var"

    outcome, pt_var = parts[0], parts[1]
    if outcome not in df.columns:
        return f"Column '{outcome}' not found."
    if pt_var not in df.columns:
        return f"Column '{pt_var}' not found."

    try:
        return _fmt_epi(incidence_rate(df, outcome, pt_var))
    except Exception as exc:
        return f"ir error: {exc}"
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""Equivalence test and Tobit commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from openstat.commands.base import command
|
|
8
|
+
from openstat.session import Session
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
|
|
12
|
+
opts: dict[str, str] = {}
|
|
13
|
+
for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
|
|
14
|
+
opts[m.group(1).lower()] = m.group(2)
|
|
15
|
+
rest = re.sub(r'\w+\([^)]*\)', '', raw)
|
|
16
|
+
positional = [t.strip(',') for t in rest.split() if t.strip(',')]
|
|
17
|
+
return positional, opts
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _fmt(r: dict) -> str:
|
|
21
|
+
lines = [f"\n{r.get('test', 'Result')}", "=" * 55]
|
|
22
|
+
skip = {"test", "groups", "_model"}
|
|
23
|
+
for k, v in r.items():
|
|
24
|
+
if k in skip:
|
|
25
|
+
continue
|
|
26
|
+
if isinstance(v, float):
|
|
27
|
+
lines.append(f" {k:<35} {v:.6f}")
|
|
28
|
+
elif isinstance(v, list):
|
|
29
|
+
lines.append(f" {k:<35} {v}")
|
|
30
|
+
else:
|
|
31
|
+
lines.append(f" {k:<35} {v}")
|
|
32
|
+
lines.append("=" * 55)
|
|
33
|
+
return "\n".join(lines)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@command("tost", usage="tost var [by(group)] [mu(0) delta(0.5) alpha(0.05)]")
def cmd_tost(session: Session, args: str) -> str:
    """Two One-Sided Tests (TOST) for equivalence.

    With ``by(group)`` the two-sample variant ``tost_twomeans`` is run on
    ``var`` split by ``group``; otherwise the one-sample variant
    ``tost_onemean`` is run against ``mu`` (default 0). ``delta`` (default
    0.5) and ``alpha`` (default 0.05) are forwarded to either variant.

    Returns:
        The formatted result, a usage/column hint, or ``"tost error: ..."``
        when an option is not numeric or the test itself raises.
    """
    from openstat.stats.equiv_tobit import tost_onemean, tost_twomeans

    df = session.require_data()
    positional, opts = _stata_opts(args)
    if not positional:
        return "Usage: tost var [by(group)] [mu(0) delta(0.5) alpha(0.05)]"
    col = positional[0]
    if col not in df.columns:
        return f"Column '{col}' not found."
    by = opts.get("by")
    try:
        # Parsed inside the try so a malformed option such as delta(abc) is
        # reported as a "tost error" instead of escaping as a ValueError.
        delta = float(opts.get("delta", 0.5))
        alpha = float(opts.get("alpha", 0.05))
        if by:
            if by not in df.columns:
                return f"Group column '{by}' not found."
            r = tost_twomeans(df, col, by, delta=delta, alpha=alpha)
        else:
            mu = float(opts.get("mu", 0.0))
            r = tost_onemean(df, col, mu=mu, delta=delta, alpha=alpha)
        return _fmt(r)
    except Exception as exc:
        return f"tost error: {exc}"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@command("tobit", usage="tobit dep var1 var2 ... [ll(0) ul(none)]")
def cmd_tobit(session: Session, args: str) -> str:
    """Tobit regression for censored outcomes.

    The first positional token is the dependent variable; the remaining
    tokens are predictors. ``ll(x)`` sets the left-censoring limit (default
    0) and ``ul(x)`` the right-censoring limit; ``ul(none)`` or no ``ul``
    option means no right limit. The fitted result is stored on
    ``session._last_model``.

    Returns:
        A plain-text summary of the fit, a usage/column hint, or
        ``"tobit error: ..."`` when an option is not numeric or fitting fails.
    """
    from openstat.stats.equiv_tobit import fit_tobit

    df = session.require_data()
    positional, opts = _stata_opts(args)
    if len(positional) < 2:
        return "Usage: tobit dep var1 [var2 ...] [ll(0) ul(none)]"
    dep = positional[0]
    # Predictor names that are not columns of the data are silently dropped.
    indeps = [c for c in positional[1:] if c in df.columns]
    if dep not in df.columns:
        return f"Column '{dep}' not found."
    try:
        # Limits are parsed inside the try so bad values are reported as a
        # "tobit error" rather than raising; the documented ul(none) maps to
        # "no right limit" instead of crashing in float("none").
        left = float(opts["ll"]) if "ll" in opts else 0.0
        ul_raw = opts.get("ul", "").strip()
        right = None if ul_raw.lower() in ("", "none") else float(ul_raw)

        r = fit_tobit(df, dep, indeps, left=left, right=right)
        session._last_model = r
        lines = ["\nTobit Regression", "=" * 55]
        lines.append(f" {'Dep. Variable':<30} {dep}")
        lines.append(f" {'N obs':<30} {r['n_obs']}")
        lines.append(f" {'Left censoring':<30} {r['left_censoring']} (n={r['n_censored_left']})")
        lines.append(f" {'Right censoring':<30} {r['right_censoring']} (n={r['n_censored_right']})")
        lines.append(f" {'Log-likelihood':<30} {r['log_likelihood']:.4f}")
        lines.append(f" {'AIC':<30} {r['aic']:.4f}")
        lines.append(f" {'Sigma':<30} {r['sigma']:.4f}")
        lines.append(f"\n {'Variable':<25} {'Coef':>10}")
        lines.append(" " + "-" * 37)
        for nm, coef in r["params"].items():
            lines.append(f" {nm:<25} {coef:>10.4f}")
        return "\n".join(lines)
    except Exception as exc:
        return f"tobit error: {exc}"
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""esttab and tabstat commands for multi-model comparison tables."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
import polars as pl
|
|
8
|
+
|
|
9
|
+
from openstat.commands.base import command
|
|
10
|
+
from openstat.session import Session
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
|
|
14
|
+
opts: dict[str, str] = {}
|
|
15
|
+
for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
|
|
16
|
+
opts[m.group(1).lower()] = m.group(2)
|
|
17
|
+
rest = re.sub(r'\w+\([^)]*\)', '', raw)
|
|
18
|
+
positional = [t.strip(',') for t in rest.split() if t.strip(',')]
|
|
19
|
+
return positional, opts
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@command("esttab", usage="esttab [stats(coef,se,pval)] [stars]")
def cmd_esttab(session: Session, args: str) -> str:
    """Display a publication-style comparison table of all stored regression results.

    Each entry of ``session.results`` that is a dict, or exposes a
    ``details`` dict, and carries ``"params"`` or ``"coefficients"`` becomes
    one numbered column. Rows are the union of parameter names in
    first-seen order. Standard errors are printed under the coefficients
    when ``se`` is among the requested ``stats(...)`` (default ``coef,se``);
    significance stars are added when ``stars`` appears in the arguments.

    Returns:
        The rendered table, or a short message when there is nothing to show.
    """
    _, opts = _stata_opts(args)
    # Tokens are stripped and lower-cased so `stats(coef, se)` still matches
    # "se" instead of silently dropping the SE rows.
    stats_req = [s.strip().lower() for s in opts.get("stats", "coef,se").split(",")]
    show_stars = "stars" in args

    raw_results = session.results
    if not raw_results:
        return "No stored results. Run regression commands first."

    # Normalize: ModelResult objects -> dict via .details; anything else is skipped.
    results = []
    for res in raw_results:
        if isinstance(res, dict):
            results.append(res)
        elif hasattr(res, "details"):
            results.append(res.details)

    # Collect all parameter names across models, preserving first-seen order.
    all_params: list[str] = []
    for res in results:
        if "params" in res:
            names = res["params"]
        elif "coefficients" in res:
            names = res["coefficients"]
        else:
            continue
        for p in names:
            if p not in all_params:
                all_params.append(p)

    if not all_params:
        return "No regression results with coefficients found."

    col_w = 14
    nan = float("nan")

    def _get_coef(res, param):
        # "coefficients" entries are per-parameter dicts; "params" maps straight to numbers.
        if "coefficients" in res:
            return res["coefficients"].get(param, {}).get("mean", nan)
        if "params" in res:
            return res["params"].get(param, nan)
        return nan

    def _get_se(res, param):
        if "coefficients" in res:
            return res["coefficients"].get(param, {}).get("std", nan)
        if "std_errors" in res:
            return res["std_errors"].get(param, nan)
        return nan

    def _get_pval(res, param):
        if "p_values" in res:
            return res["p_values"].get(param, nan)
        if "coefficients" in res:
            # NOTE(review): "prob_positive" is used as a stand-in p-value for
            # results without "p_values"; confirm that this is intended.
            return res["coefficients"].get(param, {}).get("prob_positive", nan)
        return nan

    def _stars(p):
        if p != p:  # NaN is the only value that differs from itself
            return ""
        if p < 0.001:
            return "***"
        if p < 0.01:
            return "**"
        if p < 0.05:
            return "*"
        return ""

    model_list = [r for r in results if "params" in r or "coefficients" in r]
    header = f"{'':25}" + "".join(f" {'('+str(i+1)+')':>{col_w}}" for i in range(len(model_list)))
    sep = "-" * (25 + (col_w + 2) * len(model_list))
    lines = ["\nesttab — Regression Comparison", sep, header, sep]

    for param in all_params:
        coef_row = f"{param:<25}"
        se_row = f"{'':25}"
        for res in model_list:
            coef = _get_coef(res, param)
            se = _get_se(res, param)
            pval = _get_pval(res, param)
            stars = _stars(pval) if show_stars else ""
            if coef != coef:
                # Parameter absent from this model: leave both cells blank.
                coef_row += f" {'':>{col_w}}"
                se_row += f" {'':>{col_w}}"
            else:
                coef_str = f"{coef:.4f}{stars}"
                coef_row += f" {coef_str:>{col_w}}"
                se_str = f"({se:.4f})" if se == se else ""
                se_row += f" {se_str:>{col_w}}"
        lines.append(coef_row)
        if "se" in stats_req:
            lines.append(se_row)

    lines.append(sep)
    # Model-level stats
    n_row = f"{'N':25}"
    r2_row = f"{'R-squared':25}"
    for res in model_list:
        n = res.get("n_obs", "")
        r2 = res.get("r_squared", res.get("pseudo_r2", ""))
        n_row += f" {str(n):>{col_w}}"
        r2_row += f" {(f'{r2:.4f}' if isinstance(r2, float) else ''):>{col_w}}"
    lines.append(n_row)
    lines.append(r2_row)
    lines.append(sep)
    if show_stars:
        lines.append("* p<0.05 ** p<0.01 *** p<0.001")
    return "\n".join(lines)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@command("tabstat", usage="tabstat var1 [var2 ...] [, stats(mean sd min max n) by(groupvar)]")
def cmd_tabstat(session: Session, args: str) -> str:
    """Display summary statistics table (enhanced version of summarize).

    Positional tokens that name existing columns are summarised. The
    ``stats(...)`` option lists the statistics to show, separated by spaces
    and/or commas (``n mean sd min max median p50 sum var``; default
    ``mean sd min max n``). With ``by(groupvar)`` the table is repeated for
    every distinct value of that column. Columns that cannot be cast to
    Float64 are skipped; statistics that cannot be computed are left blank.

    Returns:
        The rendered table, or a message when no requested variable exists.
    """
    df = session.require_data()
    positional, opts = _stata_opts(args)
    cols = [c for c in positional if c in df.columns]
    if not cols:
        return "No valid numeric variables found."

    # Accept both the documented space-separated form and comma-separated lists.
    stats_req = [
        s.lower()
        for s in re.split(r"[,\s]+", opts.get("stats", "mean,sd,min,max,n"))
        if s
    ]
    by_raw = opts.get("by", "")
    by_var = by_raw.strip() if by_raw.strip() in df.columns else None

    nan = float("nan")

    def _num(value) -> float:
        # Aggregations can yield None when there is nothing to aggregate.
        return nan if value is None else float(value)

    def _compute_stats(series: pl.Series, stats: list[str]) -> dict:
        # Work on non-null values only, so an all-null column (or a single
        # valid value for sd/var) gives a blank cell instead of float(None).
        valid = series.drop_nulls()
        n = valid.len()
        res = {}
        if "n" in stats:
            res["N"] = n
        if "mean" in stats:
            res["Mean"] = _num(valid.mean()) if n else nan
        if "sd" in stats:
            res["Std Dev"] = _num(valid.std()) if n > 1 else nan
        if "min" in stats:
            res["Min"] = _num(valid.min()) if n else nan
        if "max" in stats:
            res["Max"] = _num(valid.max()) if n else nan
        if "median" in stats or "p50" in stats:
            res["Median"] = _num(valid.median()) if n else nan
        if "sum" in stats:
            res["Sum"] = _num(valid.sum()) if n else nan
        if "var" in stats:
            res["Variance"] = _num(valid.var()) if n > 1 else nan
        return res

    label_for = {"n": "N", "mean": "Mean", "sd": "Std Dev", "min": "Min", "max": "Max",
                 "median": "Median", "p50": "Median", "sum": "Sum", "var": "Variance"}
    stat_labels = []
    for s in stats_req:
        # Unknown statistic names keep their own text as the column label.
        lbl = label_for.get(s, s)
        if lbl not in stat_labels:
            stat_labels.append(lbl)

    col_w = 12
    rule_w = 22 + col_w * len(stat_labels)
    lines = ["\ntabstat", "=" * rule_w]
    header = f"{'Variable':<20}" + "".join(f" {s:>{col_w}}" for s in stat_labels)
    lines.append(header)
    lines.append("-" * rule_w)

    def _add_rows(data: pl.DataFrame, prefix: str = ""):
        for col in cols:
            try:
                s = data[col].cast(pl.Float64)
            except Exception:
                # Non-numeric column: skip it rather than abort the table.
                continue
            stat_vals = _compute_stats(s, stats_req)
            row = f"{prefix + col:<20}"
            for lbl in stat_labels:
                val = stat_vals.get(lbl, nan)
                if isinstance(val, float) and val != val:
                    row += f" {'':>{col_w}}"
                elif isinstance(val, int):
                    row += f" {val:>{col_w}}"
                else:
                    row += f" {val:>{col_w}.4f}"
            lines.append(row)

    if by_var:
        groups = df[by_var].unique().sort().to_list()
        for g in groups:
            lines.append(f"\n {by_var} = {g}")
            lines.append("-" * rule_w)
            _add_rows(df.filter(pl.col(by_var) == g), " ")
    else:
        _add_rows(df)

    lines.append("=" * rule_w)
    return "\n".join(lines)
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Export results as LaTeX Beamer presentation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
import os
|
|
5
|
+
from datetime import date
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from openstat.commands.base import command, CommandArgs, friendly_error
|
|
9
|
+
from openstat.session import Session
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@command("export beamer", usage="export beamer [path] [--title=...] [--author=...] [--theme=Madrid]")
def cmd_export_beamer(session: Session, args: str) -> str:
    """Export analysis results as a LaTeX Beamer presentation (.tex).

    Generates a slide deck with a title page, a dataset overview, one slide
    per stored model result, one slide per saved plot that still exists on
    disk, and a slide with the last 15 history entries.

    Options:
        --title=<txt>       presentation title (default: 'OpenStat Analysis')
        --author=<txt>      author name (default: 'OpenStat')
        --theme=<t>         Beamer theme (default: Madrid)
        --colortheme=<t>    Beamer colour theme (default: beaver)
        --out=<path>        output .tex path; takes precedence over a
                            positional path (default: outputs/presentation.tex)

    Examples:
        export beamer
        export beamer results/slides.tex --title="Income Analysis" --author="J. Smith"
        export beamer --theme=Berlin --colortheme=whale

    Returns:
        A confirmation with the absolute output path, slide count and a
        compile hint, or the ``friendly_error`` text when anything fails.
    """
    ca = CommandArgs(args)
    out_path = (
        ca.options.get("out")
        or (ca.positional[0] if ca.positional else None)
        or "outputs/presentation.tex"
    )
    title = ca.options.get("title", "OpenStat Analysis")
    author = ca.options.get("author", "OpenStat")
    theme = ca.options.get("theme", "Madrid")
    color_theme = ca.options.get("colortheme", "beaver")

    try:
        lines = []

        def L(s=""):
            lines.append(s)

        L(r"\documentclass{beamer}")
        L(r"\usetheme{" + theme + "}")
        L(r"\usecolortheme{" + color_theme + "}")
        L(r"\usepackage{booktabs}")
        L(r"\usepackage{graphicx}")
        L(r"\usepackage{amsmath}")
        L()
        # Only underscores are escaped, so other LaTeX markup in the title
        # or author is passed through unchanged.
        L(r"\title{" + title.replace("_", r"\_") + "}")
        L(r"\author{" + author.replace("_", r"\_") + "}")
        L(r"\date{" + date.today().isoformat() + "}")
        L()
        L(r"\begin{document}")
        L()
        L(r"\begin{frame}")
        L(r" \titlepage")
        L(r"\end{frame}")
        L()

        # Dataset overview slide
        ds_name = (session.dataset_name or "Unknown").replace("_", r"\_")
        shape_str = session.shape_str
        L(r"\begin{frame}{Dataset Overview}")
        L(r" \begin{itemize}")
        L(r" \item \textbf{Dataset:} " + ds_name)
        L(r" \item \textbf{Shape:} " + shape_str)
        L(r" \item \textbf{Date:} " + date.today().isoformat())
        L(r" \end{itemize}")
        L(r"\end{frame}")
        L()

        # Model result slides
        for mr in session.results:
            # Truncate before escaping so a cut never lands inside "\_".
            model_title = f"{mr.name} — {mr.formula}"[:60].replace("_", r"\_")
            # Frames containing verbatim must be [fragile] or Beamer fails to
            # compile them (the history slide below already does this).
            L(r"\begin{frame}[fragile]{" + model_title + "}")
            L(r" \scriptsize")
            L(r" \begin{verbatim}")
            # Truncate table to fit slide
            table_lines = mr.table.split("\n")[:25]
            for tl in table_lines:
                L(" " + tl[:80])
            L(r" \end{verbatim}")
            # Model stats
            d = mr.details
            stats_parts = []
            if d.get("n"):
                stats_parts.append(f"N={d['n']}")
            if d.get("r2") is not None:
                stats_parts.append(f"R²={d['r2']:.3f}")
            if d.get("aic") is not None:
                stats_parts.append(f"AIC={d['aic']:.1f}")
            if stats_parts:
                L(r" \medskip")
                L(r" \normalsize " + " \\quad ".join(stats_parts))
            L(r"\end{frame}")
            L()

        # Plot slides
        for plot_path in session.plot_paths:
            if os.path.exists(plot_path):
                base = os.path.basename(plot_path).replace("_", r"\_")
                # \includegraphics takes the path unescaped, with forward slashes.
                raw_path = plot_path.replace("\\", "/")
                L(r"\begin{frame}{" + base + "}")
                L(r" \centering")
                L(r" \includegraphics[width=0.85\textwidth]{" + raw_path + "}")
                L(r"\end{frame}")
                L()

        # Commands history slide
        if session.history:
            L(r"\begin{frame}[fragile]{Command History}")
            L(r" \scriptsize")
            L(r" \begin{verbatim}")
            for h in session.history[-15:]:
                L(" " + h[:80])
            L(r" \end{verbatim}")
            L(r"\end{frame}")
            L()

        L(r"\end{document}")
        L()

        tex_content = "\n".join(lines)
        Path(out_path).parent.mkdir(parents=True, exist_ok=True)
        Path(out_path).write_text(tex_content, encoding="utf-8")

        abs_path = os.path.abspath(out_path)
        # Also matches "\begin{frame}[fragile]", so every slide is counted.
        n_slides = tex_content.count(r"\begin{frame}")
        return (
            f"LaTeX Beamer presentation saved: {abs_path}\n"
            f" Slides: {n_slides} Theme: {theme}/{color_theme}\n"
            f" Compile: pdflatex {out_path}"
        )
    except Exception as e:
        return friendly_error(e, "export beamer")
|