openstat-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstat/__init__.py +3 -0
- openstat/__main__.py +4 -0
- openstat/backends/__init__.py +16 -0
- openstat/backends/duckdb_backend.py +70 -0
- openstat/backends/polars_backend.py +52 -0
- openstat/cli.py +92 -0
- openstat/commands/__init__.py +82 -0
- openstat/commands/adv_stat_cmds.py +1255 -0
- openstat/commands/advanced_ml_cmds.py +576 -0
- openstat/commands/advreg_cmds.py +207 -0
- openstat/commands/alias_cmds.py +135 -0
- openstat/commands/arch_cmds.py +82 -0
- openstat/commands/arules_cmds.py +111 -0
- openstat/commands/automodel_cmds.py +212 -0
- openstat/commands/backend_cmds.py +82 -0
- openstat/commands/base.py +170 -0
- openstat/commands/bayes_cmds.py +71 -0
- openstat/commands/causal_cmds.py +269 -0
- openstat/commands/cluster_cmds.py +152 -0
- openstat/commands/data_cmds.py +996 -0
- openstat/commands/datamanip_cmds.py +672 -0
- openstat/commands/dataquality_cmds.py +174 -0
- openstat/commands/datetime_cmds.py +176 -0
- openstat/commands/dimreduce_cmds.py +184 -0
- openstat/commands/discrete_cmds.py +149 -0
- openstat/commands/dsl_cmds.py +143 -0
- openstat/commands/epi_cmds.py +93 -0
- openstat/commands/equiv_tobit_cmds.py +94 -0
- openstat/commands/esttab_cmds.py +196 -0
- openstat/commands/export_beamer_cmds.py +142 -0
- openstat/commands/export_cmds.py +201 -0
- openstat/commands/export_extra_cmds.py +240 -0
- openstat/commands/factor_cmds.py +180 -0
- openstat/commands/groupby_cmds.py +155 -0
- openstat/commands/help_cmds.py +237 -0
- openstat/commands/i18n_cmds.py +43 -0
- openstat/commands/import_extra_cmds.py +561 -0
- openstat/commands/influence_cmds.py +134 -0
- openstat/commands/iv_cmds.py +106 -0
- openstat/commands/manova_cmds.py +105 -0
- openstat/commands/mediate_cmds.py +233 -0
- openstat/commands/meta_cmds.py +284 -0
- openstat/commands/mi_cmds.py +228 -0
- openstat/commands/mixed_cmds.py +79 -0
- openstat/commands/mixture_changepoint_cmds.py +166 -0
- openstat/commands/ml_adv_cmds.py +147 -0
- openstat/commands/ml_cmds.py +178 -0
- openstat/commands/model_eval_cmds.py +142 -0
- openstat/commands/network_cmds.py +288 -0
- openstat/commands/nlquery_cmds.py +161 -0
- openstat/commands/nonparam_cmds.py +149 -0
- openstat/commands/outreg_cmds.py +247 -0
- openstat/commands/panel_cmds.py +141 -0
- openstat/commands/pdf_cmds.py +226 -0
- openstat/commands/pipeline_cmds.py +319 -0
- openstat/commands/plot_cmds.py +189 -0
- openstat/commands/plugin_cmds.py +79 -0
- openstat/commands/posthoc_cmds.py +153 -0
- openstat/commands/power_cmds.py +172 -0
- openstat/commands/profile_cmds.py +246 -0
- openstat/commands/rbridge_cmds.py +81 -0
- openstat/commands/regex_cmds.py +104 -0
- openstat/commands/report_cmds.py +48 -0
- openstat/commands/repro_cmds.py +129 -0
- openstat/commands/resampling_cmds.py +109 -0
- openstat/commands/reshape_cmds.py +223 -0
- openstat/commands/sem_cmds.py +177 -0
- openstat/commands/stat_cmds.py +1040 -0
- openstat/commands/stata_import_cmds.py +215 -0
- openstat/commands/string_cmds.py +124 -0
- openstat/commands/surv_cmds.py +145 -0
- openstat/commands/survey_cmds.py +153 -0
- openstat/commands/textanalysis_cmds.py +192 -0
- openstat/commands/ts_adv_cmds.py +136 -0
- openstat/commands/ts_cmds.py +195 -0
- openstat/commands/tui_cmds.py +111 -0
- openstat/commands/ux_cmds.py +191 -0
- openstat/commands/validate_cmds.py +270 -0
- openstat/commands/viz_adv_cmds.py +312 -0
- openstat/commands/viz_extra_cmds.py +251 -0
- openstat/commands/watch_cmds.py +69 -0
- openstat/config.py +106 -0
- openstat/dsl/__init__.py +0 -0
- openstat/dsl/parser.py +332 -0
- openstat/dsl/tokenizer.py +105 -0
- openstat/i18n.py +120 -0
- openstat/io/__init__.py +0 -0
- openstat/io/loader.py +187 -0
- openstat/jupyter/__init__.py +18 -0
- openstat/jupyter/display.py +18 -0
- openstat/jupyter/magic.py +60 -0
- openstat/logging_config.py +59 -0
- openstat/plots/__init__.py +0 -0
- openstat/plots/plotter.py +437 -0
- openstat/plots/surv_plots.py +32 -0
- openstat/plots/ts_plots.py +59 -0
- openstat/plugins/__init__.py +5 -0
- openstat/plugins/manager.py +69 -0
- openstat/repl.py +457 -0
- openstat/reporting/__init__.py +0 -0
- openstat/reporting/eda.py +208 -0
- openstat/reporting/report.py +67 -0
- openstat/script_runner.py +319 -0
- openstat/session.py +133 -0
- openstat/stats/__init__.py +0 -0
- openstat/stats/advanced_regression.py +269 -0
- openstat/stats/arch_garch.py +84 -0
- openstat/stats/bayesian.py +103 -0
- openstat/stats/causal.py +258 -0
- openstat/stats/clustering.py +206 -0
- openstat/stats/discrete.py +311 -0
- openstat/stats/epidemiology.py +119 -0
- openstat/stats/equiv_tobit.py +163 -0
- openstat/stats/factor.py +174 -0
- openstat/stats/imputation.py +282 -0
- openstat/stats/influence.py +78 -0
- openstat/stats/iv.py +131 -0
- openstat/stats/manova.py +124 -0
- openstat/stats/mixed.py +128 -0
- openstat/stats/ml.py +275 -0
- openstat/stats/ml_advanced.py +117 -0
- openstat/stats/model_eval.py +183 -0
- openstat/stats/models.py +1342 -0
- openstat/stats/nonparametric.py +130 -0
- openstat/stats/panel.py +179 -0
- openstat/stats/power.py +295 -0
- openstat/stats/resampling.py +203 -0
- openstat/stats/survey.py +213 -0
- openstat/stats/survival.py +196 -0
- openstat/stats/timeseries.py +142 -0
- openstat/stats/ts_advanced.py +114 -0
- openstat/types.py +11 -0
- openstat/web/__init__.py +1 -0
- openstat/web/app.py +117 -0
- openstat/web/session_manager.py +73 -0
- openstat/web/static/app.js +117 -0
- openstat/web/static/index.html +38 -0
- openstat/web/static/style.css +103 -0
- openstat_cli-1.0.0.dist-info/METADATA +748 -0
- openstat_cli-1.0.0.dist-info/RECORD +143 -0
- openstat_cli-1.0.0.dist-info/WHEEL +4 -0
- openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
- openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
"""outreg: export regression results to LaTeX or HTML tables. log: session logging."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
|
|
8
|
+
from openstat.commands.base import command
|
|
9
|
+
from openstat.session import Session
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
|
|
13
|
+
opts: dict[str, str] = {}
|
|
14
|
+
for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
|
|
15
|
+
opts[m.group(1).lower()] = m.group(2)
|
|
16
|
+
rest = re.sub(r'\w+\([^)]*\)', '', raw)
|
|
17
|
+
positional = [t.strip(',') for t in rest.split() if t.strip(',')]
|
|
18
|
+
return positional, opts
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _results_to_table(results, fmt: str = "latex", stars: bool = True) -> str:
|
|
22
|
+
"""Convert session results to LaTeX or HTML table."""
|
|
23
|
+
# Normalize ModelResult → dict
|
|
24
|
+
model_list = []
|
|
25
|
+
for res in results:
|
|
26
|
+
if isinstance(res, dict) and ("params" in res or "coefficients" in res):
|
|
27
|
+
model_list.append(res)
|
|
28
|
+
elif hasattr(res, "details"):
|
|
29
|
+
d = res.details
|
|
30
|
+
if "params" in d or "coefficients" in d:
|
|
31
|
+
model_list.append(d)
|
|
32
|
+
|
|
33
|
+
if not model_list:
|
|
34
|
+
return ""
|
|
35
|
+
|
|
36
|
+
all_params: list[str] = []
|
|
37
|
+
for res in model_list:
|
|
38
|
+
src = res.get("params") or res.get("coefficients") or {}
|
|
39
|
+
for p in src:
|
|
40
|
+
if p not in all_params:
|
|
41
|
+
all_params.append(p)
|
|
42
|
+
|
|
43
|
+
def _coef(res, param):
|
|
44
|
+
if "params" in res:
|
|
45
|
+
return res["params"].get(param, float("nan"))
|
|
46
|
+
c = res.get("coefficients", {}).get(param, {})
|
|
47
|
+
return c.get("mean", float("nan"))
|
|
48
|
+
|
|
49
|
+
def _se(res, param):
|
|
50
|
+
if "std_errors" in res:
|
|
51
|
+
return res["std_errors"].get(param, float("nan"))
|
|
52
|
+
c = res.get("coefficients", {}).get(param, {})
|
|
53
|
+
return c.get("std", float("nan"))
|
|
54
|
+
|
|
55
|
+
def _pval(res, param):
|
|
56
|
+
if "p_values" in res:
|
|
57
|
+
return res["p_values"].get(param, float("nan"))
|
|
58
|
+
return float("nan")
|
|
59
|
+
|
|
60
|
+
def _star(p):
|
|
61
|
+
if p != p: return ""
|
|
62
|
+
if p < 0.001: return "^{***}" if fmt == "latex" else "***"
|
|
63
|
+
if p < 0.01: return "^{**}" if fmt == "latex" else "**"
|
|
64
|
+
if p < 0.05: return "^{*}" if fmt == "latex" else "*"
|
|
65
|
+
return ""
|
|
66
|
+
|
|
67
|
+
n_models = len(model_list)
|
|
68
|
+
|
|
69
|
+
if fmt == "latex":
|
|
70
|
+
col_spec = "l" + "c" * n_models
|
|
71
|
+
lines = [
|
|
72
|
+
"\\begin{table}[htbp]",
|
|
73
|
+
"\\centering",
|
|
74
|
+
f"\\begin{{tabular}}{{{col_spec}}}",
|
|
75
|
+
"\\hline\\hline",
|
|
76
|
+
]
|
|
77
|
+
header = " & " + " & ".join(f"({i+1})" for i in range(n_models)) + " \\\\"
|
|
78
|
+
lines.append(header)
|
|
79
|
+
lines.append("\\hline")
|
|
80
|
+
for param in all_params:
|
|
81
|
+
coef_row = param.replace("_", "\\_") + " & "
|
|
82
|
+
se_row = " & "
|
|
83
|
+
vals = []
|
|
84
|
+
ses = []
|
|
85
|
+
for res in model_list:
|
|
86
|
+
c = _coef(res, param)
|
|
87
|
+
s = _se(res, param)
|
|
88
|
+
p = _pval(res, param)
|
|
89
|
+
st = _star(p) if stars else ""
|
|
90
|
+
vals.append(f"{c:.4f}{st}" if c == c else "")
|
|
91
|
+
ses.append(f"({s:.4f})" if s == s else "")
|
|
92
|
+
coef_row += " & ".join(vals) + " \\\\"
|
|
93
|
+
se_row += " & ".join(ses) + " \\\\"
|
|
94
|
+
lines.append(coef_row)
|
|
95
|
+
lines.append(se_row)
|
|
96
|
+
lines.append("\\hline")
|
|
97
|
+
n_row = "N & " + " & ".join(str(res.get("n_obs", "")) for res in model_list) + " \\\\"
|
|
98
|
+
r2_row = "$R^2$ & " + " & ".join(
|
|
99
|
+
f"{res.get('r_squared', float('nan')):.4f}" if isinstance(res.get('r_squared'), float) else ""
|
|
100
|
+
for res in model_list
|
|
101
|
+
) + " \\\\"
|
|
102
|
+
lines.extend([n_row, r2_row, "\\hline\\hline"])
|
|
103
|
+
if stars:
|
|
104
|
+
lines.append("\\multicolumn{" + str(n_models + 1) + "}{l}{\\footnotesize $^{*}p<0.05$, $^{**}p<0.01$, $^{***}p<0.001$}")
|
|
105
|
+
lines.extend(["\\end{tabular}", "\\end{table}"])
|
|
106
|
+
return "\n".join(lines)
|
|
107
|
+
|
|
108
|
+
else: # html
|
|
109
|
+
lines = ["<table border='1' cellpadding='5' style='border-collapse:collapse'>"]
|
|
110
|
+
lines.append("<tr><th>Variable</th>" + "".join(f"<th>({i+1})</th>" for i in range(n_models)) + "</tr>")
|
|
111
|
+
for param in all_params:
|
|
112
|
+
coef_cells = ""
|
|
113
|
+
se_cells = ""
|
|
114
|
+
for res in model_list:
|
|
115
|
+
c = _coef(res, param)
|
|
116
|
+
s = _se(res, param)
|
|
117
|
+
p = _pval(res, param)
|
|
118
|
+
st = _star(p) if stars else ""
|
|
119
|
+
coef_cells += f"<td align='center'>{f'{c:.4f}{st}' if c==c else ''}</td>"
|
|
120
|
+
se_cells += f"<td align='center'><small>{f'({s:.4f})' if s==s else ''}</small></td>"
|
|
121
|
+
lines.append(f"<tr><td><b>{param}</b></td>{coef_cells}</tr>")
|
|
122
|
+
lines.append(f"<tr><td></td>{se_cells}</tr>")
|
|
123
|
+
n_row = "<tr><td>N</td>" + "".join(f"<td align='center'>{res.get('n_obs','')}</td>" for res in model_list) + "</tr>"
|
|
124
|
+
def _r2_cell(res):
|
|
125
|
+
r2 = res.get("r_squared", None)
|
|
126
|
+
val = f"{r2:.4f}" if isinstance(r2, float) else ""
|
|
127
|
+
return f"<td align='center'>{val}</td>"
|
|
128
|
+
r2_row = "<tr><td>R\u00b2</td>" + "".join(_r2_cell(res) for res in model_list) + "</tr>"
|
|
129
|
+
lines.extend([n_row, r2_row, "</table>"])
|
|
130
|
+
if stars: lines.append("<p><small>* p<0.05, ** p<0.01, *** p<0.001</small></p>")
|
|
131
|
+
return "\n".join(lines)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@command("outreg", usage="outreg [using path] [format(latex|html)] [--stars]")
def cmd_outreg(session: Session, args: str) -> str:
    """Export a regression comparison table to LaTeX or HTML.

    Recognised arguments:
        ``format(latex|html)``: output format (default ``latex``, case-insensitive).
        ``using <path>`` or ``using(<path>)``: write the table to a file
            instead of returning it.
        ``stars`` / ``--stars``: add significance markers.

    Returns:
        A status or error message, or the table text when no path is given.
    """
    import os

    positional, opts = _stata_opts(args)

    fmt = opts.get("format", "latex").strip().lower()
    if fmt not in ("latex", "html"):
        # Previously any unknown value silently produced HTML while the status
        # message reported the unknown name as the format.
        return f"outreg: unknown format '{fmt}'. Use format(latex) or format(html)."

    path = opts.get("using")
    path_idx = None
    # Also accept: outreg using file.tex
    if not path and "using" in positional:
        idx = positional.index("using")
        if idx + 1 < len(positional):
            path_idx = idx + 1
            path = positional[path_idx]

    # Match the flag as a whole token (ignoring the path token) so that
    # "nostars" or a file called "stars.tex" does not switch stars on.
    show_stars = "stars" in opts or any(
        tok.lstrip("-").lower() == "stars"
        for i, tok in enumerate(positional)
        if i != path_idx
    )

    if not session.results:
        return "No stored results. Run regression commands first."

    table = _results_to_table(session.results, fmt=fmt, stars=show_stars)
    if not table:
        return "No regression results with coefficients found."

    if path:
        try:
            os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
            # Explicit encoding: the HTML table contains a non-ASCII character.
            with open(path, "w", encoding="utf-8") as f:
                f.write(table)
        except (OSError, UnicodeError) as exc:
            return f"outreg write error: {exc}"
        return f"outreg: table saved to {path} ({fmt} format)"

    return f"\noutreg ({fmt}):\n\n{table}"
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@command("log", usage="log using <path> | log close | log status | log display | log clear")
def cmd_log(session: Session, args: str) -> str:
    """Session logging: real-time capture or history export.

    Examples:
        log using analysis.log — start real-time logging
        log status — check if logging is active
        log close — stop real-time log and close file
        log display — print command history to screen
        log clear — clear command history

    With no sub-command, ``display`` is assumed. Every branch returns a
    message string; file-system errors while opening a log are reported in
    the returned message rather than raised.
    """
    from contextlib import suppress
    from pathlib import Path

    positional, opts = _stata_opts(args)
    subcmd = positional[0].lower() if positional else "display"

    def _finish(handle, verb: str) -> None:
        """Best-effort: write a trailer line, then always try to close."""
        with suppress(OSError, ValueError):
            handle.write(f"\nLog {verb} {datetime.now().isoformat()}\n")
        with suppress(OSError, ValueError):
            handle.close()

    # ---- Real-time logging ----
    if subcmd == "using":
        log_path = positional[1] if len(positional) >= 2 else opts.get("using", "outputs/session.log")

        # Close existing real-time log if open, and forget it immediately so a
        # failure below cannot leave the session pointing at a closed file.
        if session._log_file is not None:
            _finish(session._log_file, "replaced")
            session._log_file = None
            session._log_path = None

        path = Path(log_path)
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            handle = open(path, "w", encoding="utf-8")
            try:
                handle.write("OpenStat session log\n")
                handle.write(f"Started: {datetime.now().isoformat()}\n")
                handle.write("=" * 60 + "\n\n")
                handle.flush()
            except OSError:
                handle.close()
                raise
        except OSError as exc:
            return f"log error: {exc}"

        session._log_file = handle
        session._log_path = str(path)
        return f"Log opened: {path}"

    if subcmd in ("close", "off"):
        if session._log_file is None:
            return "No active log. Use: log using <path>"
        closed_path = session._log_path
        _finish(session._log_file, "closed")
        session._log_file = None
        session._log_path = None
        return f"Log closed: {closed_path}"

    if subcmd == "status":
        if session._log_file is None:
            return "Logging: OFF"
        return f"Logging: ON → {session._log_path}"

    # ---- History display / export ----
    if subcmd == "display":
        if not session.history:
            return "No commands in session history."
        lines = ["\nSession Log:", "=" * 50]
        for i, cmd_line in enumerate(session.history, 1):
            lines.append(f" {i:>4}. {cmd_line}")
        return "\n".join(lines)

    if subcmd == "clear":
        session.history.clear()
        return "Session history cleared."

    return (
        "Usage:\n"
        " log using <path> — start real-time logging to file\n"
        " log status — check if logging is active\n"
        " log close — stop logging\n"
        " log display — show command history\n"
        " log clear — clear command history"
    )
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""Panel data commands: xtset, xtreg, hausman."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from openstat.session import Session, ModelResult
|
|
8
|
+
from openstat.commands.base import command, CommandArgs, friendly_error
|
|
9
|
+
from openstat.dsl.parser import parse_formula, ParseError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _store_panel_model(session, result, raw_model, dep, indeps, fit_kwargs=None):
    """Record a fitted panel model on the session and return its printable summary.

    The raw model, the ``(dep, indeps)`` pair, the result object and the fit
    keyword arguments are stored as the session's "last model" state. A
    ``ModelResult`` holding the observation count, parameters, standard errors
    and (when not ``None``) R² is appended to ``session.results``.

    Returns:
        ``result.summary_table()``, followed by one line per entry of
        ``result.warnings`` when there are any.
    """
    session._last_model = raw_model
    session._last_model_vars = (dep, indeps)
    session._last_fit_result = result
    session._last_fit_kwargs = fit_kwargs or {}

    # Prefer the result's own markdown rendering when it offers one.
    if hasattr(result, "to_markdown"):
        rendered = result.to_markdown()
    else:
        rendered = str(result)

    info = {
        "n_obs": result.n_obs,
        "params": dict(result.params),
        "std_errors": dict(result.std_errors),
    }
    if result.r_squared is not None:
        info["r_squared"] = result.r_squared

    record = ModelResult(
        name=result.model_type,
        formula=result.formula,
        table=rendered,
        details=info,
    )
    session.results.append(record)

    text = result.summary_table()
    if result.warnings:
        text += "\n" + "\n".join(result.warnings)
    return text
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@command("xtset", usage="xtset <panel_var> <time_var>")
def cmd_xtset(session: Session, args: str) -> str:
    """Declare the panel structure by naming the entity and time columns.

    The first two whitespace-separated tokens of ``args`` are the panel and
    time variables. Both must be columns of the loaded data. On success they
    are stored on the session and a short summary is returned; otherwise a
    usage or "column not found" message is returned.
    """
    data = session.require_data()

    tokens = args.strip().split()
    if len(tokens) < 2:
        return "Usage: xtset <panel_var> <time_var>"
    panel_var, time_var = tokens[:2]

    # Report the first of the two names that is not a column.
    absent = next(
        (name for name in (panel_var, time_var) if name not in data.columns),
        None,
    )
    if absent is not None:
        return f"Column not found: {absent}"

    entity_count = data[panel_var].n_unique()
    period_count = data[time_var].n_unique()
    session._panel_var = panel_var
    session._time_var = time_var

    summary = (
        f"Panel variable: {panel_var} ({entity_count} entities)\n"
        f"Time variable: {time_var} ({period_count} periods)\n"
        f"Observations: {data.height}"
    )
    return summary
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@command("xtreg", usage="xtreg y ~ x1 + x2, fe|re|be [--robust] [--cluster]")
def cmd_xtreg(session: Session, args: str) -> str:
    """Fit panel data regression: fixed effects, random effects, or between.

    ``args`` has the form ``<formula>, <estimator> [options]``. The text after
    the last comma must contain one of ``fe``, ``re`` or ``be``. ``--robust``
    and ``--cluster=<var>`` are honoured on either side of the comma; the
    cluster value is passed to the fixed-effects estimator only.

    Requires the panel structure to have been declared with ``xtset``.

    Returns:
        The model summary, or a usage / error message.
    """
    df = session.require_data()

    if session._panel_var is None or session._time_var is None:
        return "Panel structure not set. Use: xtset <panel_var> <time_var>"

    # Split on the last comma to get formula and estimator.
    if "," not in args:
        return "Usage: xtreg y ~ x1 + x2, fe|re|be [--robust]"
    formula_part, options_part = args.rsplit(",", 1)

    ca = CommandArgs(options_part)
    requested = {p.lower() for p in ca.positional}
    # Precedence when several are given: fe, then re, then be.
    estimator = next((est for est in ("fe", "re", "be") if est in requested), None)
    if estimator is None:
        return "Specify estimator: fe (fixed effects), re (random effects), or be (between)"

    robust = ca.has_flag("--robust") or "--robust" in formula_part
    cluster = ca.get_option("cluster")
    # A --cluster=<var> written before the comma used to be stripped from the
    # formula and then lost; capture it before cleaning the formula.
    cluster_in_formula = re.search(r'--cluster=(\S+)', formula_part)
    if not cluster and cluster_in_formula:
        cluster = cluster_in_formula.group(1)

    formula_clean = formula_part.replace("--robust", "").strip()
    # Remove cluster option from formula
    formula_clean = re.sub(r'--cluster=\S+', '', formula_clean).strip()

    try:
        dep, indeps = parse_formula(formula_clean)
    except ParseError as e:
        return f"Formula error: {e}"

    try:
        from openstat.stats.panel import fit_panel_fe, fit_panel_re, fit_panel_be

        if estimator == "fe":
            result, raw = fit_panel_fe(
                df, dep, indeps, session._panel_var, session._time_var,
                robust=robust, cluster=cluster,
            )
        elif estimator == "re":
            result, raw = fit_panel_re(
                df, dep, indeps, session._panel_var, session._time_var,
                robust=robust,
            )
        else:  # be
            result, raw = fit_panel_be(
                df, dep, indeps, session._panel_var, session._time_var,
            )

        # Store raw model for hausman test
        session._panel_models[estimator] = raw

        return _store_panel_model(
            session, result, raw, dep, indeps,
            {"estimator": estimator, "robust": robust},
        )
    except ImportError as e:
        return str(e)
    except Exception as e:
        return friendly_error(e, "xtreg")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@command("hausman", usage="hausman")
def cmd_hausman(session: Session, args: str) -> str:
    """Run the Hausman test on the stored fixed- and random-effects models.

    Both models must already be in ``session._panel_models`` (stored by
    ``xtreg ..., fe`` and ``xtreg ..., re``); otherwise a reminder is returned.
    """
    stored = session._panel_models
    fixed_model = stored.get("fe")
    random_model = stored.get("re")

    if not (fixed_model and random_model):
        return "Run both 'xtreg ..., fe' and 'xtreg ..., re' first."

    try:
        from openstat.stats.panel import hausman_test
        outcome = hausman_test(fixed_model, random_model)
    except ImportError as err:
        return str(err)
    except Exception as err:
        return friendly_error(err, "hausman")
    return outcome
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""PDF and Markdown export commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from datetime import date
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from openstat.commands.base import command, CommandArgs
|
|
10
|
+
from openstat.session import Session
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _ensure_dir(path: str) -> None:
|
|
14
|
+
os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# ── Markdown export ──────────────────────────────────────────────────────────
|
|
18
|
+
|
|
19
|
+
def _export_md(session: Session, path: str) -> str:
    """Write the session's results to a Markdown report.

    The report contains, in order: a title line with dataset name, date and
    shape; a dataset overview and summary statistics for up to 30 numeric
    columns (when data is loaded); one fenced block per stored model result;
    and an image link for every recorded plot file that still exists.

    Args:
        session: Session providing ``df``, ``results`` and ``plot_paths``.
        path: Destination file; its parent directory is created if needed.

    Returns:
        The absolute path of the written file.
    """
    import polars as pl

    lines = [
        "# OpenStat Results",
        "",
        f"**Dataset:** {session.dataset_name or 'Unknown'} | "
        f"**Date:** {date.today().isoformat()} | "
        f"**Shape:** {session.shape_str}",
        "",
    ]

    if session.df is not None:
        df = session.df
        lines += ["## Dataset Overview", ""]
        lines += [
            "| Property | Value |",
            "|---|---|",
            f"| Rows | {df.height:,} |",
            f"| Columns | {df.width} |",
            f"| Missing cells | {sum(df[c].null_count() for c in df.columns)} |",
        ]
        lines.append("")

        NUMERIC = (pl.Float32, pl.Float64, pl.Int8, pl.Int16, pl.Int32, pl.Int64,
                   pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64)
        num_cols = [c for c in df.columns if df[c].dtype in NUMERIC]
        if num_cols:
            lines += ["## Summary Statistics", ""]
            lines.append("| Variable | N | Mean | SD | Min | Max |")
            lines.append("|---|---|---|---|---|---|")
            for c in num_cols[:30]:
                col = df[c].drop_nulls()
                if col.len() == 0:
                    # Nothing to summarise for an all-null column.
                    continue
                mean = f"{col.mean():.4f}"
                # A standard deviation needs at least two observations.
                sd = f"{col.std():.4f}" if col.len() > 1 else "—"
                lines.append(
                    f"| {c} | {col.len()} | {mean} | {sd} "
                    f"| {col.min():.4f} | {col.max():.4f} |"
                )
            lines.append("")

    for mr in session.results:
        lines += [
            f"## {mr.name}: {mr.formula}",
            "",
            "```",
            mr.table,
            "```",
            "",
        ]

    if session.plot_paths:
        lines += ["## Figures", ""]
        for p in session.plot_paths:
            if os.path.exists(p):
                # Reference the figure (previously an empty line was emitted,
                # so the section never pointed at any image). Forward slashes
                # keep the link valid when the path uses backslashes.
                lines.append(f"![{os.path.basename(p)}]({Path(p).as_posix()})")
        lines.append("")

    content = "\n".join(lines)
    _ensure_dir(path)
    Path(path).write_text(content, encoding="utf-8")
    return os.path.abspath(path)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# ── PDF export ───────────────────────────────────────────────────────────────
|
|
86
|
+
|
|
87
|
+
def _export_pdf(session: Session, path: str) -> str:
    """Build a PDF report of the session's results with ReportLab.

    The report contains a title block, a dataset overview and summary
    statistics for up to 25 numeric columns (when data is loaded), every
    stored model table in a monospace font, and every recorded plot file that
    still exists.

    Args:
        session: Session providing ``df``, ``results`` and ``plot_paths``.
        path: Destination file; its parent directory is created if needed.

    Returns:
        The absolute path of the written file, or an installation hint when
        ReportLab cannot be imported.
    """
    try:
        from reportlab.lib import colors
        from reportlab.lib.pagesizes import A4
        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
        from reportlab.lib.units import cm
        from reportlab.platypus import (
            SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image,
            HRFlowable,
        )
    except ImportError:
        return (
            "reportlab is required for PDF export.\n"
            "Install: pip install reportlab"
        )

    import polars as pl
    from xml.sax.saxutils import escape as _xml_escape

    def _safe(text) -> str:
        # Paragraph text is parsed as XML-like markup, so literal "<", ">" and
        # "&" coming from data must be escaped first.
        return _xml_escape(str(text))

    def _mono_line(text: str) -> str:
        # Non-breaking spaces stop runs of spaces from being collapsed, which
        # keeps monospace table columns aligned; a blank line keeps its height.
        return _xml_escape(text).replace(" ", "\u00a0") or "\u00a0"

    def _grid_style(font_size):
        """Shared look for the overview and statistics tables."""
        return TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#4C72B0")),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
            ("GRID", (0, 0), (-1, -1), 0.5, colors.grey),
            ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f0f4ff")]),
            ("FONTSIZE", (0, 0), (-1, -1), font_size),
        ])

    doc = SimpleDocTemplate(
        path,
        pagesize=A4,
        leftMargin=2 * cm, rightMargin=2 * cm,
        topMargin=2 * cm, bottomMargin=2 * cm,
    )
    styles = getSampleStyleSheet()
    mono = ParagraphStyle("Mono", parent=styles["Normal"], fontName="Courier", fontSize=8, leading=11)
    h1 = styles["Heading1"]
    h2 = styles["Heading2"]
    normal = styles["Normal"]

    story = []

    # Title
    story.append(Paragraph("OpenStat Results", h1))
    story.append(Paragraph(
        _safe(
            f"Dataset: {session.dataset_name or 'Unknown'} | "
            f"Date: {date.today().isoformat()} | "
            f"Shape: {session.shape_str}"
        ),
        normal,
    ))
    story.append(HRFlowable(width="100%"))
    story.append(Spacer(1, 0.3 * cm))

    # Dataset overview table
    if session.df is not None:
        df = session.df
        story.append(Paragraph("Dataset Overview", h2))
        NUMERIC = (pl.Float32, pl.Float64, pl.Int8, pl.Int16, pl.Int32, pl.Int64,
                   pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64)
        overview_data = [
            ["Property", "Value"],
            ["Rows", f"{df.height:,}"],
            ["Columns", str(df.width)],
            ["Missing cells", str(sum(df[c].null_count() for c in df.columns))],
            ["Numeric columns", str(sum(1 for c in df.columns if df[c].dtype in NUMERIC))],
        ]
        overview = Table(overview_data, colWidths=[6 * cm, 10 * cm])
        overview.setStyle(_grid_style(9))
        story.append(overview)
        story.append(Spacer(1, 0.5 * cm))

        # Summary statistics
        num_cols = [c for c in df.columns if df[c].dtype in NUMERIC]
        if num_cols:
            story.append(Paragraph("Summary Statistics", h2))
            stats_data = [["Variable", "N", "Mean", "SD", "Min", "Max"]]
            for c in num_cols[:25]:
                col = df[c].drop_nulls()
                if col.len() == 0:
                    # Nothing to summarise for an all-null column.
                    continue
                # A standard deviation needs at least two observations.
                sd_str = f"{col.std():.4f}" if col.len() > 1 else "—"
                stats_data.append([
                    c, str(col.len()),
                    f"{col.mean():.4f}", sd_str,
                    f"{col.min():.4f}", f"{col.max():.4f}",
                ])
            stats_table = Table(
                stats_data,
                colWidths=[4 * cm, 1.5 * cm, 2.5 * cm, 2.5 * cm, 2.5 * cm, 2.5 * cm],
            )
            stats_table.setStyle(_grid_style(8))
            story.append(stats_table)
            story.append(Spacer(1, 0.5 * cm))

    # Model results
    for mr in session.results:
        story.append(Paragraph(_safe(f"{mr.name}: {mr.formula}"), h2))
        # One monospace paragraph per line of the model table.
        for line in mr.table.split("\n"):
            story.append(Paragraph(_mono_line(line), mono))
        story.append(Spacer(1, 0.4 * cm))

    # Plots
    for plot_path in session.plot_paths:
        if os.path.exists(plot_path):
            story.append(Paragraph("Figure", h2))
            try:
                img = Image(plot_path, width=15 * cm, height=10 * cm, kind="proportional")
                story.append(img)
            except Exception:
                # Unreadable image: fall back to naming the file.
                story.append(Paragraph(_safe(f"[Plot: {plot_path}]"), normal))
            story.append(Spacer(1, 0.3 * cm))

    _ensure_dir(path)
    doc.build(story)
    return os.path.abspath(path)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
# ── Commands ─────────────────────────────────────────────────────────────────
|
|
208
|
+
|
|
209
|
+
@command("export pdf", usage="export pdf [path]")
def cmd_export_pdf(session: Session, args: str) -> str:
    """Export results to a PDF report (requires reportlab).

    The first positional argument is the output path; it defaults to
    ``outputs/results.pdf``.

    Returns:
        ``"PDF saved: <absolute path>"`` on success, otherwise the message
        produced by the exporter (for example the reportlab install hint).
    """
    ca = CommandArgs(args)
    path = ca.positional[0] if ca.positional else "outputs/results.pdf"
    out = _export_pdf(session, path)
    # On success the exporter returns os.path.abspath(path). Testing for an
    # absolute path, rather than a ".pdf" suffix, also recognises successful
    # exports to names such as "report" or "REPORT.PDF".
    if os.path.isabs(out):
        return f"PDF saved: {out}"
    return out
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
@command("export md", usage="export md [path]")
def cmd_export_md(session: Session, args: str) -> str:
    """Export results to a Markdown file.

    The first positional argument is the output path; without one the report
    goes to ``outputs/results.md``. Returns a message naming the written file.
    """
    parsed = CommandArgs(args)
    if parsed.positional:
        target = parsed.positional[0]
    else:
        target = "outputs/results.md"
    written = _export_md(session, target)
    return f"Markdown saved: {written}"
|