openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. openstat/__init__.py +3 -0
  2. openstat/__main__.py +4 -0
  3. openstat/backends/__init__.py +16 -0
  4. openstat/backends/duckdb_backend.py +70 -0
  5. openstat/backends/polars_backend.py +52 -0
  6. openstat/cli.py +92 -0
  7. openstat/commands/__init__.py +82 -0
  8. openstat/commands/adv_stat_cmds.py +1255 -0
  9. openstat/commands/advanced_ml_cmds.py +576 -0
  10. openstat/commands/advreg_cmds.py +207 -0
  11. openstat/commands/alias_cmds.py +135 -0
  12. openstat/commands/arch_cmds.py +82 -0
  13. openstat/commands/arules_cmds.py +111 -0
  14. openstat/commands/automodel_cmds.py +212 -0
  15. openstat/commands/backend_cmds.py +82 -0
  16. openstat/commands/base.py +170 -0
  17. openstat/commands/bayes_cmds.py +71 -0
  18. openstat/commands/causal_cmds.py +269 -0
  19. openstat/commands/cluster_cmds.py +152 -0
  20. openstat/commands/data_cmds.py +996 -0
  21. openstat/commands/datamanip_cmds.py +672 -0
  22. openstat/commands/dataquality_cmds.py +174 -0
  23. openstat/commands/datetime_cmds.py +176 -0
  24. openstat/commands/dimreduce_cmds.py +184 -0
  25. openstat/commands/discrete_cmds.py +149 -0
  26. openstat/commands/dsl_cmds.py +143 -0
  27. openstat/commands/epi_cmds.py +93 -0
  28. openstat/commands/equiv_tobit_cmds.py +94 -0
  29. openstat/commands/esttab_cmds.py +196 -0
  30. openstat/commands/export_beamer_cmds.py +142 -0
  31. openstat/commands/export_cmds.py +201 -0
  32. openstat/commands/export_extra_cmds.py +240 -0
  33. openstat/commands/factor_cmds.py +180 -0
  34. openstat/commands/groupby_cmds.py +155 -0
  35. openstat/commands/help_cmds.py +237 -0
  36. openstat/commands/i18n_cmds.py +43 -0
  37. openstat/commands/import_extra_cmds.py +561 -0
  38. openstat/commands/influence_cmds.py +134 -0
  39. openstat/commands/iv_cmds.py +106 -0
  40. openstat/commands/manova_cmds.py +105 -0
  41. openstat/commands/mediate_cmds.py +233 -0
  42. openstat/commands/meta_cmds.py +284 -0
  43. openstat/commands/mi_cmds.py +228 -0
  44. openstat/commands/mixed_cmds.py +79 -0
  45. openstat/commands/mixture_changepoint_cmds.py +166 -0
  46. openstat/commands/ml_adv_cmds.py +147 -0
  47. openstat/commands/ml_cmds.py +178 -0
  48. openstat/commands/model_eval_cmds.py +142 -0
  49. openstat/commands/network_cmds.py +288 -0
  50. openstat/commands/nlquery_cmds.py +161 -0
  51. openstat/commands/nonparam_cmds.py +149 -0
  52. openstat/commands/outreg_cmds.py +247 -0
  53. openstat/commands/panel_cmds.py +141 -0
  54. openstat/commands/pdf_cmds.py +226 -0
  55. openstat/commands/pipeline_cmds.py +319 -0
  56. openstat/commands/plot_cmds.py +189 -0
  57. openstat/commands/plugin_cmds.py +79 -0
  58. openstat/commands/posthoc_cmds.py +153 -0
  59. openstat/commands/power_cmds.py +172 -0
  60. openstat/commands/profile_cmds.py +246 -0
  61. openstat/commands/rbridge_cmds.py +81 -0
  62. openstat/commands/regex_cmds.py +104 -0
  63. openstat/commands/report_cmds.py +48 -0
  64. openstat/commands/repro_cmds.py +129 -0
  65. openstat/commands/resampling_cmds.py +109 -0
  66. openstat/commands/reshape_cmds.py +223 -0
  67. openstat/commands/sem_cmds.py +177 -0
  68. openstat/commands/stat_cmds.py +1040 -0
  69. openstat/commands/stata_import_cmds.py +215 -0
  70. openstat/commands/string_cmds.py +124 -0
  71. openstat/commands/surv_cmds.py +145 -0
  72. openstat/commands/survey_cmds.py +153 -0
  73. openstat/commands/textanalysis_cmds.py +192 -0
  74. openstat/commands/ts_adv_cmds.py +136 -0
  75. openstat/commands/ts_cmds.py +195 -0
  76. openstat/commands/tui_cmds.py +111 -0
  77. openstat/commands/ux_cmds.py +191 -0
  78. openstat/commands/validate_cmds.py +270 -0
  79. openstat/commands/viz_adv_cmds.py +312 -0
  80. openstat/commands/viz_extra_cmds.py +251 -0
  81. openstat/commands/watch_cmds.py +69 -0
  82. openstat/config.py +106 -0
  83. openstat/dsl/__init__.py +0 -0
  84. openstat/dsl/parser.py +332 -0
  85. openstat/dsl/tokenizer.py +105 -0
  86. openstat/i18n.py +120 -0
  87. openstat/io/__init__.py +0 -0
  88. openstat/io/loader.py +187 -0
  89. openstat/jupyter/__init__.py +18 -0
  90. openstat/jupyter/display.py +18 -0
  91. openstat/jupyter/magic.py +60 -0
  92. openstat/logging_config.py +59 -0
  93. openstat/plots/__init__.py +0 -0
  94. openstat/plots/plotter.py +437 -0
  95. openstat/plots/surv_plots.py +32 -0
  96. openstat/plots/ts_plots.py +59 -0
  97. openstat/plugins/__init__.py +5 -0
  98. openstat/plugins/manager.py +69 -0
  99. openstat/repl.py +457 -0
  100. openstat/reporting/__init__.py +0 -0
  101. openstat/reporting/eda.py +208 -0
  102. openstat/reporting/report.py +67 -0
  103. openstat/script_runner.py +319 -0
  104. openstat/session.py +133 -0
  105. openstat/stats/__init__.py +0 -0
  106. openstat/stats/advanced_regression.py +269 -0
  107. openstat/stats/arch_garch.py +84 -0
  108. openstat/stats/bayesian.py +103 -0
  109. openstat/stats/causal.py +258 -0
  110. openstat/stats/clustering.py +206 -0
  111. openstat/stats/discrete.py +311 -0
  112. openstat/stats/epidemiology.py +119 -0
  113. openstat/stats/equiv_tobit.py +163 -0
  114. openstat/stats/factor.py +174 -0
  115. openstat/stats/imputation.py +282 -0
  116. openstat/stats/influence.py +78 -0
  117. openstat/stats/iv.py +131 -0
  118. openstat/stats/manova.py +124 -0
  119. openstat/stats/mixed.py +128 -0
  120. openstat/stats/ml.py +275 -0
  121. openstat/stats/ml_advanced.py +117 -0
  122. openstat/stats/model_eval.py +183 -0
  123. openstat/stats/models.py +1342 -0
  124. openstat/stats/nonparametric.py +130 -0
  125. openstat/stats/panel.py +179 -0
  126. openstat/stats/power.py +295 -0
  127. openstat/stats/resampling.py +203 -0
  128. openstat/stats/survey.py +213 -0
  129. openstat/stats/survival.py +196 -0
  130. openstat/stats/timeseries.py +142 -0
  131. openstat/stats/ts_advanced.py +114 -0
  132. openstat/types.py +11 -0
  133. openstat/web/__init__.py +1 -0
  134. openstat/web/app.py +117 -0
  135. openstat/web/session_manager.py +73 -0
  136. openstat/web/static/app.js +117 -0
  137. openstat/web/static/index.html +38 -0
  138. openstat/web/static/style.css +103 -0
  139. openstat_cli-1.0.0.dist-info/METADATA +748 -0
  140. openstat_cli-1.0.0.dist-info/RECORD +143 -0
  141. openstat_cli-1.0.0.dist-info/WHEEL +4 -0
  142. openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
  143. openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,247 @@
1
+ """outreg: export regression results to LaTeX or HTML tables. log: session logging."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from datetime import datetime
7
+
8
+ from openstat.commands.base import command
9
+ from openstat.session import Session
10
+
11
+
12
+ def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
13
+ opts: dict[str, str] = {}
14
+ for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
15
+ opts[m.group(1).lower()] = m.group(2)
16
+ rest = re.sub(r'\w+\([^)]*\)', '', raw)
17
+ positional = [t.strip(',') for t in rest.split() if t.strip(',')]
18
+ return positional, opts
19
+
20
+
21
+ def _results_to_table(results, fmt: str = "latex", stars: bool = True) -> str:
22
+ """Convert session results to LaTeX or HTML table."""
23
+ # Normalize ModelResult → dict
24
+ model_list = []
25
+ for res in results:
26
+ if isinstance(res, dict) and ("params" in res or "coefficients" in res):
27
+ model_list.append(res)
28
+ elif hasattr(res, "details"):
29
+ d = res.details
30
+ if "params" in d or "coefficients" in d:
31
+ model_list.append(d)
32
+
33
+ if not model_list:
34
+ return ""
35
+
36
+ all_params: list[str] = []
37
+ for res in model_list:
38
+ src = res.get("params") or res.get("coefficients") or {}
39
+ for p in src:
40
+ if p not in all_params:
41
+ all_params.append(p)
42
+
43
+ def _coef(res, param):
44
+ if "params" in res:
45
+ return res["params"].get(param, float("nan"))
46
+ c = res.get("coefficients", {}).get(param, {})
47
+ return c.get("mean", float("nan"))
48
+
49
+ def _se(res, param):
50
+ if "std_errors" in res:
51
+ return res["std_errors"].get(param, float("nan"))
52
+ c = res.get("coefficients", {}).get(param, {})
53
+ return c.get("std", float("nan"))
54
+
55
+ def _pval(res, param):
56
+ if "p_values" in res:
57
+ return res["p_values"].get(param, float("nan"))
58
+ return float("nan")
59
+
60
+ def _star(p):
61
+ if p != p: return ""
62
+ if p < 0.001: return "^{***}" if fmt == "latex" else "***"
63
+ if p < 0.01: return "^{**}" if fmt == "latex" else "**"
64
+ if p < 0.05: return "^{*}" if fmt == "latex" else "*"
65
+ return ""
66
+
67
+ n_models = len(model_list)
68
+
69
+ if fmt == "latex":
70
+ col_spec = "l" + "c" * n_models
71
+ lines = [
72
+ "\\begin{table}[htbp]",
73
+ "\\centering",
74
+ f"\\begin{{tabular}}{{{col_spec}}}",
75
+ "\\hline\\hline",
76
+ ]
77
+ header = " & " + " & ".join(f"({i+1})" for i in range(n_models)) + " \\\\"
78
+ lines.append(header)
79
+ lines.append("\\hline")
80
+ for param in all_params:
81
+ coef_row = param.replace("_", "\\_") + " & "
82
+ se_row = " & "
83
+ vals = []
84
+ ses = []
85
+ for res in model_list:
86
+ c = _coef(res, param)
87
+ s = _se(res, param)
88
+ p = _pval(res, param)
89
+ st = _star(p) if stars else ""
90
+ vals.append(f"{c:.4f}{st}" if c == c else "")
91
+ ses.append(f"({s:.4f})" if s == s else "")
92
+ coef_row += " & ".join(vals) + " \\\\"
93
+ se_row += " & ".join(ses) + " \\\\"
94
+ lines.append(coef_row)
95
+ lines.append(se_row)
96
+ lines.append("\\hline")
97
+ n_row = "N & " + " & ".join(str(res.get("n_obs", "")) for res in model_list) + " \\\\"
98
+ r2_row = "$R^2$ & " + " & ".join(
99
+ f"{res.get('r_squared', float('nan')):.4f}" if isinstance(res.get('r_squared'), float) else ""
100
+ for res in model_list
101
+ ) + " \\\\"
102
+ lines.extend([n_row, r2_row, "\\hline\\hline"])
103
+ if stars:
104
+ lines.append("\\multicolumn{" + str(n_models + 1) + "}{l}{\\footnotesize $^{*}p<0.05$, $^{**}p<0.01$, $^{***}p<0.001$}")
105
+ lines.extend(["\\end{tabular}", "\\end{table}"])
106
+ return "\n".join(lines)
107
+
108
+ else: # html
109
+ lines = ["<table border='1' cellpadding='5' style='border-collapse:collapse'>"]
110
+ lines.append("<tr><th>Variable</th>" + "".join(f"<th>({i+1})</th>" for i in range(n_models)) + "</tr>")
111
+ for param in all_params:
112
+ coef_cells = ""
113
+ se_cells = ""
114
+ for res in model_list:
115
+ c = _coef(res, param)
116
+ s = _se(res, param)
117
+ p = _pval(res, param)
118
+ st = _star(p) if stars else ""
119
+ coef_cells += f"<td align='center'>{f'{c:.4f}{st}' if c==c else ''}</td>"
120
+ se_cells += f"<td align='center'><small>{f'({s:.4f})' if s==s else ''}</small></td>"
121
+ lines.append(f"<tr><td><b>{param}</b></td>{coef_cells}</tr>")
122
+ lines.append(f"<tr><td></td>{se_cells}</tr>")
123
+ n_row = "<tr><td>N</td>" + "".join(f"<td align='center'>{res.get('n_obs','')}</td>" for res in model_list) + "</tr>"
124
+ def _r2_cell(res):
125
+ r2 = res.get("r_squared", None)
126
+ val = f"{r2:.4f}" if isinstance(r2, float) else ""
127
+ return f"<td align='center'>{val}</td>"
128
+ r2_row = "<tr><td>R\u00b2</td>" + "".join(_r2_cell(res) for res in model_list) + "</tr>"
129
+ lines.extend([n_row, r2_row, "</table>"])
130
+ if stars: lines.append("<p><small>* p&lt;0.05, ** p&lt;0.01, *** p&lt;0.001</small></p>")
131
+ return "\n".join(lines)
132
+
133
+
134
@command("outreg", usage="outreg [using path] [format(latex|html)] [--stars]")
def cmd_outreg(session: Session, args: str) -> str:
    """Export regression comparison table to LaTeX or HTML.

    The output file may be given as ``using <path>`` or ``using(<path>)``;
    without one the table is returned as text.  ``format(...)`` is matched
    case-insensitively and defaults to ``latex``.  Significance stars are
    added only when ``stars`` / ``--stars`` appears as its own token.

    Returns:
        A status or error message, or the rendered table when no path is given.
    """
    import os

    positional, opts = _stata_opts(args)

    fmt = opts.get("format", "latex").strip().lower()
    if fmt not in ("latex", "html"):
        # Previously any unrecognized value silently produced HTML.
        return f"outreg: unknown format '{fmt}'. Use format(latex) or format(html)."

    path = opts.get("using")
    flag_tokens = list(positional)
    # Also accept: outreg using file.tex
    if not path and "using" in positional:
        idx = positional.index("using")
        if idx + 1 < len(positional):
            path = positional[idx + 1]
            # Keep the path out of flag detection so "using stars.tex"
            # does not switch stars on.
            del flag_tokens[idx:idx + 2]

    show_stars = any(tok.lower() in ("stars", "--stars") for tok in flag_tokens)

    if not session.results:
        return "No stored results. Run regression commands first."

    table = _results_to_table(session.results, fmt=fmt, stars=show_stars)
    if not table:
        return "No regression results with coefficients found."

    if path:
        try:
            os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
            # Explicit encoding: the HTML table contains a non-ASCII "²".
            with open(path, "w", encoding="utf-8") as f:
                f.write(table)
        except (OSError, ValueError) as exc:
            return f"outreg write error: {exc}"
        return f"outreg: table saved to {path} ({fmt} format)"

    return f"\noutreg ({fmt}):\n\n{table}"
165
+
166
+
167
@command("log", usage="log using <path> | log close | log status | log display | log clear")
def cmd_log(session: Session, args: str) -> str:
    """Session logging: real-time capture or history export.

    Examples:
    log using analysis.log — start real-time logging
    log status — check if logging is active
    log close — stop real-time log and close file
    log display — print command history to screen
    log clear — clear command history

    With no arguments the command behaves like ``log display``.  The path may
    also be written as ``using(<path>)``.
    """
    from pathlib import Path

    positional, opts = _stata_opts(args)
    if positional:
        subcmd = positional[0].lower()
    elif "using" in opts:
        # "log using(path)" leaves no positional token; treat it as "using".
        subcmd = "using"
    else:
        subcmd = "display"

    # ---- Real-time logging ----
    if subcmd == "using":
        log_path = positional[1] if len(positional) >= 2 else opts.get("using", "outputs/session.log")
        # Close existing real-time log if open
        if session._log_file is not None:
            try:
                session._log_file.write(f"\nLog replaced {datetime.now().isoformat()}\n")
                session._log_file.close()
            except (OSError, ValueError):
                # Best effort: the old handle may already be closed or broken.
                pass
            # Forget the old handle now, so that a failed open below does not
            # leave the session pointing at a closed file.
            session._log_file = None
            session._log_path = None

        path = Path(log_path)
        log_file = None
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            log_file = open(path, "w", encoding="utf-8")
            log_file.write("OpenStat session log\n")
            log_file.write(f"Started: {datetime.now().isoformat()}\n")
            log_file.write("=" * 60 + "\n\n")
            log_file.flush()
        except OSError as exc:
            if log_file is not None:
                try:
                    log_file.close()
                except OSError:
                    pass
            return f"log error: {exc}"
        # Publish the handle only once the header was written successfully.
        session._log_file = log_file
        session._log_path = str(path)
        return f"Log opened: {path}"

    if subcmd in ("close", "off"):
        if session._log_file is None:
            return "No active log. Use: log using <path>"
        closed_path = session._log_path
        try:
            session._log_file.write(f"\nLog closed {datetime.now().isoformat()}\n")
            session._log_file.close()
        except (OSError, ValueError):
            # Best effort: state is reset below regardless.
            pass
        session._log_file = None
        session._log_path = None
        return f"Log closed: {closed_path}"

    if subcmd == "status":
        if session._log_file is None:
            return "Logging: OFF"
        return f"Logging: ON → {session._log_path}"

    # ---- History display / export ----
    if subcmd == "display":
        if not session.history:
            return "No commands in session history."
        lines = ["\nSession Log:", "=" * 50]
        for i, cmd_line in enumerate(session.history, 1):
            lines.append(f" {i:>4}. {cmd_line}")
        return "\n".join(lines)

    if subcmd == "clear":
        session.history.clear()
        return "Session history cleared."

    return (
        "Usage:\n"
        " log using <path> — start real-time logging to file\n"
        " log status — check if logging is active\n"
        " log close — stop logging\n"
        " log display — show command history\n"
        " log clear — clear command history"
    )
@@ -0,0 +1,141 @@
1
+ """Panel data commands: xtset, xtreg, hausman."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from openstat.session import Session, ModelResult
8
+ from openstat.commands.base import command, CommandArgs, friendly_error
9
+ from openstat.dsl.parser import parse_formula, ParseError
10
+
11
+
12
def _store_panel_model(session, result, raw_model, dep, indeps, fit_kwargs=None):
    """Record a fitted panel model on the session and return its printable summary.

    Updates the session's "last model" attributes, appends a ``ModelResult``
    (with ``n_obs``, ``params``, ``std_errors`` and, when not ``None``,
    ``r_squared`` in its details) to ``session.results``, and returns
    ``result.summary_table()`` followed by any warnings, one per line.
    """
    session._last_model = raw_model
    session._last_model_vars = (dep, indeps)
    session._last_fit_result = result
    session._last_fit_kwargs = fit_kwargs if fit_kwargs else {}

    # Prefer the result's own markdown rendering; fall back to str().
    if hasattr(result, "to_markdown"):
        markdown = result.to_markdown()
    else:
        markdown = str(result)

    details = {"n_obs": result.n_obs}
    details["params"] = dict(result.params)
    details["std_errors"] = dict(result.std_errors)
    if result.r_squared is not None:
        details["r_squared"] = result.r_squared

    stored = ModelResult(
        name=result.model_type,
        formula=result.formula,
        table=markdown,
        details=details,
    )
    session.results.append(stored)

    summary = result.summary_table()
    if not result.warnings:
        return summary
    return summary + "\n" + "\n".join(result.warnings)
34
+
35
+
36
@command("xtset", usage="xtset <panel_var> <time_var>")
def cmd_xtset(session: Session, args: str) -> str:
    """Declare panel structure: entity and time variables.

    Takes the first two whitespace-separated tokens as the panel and time
    columns, checks that both exist in the loaded data, stores them on the
    session and reports the number of distinct entities and periods.
    """
    df = session.require_data()

    tokens = args.split()
    if len(tokens) < 2:
        return "Usage: xtset <panel_var> <time_var>"
    panel_var, time_var = tokens[:2]

    # Report the first missing column, panel variable first.
    missing = next(
        (name for name in (panel_var, time_var) if name not in df.columns),
        None,
    )
    if missing is not None:
        return f"Column not found: {missing}"

    n_entities = df[panel_var].n_unique()
    n_periods = df[time_var].n_unique()
    session._panel_var = panel_var
    session._time_var = time_var

    report = [
        f"Panel variable: {panel_var} ({n_entities} entities)",
        f"Time variable: {time_var} ({n_periods} periods)",
        f"Observations: {df.height}",
    ]
    return "\n".join(report)
59
+
60
+
61
@command("xtreg", usage="xtreg y ~ x1 + x2, fe|re|be [--robust] [--cluster]")
def cmd_xtreg(session: Session, args: str) -> str:
    """Fit panel data regression: fixed effects, random effects, or between.

    The text before the last comma is the formula; the text after it names
    the estimator (``fe``, ``re`` or ``be``) and carries the options.
    ``--robust`` and ``--cluster=<var>`` are honored on either side of the
    comma.  ``robust`` is passed to the FE and RE fitters; ``cluster`` is
    passed to the FE fitter only.

    Requires a prior ``xtset``.  Returns the model summary or an error message.
    """
    df = session.require_data()

    if session._panel_var is None or session._time_var is None:
        return "Panel structure not set. Use: xtset <panel_var> <time_var>"

    # Split on the last comma to get formula and estimator
    if "," not in args:
        return "Usage: xtreg y ~ x1 + x2, fe|re|be [--robust]"
    formula_part, options_part = args.rsplit(",", 1)

    ca = CommandArgs(options_part)
    given = {p.lower() for p in ca.positional}
    # Tuple order sets precedence when several estimators are named.
    estimator = next((est for est in ("fe", "re", "be") if est in given), None)
    if estimator is None:
        return "Specify estimator: fe (fixed effects), re (random effects), or be (between)"

    robust = ca.has_flag("--robust") or "--robust" in formula_part
    cluster = ca.get_option("cluster")
    # A --cluster=<var> written before the comma used to be stripped from the
    # formula and then dropped; capture it unless the options already set one.
    cluster_in_formula = re.search(r'--cluster=(\S+)', formula_part)
    if not cluster and cluster_in_formula:
        cluster = cluster_in_formula.group(1)

    formula_clean = formula_part.replace("--robust", "").strip()
    # Remove cluster option from formula
    formula_clean = re.sub(r'--cluster=\S+', '', formula_clean).strip()

    try:
        dep, indeps = parse_formula(formula_clean)
    except ParseError as e:
        return f"Formula error: {e}"

    try:
        from openstat.stats.panel import fit_panel_fe, fit_panel_re, fit_panel_be

        if estimator == "fe":
            result, raw = fit_panel_fe(
                df, dep, indeps, session._panel_var, session._time_var,
                robust=robust, cluster=cluster,
            )
        elif estimator == "re":
            result, raw = fit_panel_re(
                df, dep, indeps, session._panel_var, session._time_var,
                robust=robust,
            )
        else:  # be
            result, raw = fit_panel_be(
                df, dep, indeps, session._panel_var, session._time_var,
            )

        # Store raw model for hausman test
        session._panel_models[estimator] = raw

        return _store_panel_model(
            session, result, raw, dep, indeps,
            {"estimator": estimator, "robust": robust},
        )
    except ImportError as e:
        return str(e)
    except Exception as e:
        return friendly_error(e, "xtreg")
124
+
125
+
126
@command("hausman", usage="hausman")
def cmd_hausman(session: Session, args: str) -> str:
    """Hausman test: compare FE vs RE. Run both xtreg fe and xtreg re first.

    Reads the raw FE and RE models that ``xtreg`` stored in
    ``session._panel_models`` and returns the output of ``hausman_test``,
    or a hint / error message.  ``args`` is not used.
    """
    fe_raw = session._panel_models.get("fe")
    re_raw = session._panel_models.get("re")

    # Test for absence explicitly: a stored result object could define
    # __len__/__bool__ and evaluate as falsy even though it is present.
    if fe_raw is None or re_raw is None:
        return "Run both 'xtreg ..., fe' and 'xtreg ..., re' first."

    try:
        from openstat.stats.panel import hausman_test
        return hausman_test(fe_raw, re_raw)
    except ImportError as e:
        return str(e)
    except Exception as e:
        return friendly_error(e, "hausman")
@@ -0,0 +1,226 @@
1
+ """PDF and Markdown export commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from datetime import date
7
+ from pathlib import Path
8
+
9
+ from openstat.commands.base import command, CommandArgs
10
+ from openstat.session import Session
11
+
12
+
13
+ def _ensure_dir(path: str) -> None:
14
+ os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
15
+
16
+
17
+ # ── Markdown export ──────────────────────────────────────────────────────────
18
+
19
def _export_md(session: Session, path: str) -> str:
    """Write a Markdown report of the session to *path* and return its absolute path.

    The report has a header line, then (when a dataset is loaded) an overview
    table and summary statistics for at most the first 30 numeric columns,
    then one fenced block per stored model result, then image links for every
    recorded plot path that exists on disk.
    """
    import polars as pl

    dataset_label = session.dataset_name or 'Unknown'
    header = (
        f"**Dataset:** {dataset_label} | "
        f"**Date:** {date.today().isoformat()} | "
        f"**Shape:** {session.shape_str}"
    )
    out = ["# OpenStat Results", "", header, ""]

    frame = session.df
    if frame is not None:
        missing_cells = sum(frame[name].null_count() for name in frame.columns)
        out.extend([
            "## Dataset Overview",
            "",
            "| Property | Value |",
            "|---|---|",
            f"| Rows | {frame.height:,} |",
            f"| Columns | {frame.width} |",
            f"| Missing cells | {missing_cells} |",
            "",
        ])

        numeric_types = (
            pl.Float32, pl.Float64,
            pl.Int8, pl.Int16, pl.Int32, pl.Int64,
            pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64,
        )
        numeric_names = [
            name for name in frame.columns if frame[name].dtype in numeric_types
        ]
        if numeric_names:
            out.extend([
                "## Summary Statistics",
                "",
                "| Variable | N | Mean | SD | Min | Max |",
                "|---|---|---|---|---|---|",
            ])
            for name in numeric_names[:30]:
                values = frame[name].drop_nulls()
                if values.len() == 0:
                    # A column with only nulls gets no row.
                    continue
                mean_txt = f"{values.mean():.4f}"
                # A single observation has no standard deviation to show.
                sd_txt = f"{values.std():.4f}" if values.len() > 1 else "—"
                out.append(
                    f"| {name} | {values.len()} | {mean_txt} | {sd_txt} "
                    f"| {values.min():.4f} | {values.max():.4f} |"
                )
            out.append("")

    for model in session.results:
        out.extend([
            f"## {model.name}: {model.formula}",
            "",
            "```",
            model.table,
            "```",
            "",
        ])

    if session.plot_paths:
        out.extend(["## Figures", ""])
        out.extend(
            f"![Figure]({plot})"
            for plot in session.plot_paths
            if os.path.exists(plot)
        )
        out.append("")

    _ensure_dir(path)
    Path(path).write_text("\n".join(out), encoding="utf-8")
    return os.path.abspath(path)
83
+
84
+
85
+ # ── PDF export ───────────────────────────────────────────────────────────────
86
+
87
def _export_pdf(session: Session, path: str) -> str:
    """Build a PDF report of the session at *path* using reportlab.

    The report has a title block, then (when a dataset is loaded) an overview
    table and summary statistics for at most the first 25 numeric columns,
    then every stored model table in a monospace style, then every recorded
    plot that exists on disk.

    Returns:
        The absolute path of the written file, or an installation hint when
        reportlab cannot be imported.
    """
    try:
        from reportlab.lib.pagesizes import A4
        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
        from reportlab.lib.units import cm
        from reportlab.lib import colors
        from reportlab.platypus import (
            SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, Image,
            HRFlowable,
        )
    except ImportError:
        return (
            "reportlab is required for PDF export.\n"
            "Install: pip install reportlab"
        )

    import polars as pl
    from xml.sax.saxutils import escape

    def _markup_safe(value) -> str:
        """Escape &, < and > so Paragraph does not read the text as markup."""
        return escape(str(value))

    def _grid_style(font_size):
        """Shared look for both tables: colored header row, grid, striped body."""
        return TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#4C72B0")),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
            ("GRID", (0, 0), (-1, -1), 0.5, colors.grey),
            ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f0f4ff")]),
            ("FONTSIZE", (0, 0), (-1, -1), font_size),
        ])

    doc = SimpleDocTemplate(
        path,
        pagesize=A4,
        leftMargin=2 * cm, rightMargin=2 * cm,
        topMargin=2 * cm, bottomMargin=2 * cm,
    )
    styles = getSampleStyleSheet()
    mono = ParagraphStyle("Mono", parent=styles["Normal"], fontName="Courier", fontSize=8, leading=11)
    h1 = styles["Heading1"]
    h2 = styles["Heading2"]
    normal = styles["Normal"]

    story = []

    # Title
    story.append(Paragraph("OpenStat Results", h1))
    story.append(Paragraph(
        f"Dataset: {_markup_safe(session.dataset_name or 'Unknown')} &nbsp;|&nbsp; "
        f"Date: {date.today().isoformat()} &nbsp;|&nbsp; "
        f"Shape: {_markup_safe(session.shape_str)}",
        normal,
    ))
    story.append(HRFlowable(width="100%"))
    story.append(Spacer(1, 0.3 * cm))

    # Dataset overview table
    if session.df is not None:
        df = session.df
        story.append(Paragraph("Dataset Overview", h2))
        NUMERIC = (pl.Float32, pl.Float64, pl.Int8, pl.Int16, pl.Int32, pl.Int64,
                   pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64)
        num_cols = [c for c in df.columns if df[c].dtype in NUMERIC]
        overview_data = [
            ["Property", "Value"],
            ["Rows", f"{df.height:,}"],
            ["Columns", str(df.width)],
            ["Missing cells", str(sum(df[c].null_count() for c in df.columns))],
            ["Numeric columns", str(len(num_cols))],
        ]
        tbl = Table(overview_data, colWidths=[6 * cm, 10 * cm])
        tbl.setStyle(_grid_style(9))
        story.append(tbl)
        story.append(Spacer(1, 0.5 * cm))

        # Summary statistics
        if num_cols:
            story.append(Paragraph("Summary Statistics", h2))
            stats_data = [["Variable", "N", "Mean", "SD", "Min", "Max"]]
            for c in num_cols[:25]:
                col = df[c].drop_nulls()
                if col.len() == 0:
                    continue
                # A single observation has no standard deviation to show.
                sd_str = f"{col.std():.4f}" if col.len() > 1 else "—"
                stats_data.append([
                    c, str(col.len()),
                    f"{col.mean():.4f}", sd_str,
                    f"{col.min():.4f}", f"{col.max():.4f}",
                ])
            st = Table(stats_data, colWidths=[4*cm, 1.5*cm, 2.5*cm, 2.5*cm, 2.5*cm, 2.5*cm])
            st.setStyle(_grid_style(8))
            story.append(st)
            story.append(Spacer(1, 0.5 * cm))

    # Model results
    for mr in session.results:
        story.append(Paragraph(f"{_markup_safe(mr.name)}: {_markup_safe(mr.formula)}", h2))
        # One monospace paragraph per line.  Escape first (tables contain
        # text such as "P>|t|" or "p<0.05"), then turn spaces into &nbsp; so
        # the column alignment survives.
        for line in mr.table.split("\n"):
            story.append(Paragraph(_markup_safe(line).replace(" ", "&nbsp;") or "&nbsp;", mono))
        story.append(Spacer(1, 0.4 * cm))

    # Plots
    for plot_path in session.plot_paths:
        if os.path.exists(plot_path):
            story.append(Paragraph("Figure", h2))
            try:
                img = Image(plot_path, width=15 * cm, height=10 * cm, kind="proportional")
                story.append(img)
            except Exception:
                # Unreadable image: fall back to a textual placeholder.
                story.append(Paragraph(f"[Plot: {_markup_safe(plot_path)}]", normal))
            story.append(Spacer(1, 0.3 * cm))

    _ensure_dir(path)
    doc.build(story)
    return os.path.abspath(path)
205
+
206
+
207
+ # ── Commands ─────────────────────────────────────────────────────────────────
208
+
209
@command("export pdf", usage="export pdf [path]")
def cmd_export_pdf(session: Session, args: str) -> str:
    """Export results to a PDF report (requires reportlab).

    Uses the first positional argument as the output path, defaulting to
    ``outputs/results.pdf``.
    """
    ca = CommandArgs(args)
    path = ca.positional[0] if ca.positional else "outputs/results.pdf"
    out = _export_pdf(session, path)
    # _export_pdf returns os.path.abspath(...) on success and a plain hint
    # message otherwise.  Testing for an absolute path also recognizes
    # success when the chosen file name does not end in ".pdf".
    if os.path.isabs(out):
        return f"PDF saved: {out}"
    return out
218
+
219
+
220
@command("export md", usage="export md [path]")
def cmd_export_md(session: Session, args: str) -> str:
    """Export results to a Markdown file.

    Uses the first positional argument as the output path, defaulting to
    ``outputs/results.md``.
    """
    parsed = CommandArgs(args)
    if parsed.positional:
        target = parsed.positional[0]
    else:
        target = "outputs/results.md"
    saved_to = _export_md(session, target)
    return f"Markdown saved: {saved_to}"