openstat-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstat/__init__.py +3 -0
- openstat/__main__.py +4 -0
- openstat/backends/__init__.py +16 -0
- openstat/backends/duckdb_backend.py +70 -0
- openstat/backends/polars_backend.py +52 -0
- openstat/cli.py +92 -0
- openstat/commands/__init__.py +82 -0
- openstat/commands/adv_stat_cmds.py +1255 -0
- openstat/commands/advanced_ml_cmds.py +576 -0
- openstat/commands/advreg_cmds.py +207 -0
- openstat/commands/alias_cmds.py +135 -0
- openstat/commands/arch_cmds.py +82 -0
- openstat/commands/arules_cmds.py +111 -0
- openstat/commands/automodel_cmds.py +212 -0
- openstat/commands/backend_cmds.py +82 -0
- openstat/commands/base.py +170 -0
- openstat/commands/bayes_cmds.py +71 -0
- openstat/commands/causal_cmds.py +269 -0
- openstat/commands/cluster_cmds.py +152 -0
- openstat/commands/data_cmds.py +996 -0
- openstat/commands/datamanip_cmds.py +672 -0
- openstat/commands/dataquality_cmds.py +174 -0
- openstat/commands/datetime_cmds.py +176 -0
- openstat/commands/dimreduce_cmds.py +184 -0
- openstat/commands/discrete_cmds.py +149 -0
- openstat/commands/dsl_cmds.py +143 -0
- openstat/commands/epi_cmds.py +93 -0
- openstat/commands/equiv_tobit_cmds.py +94 -0
- openstat/commands/esttab_cmds.py +196 -0
- openstat/commands/export_beamer_cmds.py +142 -0
- openstat/commands/export_cmds.py +201 -0
- openstat/commands/export_extra_cmds.py +240 -0
- openstat/commands/factor_cmds.py +180 -0
- openstat/commands/groupby_cmds.py +155 -0
- openstat/commands/help_cmds.py +237 -0
- openstat/commands/i18n_cmds.py +43 -0
- openstat/commands/import_extra_cmds.py +561 -0
- openstat/commands/influence_cmds.py +134 -0
- openstat/commands/iv_cmds.py +106 -0
- openstat/commands/manova_cmds.py +105 -0
- openstat/commands/mediate_cmds.py +233 -0
- openstat/commands/meta_cmds.py +284 -0
- openstat/commands/mi_cmds.py +228 -0
- openstat/commands/mixed_cmds.py +79 -0
- openstat/commands/mixture_changepoint_cmds.py +166 -0
- openstat/commands/ml_adv_cmds.py +147 -0
- openstat/commands/ml_cmds.py +178 -0
- openstat/commands/model_eval_cmds.py +142 -0
- openstat/commands/network_cmds.py +288 -0
- openstat/commands/nlquery_cmds.py +161 -0
- openstat/commands/nonparam_cmds.py +149 -0
- openstat/commands/outreg_cmds.py +247 -0
- openstat/commands/panel_cmds.py +141 -0
- openstat/commands/pdf_cmds.py +226 -0
- openstat/commands/pipeline_cmds.py +319 -0
- openstat/commands/plot_cmds.py +189 -0
- openstat/commands/plugin_cmds.py +79 -0
- openstat/commands/posthoc_cmds.py +153 -0
- openstat/commands/power_cmds.py +172 -0
- openstat/commands/profile_cmds.py +246 -0
- openstat/commands/rbridge_cmds.py +81 -0
- openstat/commands/regex_cmds.py +104 -0
- openstat/commands/report_cmds.py +48 -0
- openstat/commands/repro_cmds.py +129 -0
- openstat/commands/resampling_cmds.py +109 -0
- openstat/commands/reshape_cmds.py +223 -0
- openstat/commands/sem_cmds.py +177 -0
- openstat/commands/stat_cmds.py +1040 -0
- openstat/commands/stata_import_cmds.py +215 -0
- openstat/commands/string_cmds.py +124 -0
- openstat/commands/surv_cmds.py +145 -0
- openstat/commands/survey_cmds.py +153 -0
- openstat/commands/textanalysis_cmds.py +192 -0
- openstat/commands/ts_adv_cmds.py +136 -0
- openstat/commands/ts_cmds.py +195 -0
- openstat/commands/tui_cmds.py +111 -0
- openstat/commands/ux_cmds.py +191 -0
- openstat/commands/validate_cmds.py +270 -0
- openstat/commands/viz_adv_cmds.py +312 -0
- openstat/commands/viz_extra_cmds.py +251 -0
- openstat/commands/watch_cmds.py +69 -0
- openstat/config.py +106 -0
- openstat/dsl/__init__.py +0 -0
- openstat/dsl/parser.py +332 -0
- openstat/dsl/tokenizer.py +105 -0
- openstat/i18n.py +120 -0
- openstat/io/__init__.py +0 -0
- openstat/io/loader.py +187 -0
- openstat/jupyter/__init__.py +18 -0
- openstat/jupyter/display.py +18 -0
- openstat/jupyter/magic.py +60 -0
- openstat/logging_config.py +59 -0
- openstat/plots/__init__.py +0 -0
- openstat/plots/plotter.py +437 -0
- openstat/plots/surv_plots.py +32 -0
- openstat/plots/ts_plots.py +59 -0
- openstat/plugins/__init__.py +5 -0
- openstat/plugins/manager.py +69 -0
- openstat/repl.py +457 -0
- openstat/reporting/__init__.py +0 -0
- openstat/reporting/eda.py +208 -0
- openstat/reporting/report.py +67 -0
- openstat/script_runner.py +319 -0
- openstat/session.py +133 -0
- openstat/stats/__init__.py +0 -0
- openstat/stats/advanced_regression.py +269 -0
- openstat/stats/arch_garch.py +84 -0
- openstat/stats/bayesian.py +103 -0
- openstat/stats/causal.py +258 -0
- openstat/stats/clustering.py +206 -0
- openstat/stats/discrete.py +311 -0
- openstat/stats/epidemiology.py +119 -0
- openstat/stats/equiv_tobit.py +163 -0
- openstat/stats/factor.py +174 -0
- openstat/stats/imputation.py +282 -0
- openstat/stats/influence.py +78 -0
- openstat/stats/iv.py +131 -0
- openstat/stats/manova.py +124 -0
- openstat/stats/mixed.py +128 -0
- openstat/stats/ml.py +275 -0
- openstat/stats/ml_advanced.py +117 -0
- openstat/stats/model_eval.py +183 -0
- openstat/stats/models.py +1342 -0
- openstat/stats/nonparametric.py +130 -0
- openstat/stats/panel.py +179 -0
- openstat/stats/power.py +295 -0
- openstat/stats/resampling.py +203 -0
- openstat/stats/survey.py +213 -0
- openstat/stats/survival.py +196 -0
- openstat/stats/timeseries.py +142 -0
- openstat/stats/ts_advanced.py +114 -0
- openstat/types.py +11 -0
- openstat/web/__init__.py +1 -0
- openstat/web/app.py +117 -0
- openstat/web/session_manager.py +73 -0
- openstat/web/static/app.js +117 -0
- openstat/web/static/index.html +38 -0
- openstat/web/static/style.css +103 -0
- openstat_cli-1.0.0.dist-info/METADATA +748 -0
- openstat_cli-1.0.0.dist-info/RECORD +143 -0
- openstat_cli-1.0.0.dist-info/WHEEL +4 -0
- openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
- openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""Export commands: export docx, export pptx."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from datetime import date
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from openstat.commands.base import command, CommandArgs
|
|
10
|
+
from openstat.session import Session
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _ensure_dir(path: str) -> None:
|
|
14
|
+
os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# ── Word (.docx) ───────────────────────────────────────────────────────────
|
|
18
|
+
|
|
19
|
+
def _export_docx(session: Session, path: str) -> str:
    """Export the session to a Word document and report where it was written.

    The document contains a title line, a dataset overview table, summary
    statistics for up to 20 numeric columns, one section per stored model
    result and one figure per plot file that still exists on disk.

    Args:
        session: Session providing ``dataset_name``, ``shape_str``, ``df``,
            ``results`` and ``plot_paths``.
        path: Destination file; missing parent directories are created.

    Returns:
        The absolute path of the saved file, or an installation hint when
        python-docx cannot be imported.
    """
    try:
        from docx import Document
    except ImportError:
        return (
            "python-docx is required for Word export.\n"
            "Install: pip install python-docx"
        )

    doc = Document()
    doc.add_heading("OpenStat Results", 0)
    doc.add_paragraph(
        f"Dataset: {session.dataset_name or 'Unknown'} | "
        f"Date: {date.today().isoformat()} | "
        f"Shape: {session.shape_str}"
    )
    doc.add_paragraph()

    # Dataset overview table
    doc.add_heading("Dataset Overview", level=1)
    df = session.df
    if df is not None:
        import polars as pl

        # One dtype list drives both the "Numeric columns" count and the
        # summary table, so the two can never disagree.
        numeric_dtypes = (
            pl.Float32, pl.Float64,
            pl.Int8, pl.Int16, pl.Int32, pl.Int64,
            pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64,
        )
        num_cols = [c for c in df.columns if df[c].dtype in numeric_dtypes]
        missing_cells = sum(df[c].null_count() for c in df.columns)

        tbl = doc.add_table(rows=1, cols=2)
        tbl.style = "Table Grid"
        hdr = tbl.rows[0].cells
        hdr[0].text = "Property"
        hdr[1].text = "Value"
        overview = [
            ("Rows", str(df.height)),
            ("Columns", str(df.width)),
            ("Missing cells", str(missing_cells)),
            ("Numeric columns", str(len(num_cols))),
        ]
        for label, val in overview:
            row = tbl.add_row().cells
            row[0].text = label
            row[1].text = val
        doc.add_paragraph()

        # Summary statistics table
        if num_cols:
            doc.add_heading("Summary Statistics", level=2)
            stats_tbl = doc.add_table(rows=1, cols=5)
            stats_tbl.style = "Table Grid"
            for i, hdr_text in enumerate(["Variable", "N", "Mean", "SD", "Min–Max"]):
                stats_tbl.rows[0].cells[i].text = hdr_text
            for c in num_cols[:20]:  # cap at 20 rows
                col = df[c].drop_nulls()
                if col.len() == 0:
                    # Nothing to summarise for an all-null column.
                    continue
                cells = stats_tbl.add_row().cells
                cells[0].text = c
                cells[1].text = str(col.len())
                cells[2].text = f"{col.mean():.4f}"
                # A standard deviation needs at least two observations.
                cells[3].text = f"{col.std():.4f}" if col.len() > 1 else "—"
                cells[4].text = f"{col.min():.2f} – {col.max():.2f}"

    # Model results
    for mr in session.results:
        doc.add_heading(f"{mr.name}: {mr.formula}", level=1)
        doc.add_paragraph(mr.table, style="No Spacing")
        doc.add_paragraph()

    # Plots
    for plot_path in session.plot_paths:
        if os.path.exists(plot_path):
            doc.add_heading("Figure", level=2)
            try:
                doc.add_picture(plot_path)
            except Exception:
                # Best effort: an image that cannot be embedded must not abort
                # the export, so leave a textual marker instead.
                doc.add_paragraph(f"[Plot: {plot_path}]")

    _ensure_dir(path)
    doc.save(path)
    return os.path.abspath(path)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# ── PowerPoint (.pptx) ────────────────────────────────────────────────────
|
|
104
|
+
|
|
105
|
+
def _export_pptx(session: Session, path: str) -> str:
    """Export the session to a PowerPoint deck and report where it was written.

    Slides produced: a title slide, a dataset overview slide, one slide per
    stored model result (table text truncated to 1000 characters) and one
    slide per plot file that still exists on disk.

    Args:
        session: Session providing ``dataset_name``, ``shape_str``, ``df``,
            ``results`` and ``plot_paths``.
        path: Destination file; missing parent directories are created.

    Returns:
        The absolute path of the saved file, or an installation hint when
        python-pptx cannot be imported.
    """
    try:
        from pptx import Presentation
        from pptx.util import Inches
    except ImportError:
        return (
            "python-pptx is required for PowerPoint export.\n"
            "Install: pip install python-pptx"
        )

    prs = Presentation()
    blank_layout = prs.slide_layouts[6]  # blank
    title_layout = prs.slide_layouts[0]  # title slide

    # Slide 1: Title
    slide = prs.slides.add_slide(title_layout)
    slide.shapes.title.text = "OpenStat Results"
    slide.placeholders[1].text = (
        f"{session.dataset_name or 'Dataset'} | {date.today().isoformat()}"
    )

    # Slide 2: Overview
    slide = prs.slides.add_slide(prs.slide_layouts[1])
    slide.shapes.title.text = "Dataset Overview"
    tf = slide.placeholders[1].text_frame
    tf.text = session.shape_str
    if session.df is not None:
        df = session.df
        tf.add_paragraph().text = f"Missing cells: {sum(df[c].null_count() for c in df.columns)}"

    # One slide per model
    for mr in session.results:
        slide = prs.slides.add_slide(prs.slide_layouts[1])
        slide.shapes.title.text = f"{mr.name}: {mr.formula}"
        tf = slide.placeholders[1].text_frame
        tf.text = mr.table[:1000]  # truncate if huge

    # One slide per plot
    for plot_path in session.plot_paths:
        if os.path.exists(plot_path):
            slide = prs.slides.add_slide(blank_layout)
            try:
                slide.shapes.add_picture(
                    plot_path,
                    Inches(0.5), Inches(0.5),
                    width=Inches(8), height=Inches(5.5),
                )
            except Exception:
                # Best effort: keep exporting, but leave a visible marker on
                # the slide (as the Word export does) instead of a silent
                # empty page.
                box = slide.shapes.add_textbox(
                    Inches(0.5), Inches(0.5), Inches(8), Inches(1)
                )
                box.text_frame.text = f"[Plot: {plot_path}]"

    _ensure_dir(path)
    prs.save(path)
    return os.path.abspath(path)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# ── Command ────────────────────────────────────────────────────────────────
|
|
162
|
+
|
|
163
|
+
@command("export", usage="export docx|pptx|pdf|md [path]")
def cmd_export(session: Session, args: str) -> str:
    """Export results to Word (.docx), PowerPoint (.pptx), PDF, or Markdown."""
    # args is "<format> [path]". The format is case-insensitive; when no path
    # is given each format falls back to "outputs/results.<ext>".
    # Returns the message to show the user: a "... saved: <path>" confirmation
    # or the exporter's own error/installation hint.
    ca = CommandArgs(args)
    if not ca.positional:
        return "Usage: export docx|pptx|pdf|md [path]"

    fmt = ca.positional[0].lower()

    def _target(default: str) -> str:
        """Return the user-supplied path, or *default* when none was given."""
        return ca.positional[1] if len(ca.positional) > 1 else default

    def _confirm(label: str, suffix: str, out: str) -> str:
        """Prefix a saved-file path with a confirmation; pass hints through."""
        # The docx/pptx exporters return os.path.abspath(path) on success, so
        # an absolute path also counts as success. Without this, a target
        # lacking the usual extension ("export docx report") printed the bare
        # path with no confirmation.
        # NOTE(review): _export_pdf is defined elsewhere; the extension test is
        # kept so its existing behaviour is unchanged.
        if out.endswith(suffix) or os.path.isabs(out):
            return f"{label} saved: {out}"
        return out

    if fmt == "docx":
        out = _export_docx(session, _target("outputs/results.docx"))
        return _confirm("Word document", ".docx", out)

    if fmt == "pptx":
        out = _export_pptx(session, _target("outputs/results.pptx"))
        return _confirm("PowerPoint", ".pptx", out)

    if fmt == "pdf":
        # Imported lazily so only this branch loads the PDF module.
        from openstat.commands.pdf_cmds import _export_pdf
        out = _export_pdf(session, _target("outputs/results.pdf"))
        return _confirm("PDF", ".pdf", out)

    if fmt == "md":
        from openstat.commands.pdf_cmds import _export_md
        out = _export_md(session, _target("outputs/results.md"))
        return f"Markdown saved: {out}"

    return f"Unknown export format: {fmt}. Use 'docx', 'pptx', 'pdf', or 'md'."
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""Extra export commands: Jupyter notebook, APA text, progress bars."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from datetime import date
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from openstat.commands.base import command, CommandArgs, friendly_error
|
|
10
|
+
from openstat.session import Session
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# ── Jupyter Notebook export ──────────────────────────────────────────────────
|
|
14
|
+
|
|
15
|
+
@command("export ipynb", usage="export ipynb [path]")
def cmd_export_ipynb(session: Session, args: str) -> str:
    """Export session history as a Jupyter notebook (.ipynb).

    Creates a notebook where each command becomes a code cell with
    its output. Requires: pip install nbformat

    Examples:
      export ipynb
      export ipynb my_analysis.ipynb
    """
    # args: optional output path (default "outputs/analysis.ipynb").
    # Returns a confirmation with the absolute path and cell count, or an
    # installation hint when nbformat is missing.
    try:
        import nbformat
        from nbformat.v4 import new_notebook, new_code_cell, new_markdown_cell
    except ImportError:
        return "nbformat required. Install: pip install nbformat"

    ca = CommandArgs(args)
    out_path = ca.positional[0] if ca.positional else "outputs/analysis.ipynb"

    nb = new_notebook()
    cells = []

    # Title cell. Each metadata line ends in two spaces, which Markdown needs
    # to render a hard line break.
    cells.append(new_markdown_cell(
        f"# OpenStat Analysis\n\n"
        f"**Dataset:** {session.dataset_name or 'Unknown'}  \n"
        f"**Date:** {date.today().isoformat()}  \n"
        f"**Shape:** {session.shape_str}"
    ))

    # Setup cell: defines run(), which the history cells below call.
    cells.append(new_code_cell(
        "# Auto-generated from OpenStat session\n"
        "# Run: pip install openstat\n"
        "from openstat.session import Session\n"
        "from openstat.commands import COMMANDS\n"
        "session = Session()\n"
        "\ndef run(cmd):\n"
        "    parts = cmd.split(None, 1)\n"
        "    name = parts[0]\n"
        "    args = parts[1] if len(parts) > 1 else ''\n"
        "    return COMMANDS[name](session, args)\n"
    ))

    # One cell per history command
    for cmd_line in session.history:
        if cmd_line.strip().startswith("#"):
            cells.append(new_markdown_cell(cmd_line.lstrip("# ")))
        else:
            # repr() yields a valid Python literal for any command text;
            # escaping only quotes broke on backslashes (Windows paths,
            # regexes) and embedded newlines.
            cells.append(new_code_cell(f"print(run({cmd_line!r}))"))

    # Plots cell, emitted only when at least one plot file still exists.
    existing_plots = [p for p in session.plot_paths if os.path.exists(p)]
    if existing_plots:
        plot_code = "from IPython.display import Image, display\n"
        plot_code += "".join(
            f"display(Image({str(p)!r}))\n" for p in existing_plots
        )
        cells.append(new_code_cell(plot_code))

    nb.cells = cells
    nb.metadata["kernelspec"] = {
        "display_name": "Python 3",
        "language": "python",
        "name": "python3",
    }

    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        nbformat.write(nb, f)

    n_cells = len(cells)
    return f"Jupyter notebook saved: {os.path.abspath(out_path)} ({n_cells} cells)"
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# ── APA export ───────────────────────────────────────────────────────────────
|
|
92
|
+
|
|
93
|
+
@command("export apa", usage="export apa [path]")
def cmd_export_apa(session: Session, args: str) -> str:
    """Export model results in APA 7th edition format.

    Generates text suitable for inclusion in research papers.
    Supports OLS, logit, probit, and other regression models.

    Examples:
      ols income educ age
      export apa
      export apa results/apa_table.txt
    """
    # args: optional output path, given as --out=<path> or as the first
    # positional argument (the option wins when both are present).
    # Returns the formatted text, or a "saved" confirmation when a path was
    # given, or a hint when the session holds no model results.
    ca = CommandArgs(args)
    out_path = ca.options.get("out") or (ca.positional[0] if ca.positional else None)

    if not session.results:
        return "No model results to export. Run ols/logit/etc. first."

    lines = [
        "APA-Formatted Results",
        f"Generated: {date.today().isoformat()}",
        f"Dataset: {session.dataset_name or 'Unknown'} ({session.shape_str})",
        "",
    ]

    for mr in session.results:
        model_type = mr.name
        formula = mr.formula
        details = mr.details

        n = details.get("n", "?")
        r2 = details.get("r2")
        adj_r2 = details.get("adj_r2")
        f_stat = details.get("f_stat")
        f_pval = details.get("f_pval")
        aic = details.get("aic")
        bic = details.get("bic")
        ll = details.get("log_likelihood")

        lines.append("=" * 60)
        lines.append(f"Model: {model_type} — {formula}")
        lines.append("")

        if model_type.upper() in ("OLS", "LINEAR"):
            if r2 is not None:
                outcome, tilde, predictors = formula.partition("~")
                lines.append(
                    f"A multiple linear regression was conducted to predict {outcome.strip()} "
                    f"from {predictors.strip() if tilde else 'the predictors'}."
                )

            # Collect only the statistics that exist, so a missing R² no
            # longer leaves a dangling ", " at the start of the list.
            fit_parts = []
            if r2 is not None:
                fit_parts.append(f"R² = {r2:.3f}")
            if adj_r2 is not None:
                fit_parts.append(f"adjusted R² = {adj_r2:.3f}")
            if f_stat is not None:
                fit_parts.append(f"F = {f_stat:.2f}")
            if f_pval is not None:
                fit_parts.append("p < .001" if f_pval < 0.001 else f"p = {f_pval:.3f}")

            if fit_parts:
                # Claim significance only when the F-test supports it
                # (conventional alpha = .05); stay neutral without a p-value.
                if f_pval is None:
                    lead = "Model fit"
                elif f_pval < 0.05:
                    lead = "The model was statistically significant"
                else:
                    lead = "The model was not statistically significant"
                lines.append(f"{lead}: {', '.join(fit_parts)}.")
            lines.append(f"N = {n}.")
        else:
            if ll is not None:
                lines.append(f"N = {n}.")
            if aic is not None:
                lines.append(f"AIC = {aic:.2f}, BIC = {bic:.2f}." if bic is not None else f"AIC = {aic:.2f}.")

        lines.append("")
        lines.append("Table. Regression Coefficients")
        lines.append(f" {'Variable':<25} {'B':>10} {'SE':>8} {'p':>8}")
        lines.append(" " + "-" * 55)

        # Parse the printed table for coefficients. A data row is expected to
        # end in four numeric columns, read as coefficient, SE, <unused>, p
        # (presumably the third is the test statistic; verify against the
        # table producer).
        for line in mr.table.split("\n"):
            stripped = line.strip()
            if not stripped or stripped.startswith(("=", "-")):
                continue
            parts = stripped.split()
            if len(parts) < 4:
                continue
            try:
                coef = float(parts[-4])
                se = float(parts[-3])
                p_val = float(parts[-1])
            except ValueError:
                # Header or free-text row, not a coefficient line.
                continue
            varname = " ".join(parts[:-4]) if len(parts) > 4 else parts[0]
            if p_val < 0.001:
                sig = "***"
            elif p_val < 0.01:
                sig = "**"
            elif p_val < 0.05:
                sig = "*"
            else:
                sig = ""
            # Always print the full row: p = 0.000 and SE = 0.000 are valid
            # values, not "missing".
            lines.append(f" {varname:<25} {coef:10.3f} {se:8.3f} {p_val:8.3f}{sig}")

        lines.append(" " + "-" * 55)
        lines.append(" Note. * p < .05. ** p < .01. *** p < .001.")
        lines.append("")

    text = "\n".join(lines)

    if out_path:
        target = Path(out_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(text, encoding="utf-8")
        return f"APA results saved: {os.path.abspath(out_path)}"

    return text
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
# ── Progress bar wrapper for long commands ────────────────────────────────────
|
|
200
|
+
|
|
201
|
+
@command("progress", usage="progress <command with args>")
def cmd_progress(session: Session, args: str) -> str:
    """Run a command with a live progress indicator.

    Useful for long-running commands like bootstrap, permtest, hyperopt.
    Uses rich progress bar.

    Examples:
      progress bootstrap ols income educ age --reps=2000
      progress hyperopt income educ age --model=rf --n_iter=50
    """
    # args: the full command line to run. The command executes on a worker
    # thread while the spinner is shown. Returns the command's output ("" when
    # it produced none); an exception raised by the command is re-raised here.
    from openstat.commands.base import run_command
    from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
    import threading

    command_line = args.strip()
    if not command_line:
        return "Usage: progress <command> [args]"

    outcome = {"result": None, "error": None}

    def _run():
        try:
            outcome["result"] = run_command(session, command_line)
        except Exception as exc:
            # Hand the failure to the calling thread. Previously a raising
            # command never set the "done" flag and the wait loop spun forever.
            outcome["error"] = exc

    worker = threading.Thread(target=_run, daemon=True)

    with Progress(
        SpinnerColumn(),
        TextColumn("[bold blue]{task.description}"),
        TimeElapsedColumn(),
        transient=True,
    ) as prog:
        prog.add_task(f"Running: {args[:50]}...", total=None)
        worker.start()
        # Wait on the thread itself, not on a flag, so the loop ends however
        # the worker finishes. The short timeout keeps Ctrl-C responsive.
        while worker.is_alive():
            worker.join(timeout=0.1)

    if outcome["error"] is not None:
        raise outcome["error"]
    return outcome["result"] or ""
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""Factor analysis and PCA commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
|
|
10
|
+
from openstat.commands.base import command
|
|
11
|
+
from openstat.session import Session
|
|
12
|
+
from openstat.stats.factor import fit_pca, fit_factor
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# ── Stata-style arg parser (same as power_cmds) ────────────────────────────
|
|
16
|
+
|
|
17
|
+
def _stata_parse(raw: str) -> tuple[list[str], dict[str, str], set[str]]:
|
|
18
|
+
opts: dict[str, str] = {}
|
|
19
|
+
positional: list[str] = []
|
|
20
|
+
flags: set[str] = set()
|
|
21
|
+
|
|
22
|
+
for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
|
|
23
|
+
opts[m.group(1).lower()] = m.group(2)
|
|
24
|
+
|
|
25
|
+
rest = re.sub(r'\w+\([^)]*\)', '', raw)
|
|
26
|
+
|
|
27
|
+
for tok in rest.split():
|
|
28
|
+
tok = tok.strip(',')
|
|
29
|
+
if not tok:
|
|
30
|
+
continue
|
|
31
|
+
if '=' in tok:
|
|
32
|
+
k, v = tok.split('=', 1)
|
|
33
|
+
opts[k.lower().lstrip('-')] = v
|
|
34
|
+
elif tok.startswith('--'):
|
|
35
|
+
flags.add(tok.lstrip('-').lower())
|
|
36
|
+
elif re.match(r'^\w+$', tok):
|
|
37
|
+
positional.append(tok)
|
|
38
|
+
|
|
39
|
+
return positional, opts, flags
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _loadings_table(cols: list[str], loadings: list, blanks: float = 0.0) -> str:
|
|
43
|
+
arr = np.array(loadings) # shape (p, k)
|
|
44
|
+
k = arr.shape[1]
|
|
45
|
+
header = f" {'Variable':<15}" + "".join(f" {'F' + str(i+1):>8}" for i in range(k))
|
|
46
|
+
lines = [header, "-" * (17 + k * 10)]
|
|
47
|
+
for i, col in enumerate(cols):
|
|
48
|
+
row = f" {col:<15}"
|
|
49
|
+
for j in range(k):
|
|
50
|
+
val = arr[i, j]
|
|
51
|
+
if abs(val) < blanks:
|
|
52
|
+
row += f" {'':>8}"
|
|
53
|
+
else:
|
|
54
|
+
row += f" {val:>8.4f}"
|
|
55
|
+
lines.append(row)
|
|
56
|
+
return "\n".join(lines)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@command("pca", usage="pca varlist [, n(k)]")
def cmd_pca(session: Session, args: str) -> str:
    """Principal component analysis."""
    # Fits a PCA on the named variables that exist in the loaded data, stores
    # the result on the session for later 'estat' calls, and returns a text
    # report (eigenvalue table plus loadings). The optional n(k) sets the
    # number of components passed to fit_pca.
    frame = session.require_data()
    words, options, _ = _stata_parse(args)

    known = frame.columns
    cols = [name for name in words if name in known]
    if len(cols) < 2:
        return "pca requires at least 2 numeric variables."

    requested = options.get("n")
    n_components = None if requested is None else int(requested)

    result = fit_pca(frame, cols, n_components=n_components)
    session._last_model = result
    session._last_model_vars = (None, cols)

    eigvals = result["eigenvalues"]
    evr = result["explained_variance_ratio"]
    cum = result["cumulative_variance"]
    loadings = result["loadings"]
    k = result["n_components"]

    component_rows = []
    for i in range(k):
        label = 'Comp' + str(i + 1)
        component_rows.append(
            f" {label:<12} {eigvals[i]:>12.4f} {evr[i]*100:>7.2f}% {cum[i]*100:>7.2f}%"
        )

    report = [
        f"\nPCA — {len(cols)} variables, {k} components",
        "=" * 55,
        f" {'Component':<12} {'Eigenvalue':>12} {'Var%':>8} {'Cum%':>8}",
        "-" * 55,
        *component_rows,
        "=" * 55,
        "\nLoadings:",
        _loadings_table(cols, loadings),
        "\nRun 'estat screeplot' for a scree plot.",
    ]
    return "\n".join(report)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@command("factor", usage="factor varlist [, n(k) method(pc|ml) --norotate]")
def cmd_factor(session: Session, args: str) -> str:
    """Factor analysis with optional varimax rotation."""
    # Fits a factor model on the named variables that exist in the loaded
    # data, stores the result on the session for later 'estat' calls, and
    # returns a text report (loadings, then communality/uniqueness per
    # variable). Defaults: n(2), method(pc), rotation on unless --norotate.
    frame = session.require_data()
    words, options, switches = _stata_parse(args)

    known = frame.columns
    cols = [name for name in words if name in known]
    if len(cols) < 2:
        return "factor requires at least 2 numeric variables."

    n_factors = int(options.get("n", 2))
    method = options.get("method", "pc").lower()
    rotate = not ("norotate" in switches)

    result = fit_factor(frame, cols, n_factors=n_factors, method=method, rotate=rotate)
    session._last_model = result
    session._last_model_vars = (None, cols)

    loadings = result["loadings"]
    comm = result["communalities"]
    uniq = result["uniqueness"]
    k = result["n_factors"]

    # The rotation label is shown only when rotation is on and there is more
    # than one factor.
    if rotate and k > 1:
        rot_str = " (varimax)"
    else:
        rot_str = ""

    report = [
        f"\nFactor Analysis — {method.upper()}{rot_str}",
        "=" * 60,
        "\nLoadings:",
        _loadings_table(cols, loadings),
        "\n " + f"{'Variable':<15} {'Communality':>12} {'Uniqueness':>12}",
        " " + "-" * 42,
    ]
    for i, col in enumerate(cols):
        report.append(f" {col:<15} {comm[i]:>12.4f} {uniq[i]:>12.4f}")
    report.append("\nRun 'estat loadings' or 'estat screeplot' for more detail.")
    return "\n".join(report)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@command("estat", usage="estat screeplot|loadings [, blanks(0.3)]")
def cmd_estat(session: Session, args: str) -> str:
    """Post-estimation: screeplot or loadings table."""
    # args: "screeplot" or "loadings", optionally followed by blanks(x).
    # Both subcommands read the result stored in session._last_model.
    # "screeplot" returns a text bar chart and, when matplotlib is available,
    # also saves a PNG into session.output_dir and records it in
    # session.plot_paths. "loadings" returns the loadings table, blanking
    # entries whose magnitude is below blanks(x).
    positional, opts, _flags = _stata_parse(args)
    sub = positional[0].lower() if positional else ""

    def _holds(result, key: str) -> bool:
        """Return True when *result* is a container that has *key*."""
        # The last model may be an arbitrary object from another command; a
        # bare "key in result" raises TypeError on such objects.
        if result is None:
            return False
        try:
            return key in result
        except TypeError:
            return False

    if sub == "screeplot":
        result = session._last_model
        if not _holds(result, "eigenvalues"):
            return "No PCA/factor result in memory. Run 'pca' or 'factor' first."

        # list() gives a well-defined emptiness test for array-likes too.
        eigvals = list(result["eigenvalues"])
        lines = ["\nScree Plot", "=" * 50]
        max_e = max(eigvals) if eigvals else 1
        if max_e <= 0:
            # Avoid dividing by zero (or flipping signs) when scaling bars.
            max_e = 1
        for i, e in enumerate(eigvals):
            bar = int(30 * e / max_e)
            lines.append(f" Comp{i+1:>2} {'█' * bar} {e:.3f}")
        lines.append("=" * 50)

        # The PNG is optional: without matplotlib only the text chart is shown.
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            plt = None

        if plt is not None:
            fig = None
            try:
                os.makedirs(str(session.output_dir), exist_ok=True)
                fig, ax = plt.subplots(figsize=(6, 4))
                ax.plot(range(1, len(eigvals) + 1), eigvals, "o-")
                ax.axhline(1, linestyle="--", color="red", linewidth=0.8)
                ax.set_xlabel("Component")
                ax.set_ylabel("Eigenvalue")
                ax.set_title("Scree Plot")
                path = str(session.output_dir / "screeplot.png")
                fig.savefig(path, dpi=100, bbox_inches="tight")
                session.plot_paths.append(path)
                lines.append(f"\nScree plot saved: {path}")
            except Exception as exc:
                # Still best effort, but tell the user why no file appeared.
                lines.append(f"\nScree plot could not be saved: {exc}")
            finally:
                # Release the figure even when saving failed.
                if fig is not None:
                    plt.close(fig)

        return "\n".join(lines)

    elif sub == "loadings":
        result = session._last_model
        if not _holds(result, "loadings"):
            return "No PCA/factor result in memory."

        try:
            blanks = float(opts.get("blanks", 0.0))
        except ValueError:
            return "blanks() must be a number, e.g. blanks(0.3)."
        cols = result["cols"]
        loadings = result["loadings"]
        lines = ["\nFactor/Component Loadings"]
        lines.append(_loadings_table(cols, loadings, blanks=blanks))
        return "\n".join(lines)

    else:
        return f"Unknown estat subcommand: {sub}\nAvailable: screeplot, loadings"
|