openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. openstat/__init__.py +3 -0
  2. openstat/__main__.py +4 -0
  3. openstat/backends/__init__.py +16 -0
  4. openstat/backends/duckdb_backend.py +70 -0
  5. openstat/backends/polars_backend.py +52 -0
  6. openstat/cli.py +92 -0
  7. openstat/commands/__init__.py +82 -0
  8. openstat/commands/adv_stat_cmds.py +1255 -0
  9. openstat/commands/advanced_ml_cmds.py +576 -0
  10. openstat/commands/advreg_cmds.py +207 -0
  11. openstat/commands/alias_cmds.py +135 -0
  12. openstat/commands/arch_cmds.py +82 -0
  13. openstat/commands/arules_cmds.py +111 -0
  14. openstat/commands/automodel_cmds.py +212 -0
  15. openstat/commands/backend_cmds.py +82 -0
  16. openstat/commands/base.py +170 -0
  17. openstat/commands/bayes_cmds.py +71 -0
  18. openstat/commands/causal_cmds.py +269 -0
  19. openstat/commands/cluster_cmds.py +152 -0
  20. openstat/commands/data_cmds.py +996 -0
  21. openstat/commands/datamanip_cmds.py +672 -0
  22. openstat/commands/dataquality_cmds.py +174 -0
  23. openstat/commands/datetime_cmds.py +176 -0
  24. openstat/commands/dimreduce_cmds.py +184 -0
  25. openstat/commands/discrete_cmds.py +149 -0
  26. openstat/commands/dsl_cmds.py +143 -0
  27. openstat/commands/epi_cmds.py +93 -0
  28. openstat/commands/equiv_tobit_cmds.py +94 -0
  29. openstat/commands/esttab_cmds.py +196 -0
  30. openstat/commands/export_beamer_cmds.py +142 -0
  31. openstat/commands/export_cmds.py +201 -0
  32. openstat/commands/export_extra_cmds.py +240 -0
  33. openstat/commands/factor_cmds.py +180 -0
  34. openstat/commands/groupby_cmds.py +155 -0
  35. openstat/commands/help_cmds.py +237 -0
  36. openstat/commands/i18n_cmds.py +43 -0
  37. openstat/commands/import_extra_cmds.py +561 -0
  38. openstat/commands/influence_cmds.py +134 -0
  39. openstat/commands/iv_cmds.py +106 -0
  40. openstat/commands/manova_cmds.py +105 -0
  41. openstat/commands/mediate_cmds.py +233 -0
  42. openstat/commands/meta_cmds.py +284 -0
  43. openstat/commands/mi_cmds.py +228 -0
  44. openstat/commands/mixed_cmds.py +79 -0
  45. openstat/commands/mixture_changepoint_cmds.py +166 -0
  46. openstat/commands/ml_adv_cmds.py +147 -0
  47. openstat/commands/ml_cmds.py +178 -0
  48. openstat/commands/model_eval_cmds.py +142 -0
  49. openstat/commands/network_cmds.py +288 -0
  50. openstat/commands/nlquery_cmds.py +161 -0
  51. openstat/commands/nonparam_cmds.py +149 -0
  52. openstat/commands/outreg_cmds.py +247 -0
  53. openstat/commands/panel_cmds.py +141 -0
  54. openstat/commands/pdf_cmds.py +226 -0
  55. openstat/commands/pipeline_cmds.py +319 -0
  56. openstat/commands/plot_cmds.py +189 -0
  57. openstat/commands/plugin_cmds.py +79 -0
  58. openstat/commands/posthoc_cmds.py +153 -0
  59. openstat/commands/power_cmds.py +172 -0
  60. openstat/commands/profile_cmds.py +246 -0
  61. openstat/commands/rbridge_cmds.py +81 -0
  62. openstat/commands/regex_cmds.py +104 -0
  63. openstat/commands/report_cmds.py +48 -0
  64. openstat/commands/repro_cmds.py +129 -0
  65. openstat/commands/resampling_cmds.py +109 -0
  66. openstat/commands/reshape_cmds.py +223 -0
  67. openstat/commands/sem_cmds.py +177 -0
  68. openstat/commands/stat_cmds.py +1040 -0
  69. openstat/commands/stata_import_cmds.py +215 -0
  70. openstat/commands/string_cmds.py +124 -0
  71. openstat/commands/surv_cmds.py +145 -0
  72. openstat/commands/survey_cmds.py +153 -0
  73. openstat/commands/textanalysis_cmds.py +192 -0
  74. openstat/commands/ts_adv_cmds.py +136 -0
  75. openstat/commands/ts_cmds.py +195 -0
  76. openstat/commands/tui_cmds.py +111 -0
  77. openstat/commands/ux_cmds.py +191 -0
  78. openstat/commands/validate_cmds.py +270 -0
  79. openstat/commands/viz_adv_cmds.py +312 -0
  80. openstat/commands/viz_extra_cmds.py +251 -0
  81. openstat/commands/watch_cmds.py +69 -0
  82. openstat/config.py +106 -0
  83. openstat/dsl/__init__.py +0 -0
  84. openstat/dsl/parser.py +332 -0
  85. openstat/dsl/tokenizer.py +105 -0
  86. openstat/i18n.py +120 -0
  87. openstat/io/__init__.py +0 -0
  88. openstat/io/loader.py +187 -0
  89. openstat/jupyter/__init__.py +18 -0
  90. openstat/jupyter/display.py +18 -0
  91. openstat/jupyter/magic.py +60 -0
  92. openstat/logging_config.py +59 -0
  93. openstat/plots/__init__.py +0 -0
  94. openstat/plots/plotter.py +437 -0
  95. openstat/plots/surv_plots.py +32 -0
  96. openstat/plots/ts_plots.py +59 -0
  97. openstat/plugins/__init__.py +5 -0
  98. openstat/plugins/manager.py +69 -0
  99. openstat/repl.py +457 -0
  100. openstat/reporting/__init__.py +0 -0
  101. openstat/reporting/eda.py +208 -0
  102. openstat/reporting/report.py +67 -0
  103. openstat/script_runner.py +319 -0
  104. openstat/session.py +133 -0
  105. openstat/stats/__init__.py +0 -0
  106. openstat/stats/advanced_regression.py +269 -0
  107. openstat/stats/arch_garch.py +84 -0
  108. openstat/stats/bayesian.py +103 -0
  109. openstat/stats/causal.py +258 -0
  110. openstat/stats/clustering.py +206 -0
  111. openstat/stats/discrete.py +311 -0
  112. openstat/stats/epidemiology.py +119 -0
  113. openstat/stats/equiv_tobit.py +163 -0
  114. openstat/stats/factor.py +174 -0
  115. openstat/stats/imputation.py +282 -0
  116. openstat/stats/influence.py +78 -0
  117. openstat/stats/iv.py +131 -0
  118. openstat/stats/manova.py +124 -0
  119. openstat/stats/mixed.py +128 -0
  120. openstat/stats/ml.py +275 -0
  121. openstat/stats/ml_advanced.py +117 -0
  122. openstat/stats/model_eval.py +183 -0
  123. openstat/stats/models.py +1342 -0
  124. openstat/stats/nonparametric.py +130 -0
  125. openstat/stats/panel.py +179 -0
  126. openstat/stats/power.py +295 -0
  127. openstat/stats/resampling.py +203 -0
  128. openstat/stats/survey.py +213 -0
  129. openstat/stats/survival.py +196 -0
  130. openstat/stats/timeseries.py +142 -0
  131. openstat/stats/ts_advanced.py +114 -0
  132. openstat/types.py +11 -0
  133. openstat/web/__init__.py +1 -0
  134. openstat/web/app.py +117 -0
  135. openstat/web/session_manager.py +73 -0
  136. openstat/web/static/app.js +117 -0
  137. openstat/web/static/index.html +38 -0
  138. openstat/web/static/style.css +103 -0
  139. openstat_cli-1.0.0.dist-info/METADATA +748 -0
  140. openstat_cli-1.0.0.dist-info/RECORD +143 -0
  141. openstat_cli-1.0.0.dist-info/WHEEL +4 -0
  142. openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
  143. openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,201 @@
1
+ """Export commands: export docx, export pptx."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from datetime import date
7
+ from pathlib import Path
8
+
9
+ from openstat.commands.base import command, CommandArgs
10
+ from openstat.session import Session
11
+
12
+
13
+ def _ensure_dir(path: str) -> None:
14
+ os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
15
+
16
+
17
+ # ── Word (.docx) ───────────────────────────────────────────────────────────
18
+
19
def _export_docx(session: Session, path: str) -> str:
    """Export session results to a Word document.

    Builds a .docx with a dataset-overview table, summary statistics for
    numeric columns, one section per stored model result, and any saved
    plots.

    Returns the absolute path of the saved file, or an installation hint
    if python-docx is not available.
    """
    try:
        from docx import Document
    except ImportError:
        return (
            "python-docx is required for Word export.\n"
            "Install: pip install python-docx"
        )

    import polars as pl

    # Single source of truth for "numeric" dtypes.  The old code had two
    # inconsistent definitions (the overview counted only Float32/64 and
    # Int32/64 via an ugly __import__("polars") hack, while the summary
    # used the full tuple); the overview count now includes all widths.
    NUMERIC = (pl.Float32, pl.Float64, pl.Int8, pl.Int16, pl.Int32, pl.Int64,
               pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64)

    doc = Document()
    doc.add_heading("OpenStat Results", 0)
    doc.add_paragraph(
        f"Dataset: {session.dataset_name or 'Unknown'} | "
        f"Date: {date.today().isoformat()} | "
        f"Shape: {session.shape_str}"
    )
    doc.add_paragraph()

    # Dataset overview + summary statistics — both only when data is loaded,
    # so `df` is never referenced without the guard.
    doc.add_heading("Dataset Overview", level=1)
    if session.df is not None:
        df = session.df
        num_cols = [c for c in df.columns if df[c].dtype in NUMERIC]

        tbl = doc.add_table(rows=1, cols=2)
        tbl.style = "Table Grid"
        hdr = tbl.rows[0].cells
        hdr[0].text = "Property"
        hdr[1].text = "Value"
        for label, val in [
            ("Rows", str(df.height)),
            ("Columns", str(df.width)),
            ("Missing cells", str(sum(df[c].null_count() for c in df.columns))),
            ("Numeric columns", str(len(num_cols))),
        ]:
            row = tbl.add_row().cells
            row[0].text = label
            row[1].text = val
        doc.add_paragraph()

        if num_cols:
            doc.add_heading("Summary Statistics", level=2)
            stats_tbl = doc.add_table(rows=1, cols=5)
            stats_tbl.style = "Table Grid"
            for i, hdr_text in enumerate(["Variable", "N", "Mean", "SD", "Min–Max"]):
                stats_tbl.rows[0].cells[i].text = hdr_text
            for c in num_cols[:20]:  # cap at 20 rows
                col = df[c].drop_nulls()
                if col.len() == 0:
                    continue
                cells = stats_tbl.add_row().cells
                cells[0].text = c
                cells[1].text = str(col.len())
                cells[2].text = f"{col.mean():.4f}"
                # SD is undefined for a single observation.
                cells[3].text = f"{col.std():.4f}" if col.len() > 1 else "—"
                cells[4].text = f"{col.min():.2f} – {col.max():.2f}"

    # One heading + preformatted table per stored model result.
    for mr in session.results:
        doc.add_heading(f"{mr.name}: {mr.formula}", level=1)
        doc.add_paragraph(mr.table, style="No Spacing")
        doc.add_paragraph()

    # Embed saved plots; fall back to a text placeholder if docx rejects
    # the image (best-effort on purpose — export must not fail on one plot).
    for plot_path in session.plot_paths:
        if os.path.exists(plot_path):
            doc.add_heading("Figure", level=2)
            try:
                doc.add_picture(plot_path)
            except Exception:
                doc.add_paragraph(f"[Plot: {plot_path}]")

    _ensure_dir(path)
    doc.save(path)
    return os.path.abspath(path)
101
+
102
+
103
+ # ── PowerPoint (.pptx) ────────────────────────────────────────────────────
104
+
105
def _export_pptx(session: Session, path: str) -> str:
    """Build a PowerPoint deck from the session.

    Slides: title, dataset overview, one per stored model result, one per
    saved plot.  Returns the absolute path of the saved file, or an
    installation hint if python-pptx is not available.
    """
    try:
        from pptx import Presentation
        from pptx.util import Inches, Pt
    except ImportError:
        return (
            "python-pptx is required for PowerPoint export.\n"
            "Install: pip install python-pptx"
        )

    deck = Presentation()

    # Slide 1: title + dataset/date subtitle.
    title_slide = deck.slides.add_slide(deck.slide_layouts[0])
    title_slide.shapes.title.text = "OpenStat Results"
    title_slide.placeholders[1].text = (
        f"{session.dataset_name or 'Dataset'} | {date.today().isoformat()}"
    )

    # Slide 2: dataset overview (shape, plus missing-cell count when loaded).
    overview = deck.slides.add_slide(deck.slide_layouts[1])
    overview.shapes.title.text = "Dataset Overview"
    frame = overview.placeholders[1].text_frame
    frame.text = session.shape_str
    if session.df is not None:
        data = session.df
        missing = sum(data[c].null_count() for c in data.columns)
        frame.add_paragraph().text = f"Missing cells: {missing}"

    # One content slide per stored model result.
    for mr in session.results:
        slide = deck.slides.add_slide(deck.slide_layouts[1])
        slide.shapes.title.text = f"{mr.name}: {mr.formula}"
        slide.placeholders[1].text_frame.text = mr.table[:1000]  # truncate if huge

    # One blank slide per saved plot; skip silently if the image is unusable.
    blank = deck.slide_layouts[6]
    for plot_path in session.plot_paths:
        if not os.path.exists(plot_path):
            continue
        slide = deck.slides.add_slide(blank)
        try:
            slide.shapes.add_picture(
                plot_path,
                Inches(0.5), Inches(0.5),
                width=Inches(8), height=Inches(5.5),
            )
        except Exception:
            pass

    _ensure_dir(path)
    deck.save(path)
    return os.path.abspath(path)
159
+
160
+
161
+ # ── Command ────────────────────────────────────────────────────────────────
162
+
163
@command("export", usage="export docx|pptx|pdf|md [path]")
def cmd_export(session: Session, args: str) -> str:
    """Export results to Word (.docx), PowerPoint (.pptx), PDF, or Markdown."""
    ca = CommandArgs(args)
    if not ca.positional:
        return "Usage: export docx|pptx|pdf|md [path]"

    fmt = ca.positional[0].lower()
    # Optional second positional overrides the per-format default path.
    has_path = len(ca.positional) > 1

    if fmt == "docx":
        out = _export_docx(session, ca.positional[1] if has_path else "outputs/results.docx")
        # A failed export returns a human-readable message that does not end
        # with the extension; pass it through unchanged.
        return f"Word document saved: {out}" if out.endswith(".docx") else out

    if fmt == "pptx":
        out = _export_pptx(session, ca.positional[1] if has_path else "outputs/results.pptx")
        return f"PowerPoint saved: {out}" if out.endswith(".pptx") else out

    if fmt == "pdf":
        from openstat.commands.pdf_cmds import _export_pdf
        out = _export_pdf(session, ca.positional[1] if has_path else "outputs/results.pdf")
        return f"PDF saved: {out}" if out.endswith(".pdf") else out

    if fmt == "md":
        from openstat.commands.pdf_cmds import _export_md
        out = _export_md(session, ca.positional[1] if has_path else "outputs/results.md")
        return f"Markdown saved: {out}"

    return f"Unknown export format: {fmt}. Use 'docx', 'pptx', 'pdf', or 'md'."
@@ -0,0 +1,240 @@
1
+ """Extra export commands: Jupyter notebook, APA text, progress bars."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from datetime import date
7
+ from pathlib import Path
8
+
9
+ from openstat.commands.base import command, CommandArgs, friendly_error
10
+ from openstat.session import Session
11
+
12
+
13
+ # ── Jupyter Notebook export ──────────────────────────────────────────────────
14
+
15
@command("export ipynb", usage="export ipynb [path]")
def cmd_export_ipynb(session: Session, args: str) -> str:
    """Export session history as a Jupyter notebook (.ipynb).

    Creates a notebook where each command becomes a code cell with
    its output. Requires: pip install nbformat

    Examples:
        export ipynb
        export ipynb my_analysis.ipynb
    """
    try:
        import nbformat
        from nbformat.v4 import new_notebook, new_code_cell, new_markdown_cell
    except ImportError:
        return "nbformat required. Install: pip install nbformat"

    ca = CommandArgs(args)
    out_path = ca.positional[0] if ca.positional else "outputs/analysis.ipynb"

    nb = new_notebook()
    cells = []

    # Title cell
    cells.append(new_markdown_cell(
        f"# OpenStat Analysis\n\n"
        f"**Dataset:** {session.dataset_name or 'Unknown'} \n"
        f"**Date:** {date.today().isoformat()} \n"
        f"**Shape:** {session.shape_str}"
    ))

    # Setup cell: a tiny run() shim that dispatches through the command table.
    cells.append(new_code_cell(
        "# Auto-generated from OpenStat session\n"
        "# Run: pip install openstat\n"
        "from openstat.session import Session\n"
        "from openstat.commands import COMMANDS\n"
        "session = Session()\n"
        "\ndef run(cmd):\n"
        "    parts = cmd.split(None, 1)\n"
        "    name = parts[0]\n"
        "    args = parts[1] if len(parts) > 1 else ''\n"
        "    return COMMANDS[name](session, args)\n"
    ))

    # One cell per history command.  Comment lines become markdown cells.
    # repr() (via !r) escapes quotes AND backslashes — the previous
    # .replace("'", "\\'") left backslashes unescaped, generating broken
    # code cells for commands containing e.g. Windows paths.
    for cmd_line in session.history:
        if cmd_line.strip().startswith("#"):
            cells.append(new_markdown_cell(cmd_line.lstrip("# ")))
        else:
            cells.append(new_code_cell(f"print(run({cmd_line!r}))"))

    # Plots cell — paths are also repr-quoted for the same reason.
    if session.plot_paths:
        plot_code = "from IPython.display import Image, display\n"
        for p in session.plot_paths:
            if os.path.exists(p):
                plot_code += f"display(Image({p!r}))\n"
        cells.append(new_code_cell(plot_code))

    nb.cells = cells
    nb.metadata["kernelspec"] = {
        "display_name": "Python 3",
        "language": "python",
        "name": "python3",
    }

    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        nbformat.write(nb, f)

    n_cells = len(cells)
    return f"Jupyter notebook saved: {os.path.abspath(out_path)} ({n_cells} cells)"
89
+
90
+
91
+ # ── APA export ───────────────────────────────────────────────────────────────
92
+
93
@command("export apa", usage="export apa [path]")
def cmd_export_apa(session: Session, args: str) -> str:
    """Export model results in APA 7th edition format.

    Generates text suitable for inclusion in research papers.
    Supports OLS, logit, probit, and other regression models.

    Examples:
        ols income educ age
        export apa
        export apa results/apa_table.txt
    """
    ca = CommandArgs(args)

    if not session.results:
        return "No model results to export. Run ols/logit/etc. first."

    lines = [
        "APA-Formatted Results",
        f"Generated: {date.today().isoformat()}",
        f"Dataset: {session.dataset_name or 'Unknown'} ({session.shape_str})",
        "",
    ]

    for mr in session.results:
        model_type = mr.name
        formula = mr.formula
        details = mr.details

        n = details.get("n", "?")
        r2 = details.get("r2")
        adj_r2 = details.get("adj_r2")
        f_stat = details.get("f_stat")
        f_pval = details.get("f_pval")
        aic = details.get("aic")
        bic = details.get("bic")
        ll = details.get("log_likelihood")

        lines.append("=" * 60)
        lines.append(f"Model: {model_type} — {formula}")
        lines.append("")

        # Narrative sentence + fit statistics, linear models only.
        if model_type.upper() in ("OLS", "LINEAR"):
            if r2 is not None:
                lines.append(
                    f"A multiple linear regression was conducted to predict {formula.split('~')[0].strip()} "
                    f"from {formula.split('~')[1].strip() if '~' in formula else 'the predictors'}."
                )
            r2_str = f"R² = {r2:.3f}" if r2 is not None else ""
            adj_r2_str = f", adjusted R² = {adj_r2:.3f}" if adj_r2 is not None else ""
            f_str = f", F = {f_stat:.2f}" if f_stat is not None else ""
            p_str = f", p {'< .001' if (f_pval is not None and f_pval < 0.001) else f'= {f_pval:.3f}'}" if f_pval is not None else ""
            lines.append(f"The model was statistically significant: {r2_str}{adj_r2_str}{f_str}{p_str}.")
            lines.append(f"N = {n}.")
        else:
            # Likelihood-based models: report N and information criteria.
            if ll is not None:
                lines.append(f"N = {n}.")
            if aic is not None:
                lines.append(f"AIC = {aic:.2f}, BIC = {bic:.2f}." if bic is not None else f"AIC = {aic:.2f}.")

        lines.append("")
        lines.append("Table. Regression Coefficients")
        lines.append(f" {'Variable':<25} {'B':>10} {'SE':>8} {'p':>8}")
        lines.append(" " + "-" * 55)

        # Pull coefficient rows out of the plain-text summary table: a data
        # row is any line with >= 4 whitespace-separated fields whose last
        # columns parse as <coef> <se> <t/z> <p>.  Non-numeric rows fall
        # through via the except.  (The old code re-tested len(parts) inside
        # the guaranteed `len(parts) >= 4` branch — dead conditions removed.)
        for line in mr.table.split("\n"):
            stripped = line.strip()
            if not stripped or stripped.startswith("=") or stripped.startswith("-"):
                continue
            parts = stripped.split()
            if len(parts) < 4:
                continue
            try:
                coef = float(parts[-4])
                se = float(parts[-3])
                p = float(parts[-1])
                varname = " ".join(parts[:-4]) if len(parts) > 4 else parts[0]
                sig = "***" if p < 0.001 else "**" if p < 0.01 else "*" if p < 0.05 else ""
                # NOTE(review): truthiness test means se == 0.0 or p == 0.0
                # drops the SE/p columns — kept for backward compatibility.
                lines.append(f" {varname:<25} {coef:10.3f} {se:8.3f} {p:8.3f}{sig}" if se and p else f" {varname:<25} {coef:10.3f}")
            except (ValueError, IndexError):
                continue

        lines.append(" " + "-" * 55)
        lines.append(" Note. * p < .05. ** p < .01. *** p < .001.")
        lines.append("")

    text = "\n".join(lines)

    # --out=PATH takes precedence; otherwise a positional path; else return
    # the formatted text directly.
    target = ca.options.get("out") or (ca.positional[0] if ca.positional else None)
    if target:
        Path(target).parent.mkdir(parents=True, exist_ok=True)
        Path(target).write_text(text, encoding="utf-8")
        return f"APA results saved: {os.path.abspath(target)}"

    return text
197
+
198
+
199
+ # ── Progress bar wrapper for long commands ────────────────────────────────────
200
+
201
@command("progress", usage="progress <command with args>")
def cmd_progress(session: Session, args: str) -> str:
    """Run a command with a live progress indicator.

    Useful for long-running commands like bootstrap, permtest, hyperopt.
    Uses rich progress bar.

    Examples:
        progress bootstrap ols income educ age --reps=2000
        progress hyperopt income educ age --model=rf --n_iter=50
    """
    from openstat.commands.base import run_command
    from rich.progress import Progress, SpinnerColumn, TextColumn, TimeElapsedColumn
    import threading
    import time

    if not args.strip():
        return "Usage: progress <command> [args]"

    # The worker records either a result or the exception it raised; the
    # Event is set in a finally so the poll loop always terminates.  (The
    # old version only flipped a "done" flag on success, so an exception in
    # run_command left the main thread spinning forever.)
    holder: dict = {"result": None, "error": None}
    done = threading.Event()

    def _run() -> None:
        try:
            holder["result"] = run_command(session, args.strip())
        except BaseException as exc:  # re-raised in the main thread below
            holder["error"] = exc
        finally:
            done.set()

    thread = threading.Thread(target=_run, daemon=True)

    with Progress(
        SpinnerColumn(),
        TextColumn("[bold blue]{task.description}"),
        TimeElapsedColumn(),
        transient=True,
    ) as prog:
        prog.add_task(f"Running: {args[:50]}...", total=None)
        thread.start()
        # Short waits instead of a blocking join keep the spinner animating.
        while not done.wait(0.1):
            pass
    thread.join()

    if holder["error"] is not None:
        raise holder["error"]
    return holder["result"] or ""
@@ -0,0 +1,180 @@
1
+ """Factor analysis and PCA commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import re
7
+
8
+ import numpy as np
9
+
10
+ from openstat.commands.base import command
11
+ from openstat.session import Session
12
+ from openstat.stats.factor import fit_pca, fit_factor
13
+
14
+
15
+ # ── Stata-style arg parser (same as power_cmds) ────────────────────────────
16
+
17
+ def _stata_parse(raw: str) -> tuple[list[str], dict[str, str], set[str]]:
18
+ opts: dict[str, str] = {}
19
+ positional: list[str] = []
20
+ flags: set[str] = set()
21
+
22
+ for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
23
+ opts[m.group(1).lower()] = m.group(2)
24
+
25
+ rest = re.sub(r'\w+\([^)]*\)', '', raw)
26
+
27
+ for tok in rest.split():
28
+ tok = tok.strip(',')
29
+ if not tok:
30
+ continue
31
+ if '=' in tok:
32
+ k, v = tok.split('=', 1)
33
+ opts[k.lower().lstrip('-')] = v
34
+ elif tok.startswith('--'):
35
+ flags.add(tok.lstrip('-').lower())
36
+ elif re.match(r'^\w+$', tok):
37
+ positional.append(tok)
38
+
39
+ return positional, opts, flags
40
+
41
+
42
+ def _loadings_table(cols: list[str], loadings: list, blanks: float = 0.0) -> str:
43
+ arr = np.array(loadings) # shape (p, k)
44
+ k = arr.shape[1]
45
+ header = f" {'Variable':<15}" + "".join(f" {'F' + str(i+1):>8}" for i in range(k))
46
+ lines = [header, "-" * (17 + k * 10)]
47
+ for i, col in enumerate(cols):
48
+ row = f" {col:<15}"
49
+ for j in range(k):
50
+ val = arr[i, j]
51
+ if abs(val) < blanks:
52
+ row += f" {'':>8}"
53
+ else:
54
+ row += f" {val:>8.4f}"
55
+ lines.append(row)
56
+ return "\n".join(lines)
57
+
58
+
59
@command("pca", usage="pca varlist [, n(k)]")
def cmd_pca(session: Session, args: str) -> str:
    """Principal component analysis."""
    df = session.require_data()
    positional, opts, _flags = _stata_parse(args)
    cols = [c for c in positional if c in df.columns]
    if len(cols) < 2:
        return "pca requires at least 2 numeric variables."

    # Optional n(k) caps the number of retained components.
    n_components = int(opts["n"]) if "n" in opts else None

    result = fit_pca(df, cols, n_components=n_components)
    # Stash for the post-estimation commands (estat screeplot / loadings).
    session._last_model = result
    session._last_model_vars = (None, cols)

    k = result["n_components"]
    eigvals = result["eigenvalues"]
    var_ratio = result["explained_variance_ratio"]
    cum_ratio = result["cumulative_variance"]

    out = [
        f"\nPCA — {len(cols)} variables, {k} components",
        "=" * 55,
        f" {'Component':<12} {'Eigenvalue':>12} {'Var%':>8} {'Cum%':>8}",
        "-" * 55,
    ]
    for i in range(k):
        out.append(
            f" {'Comp' + str(i+1):<12} {eigvals[i]:>12.4f} {var_ratio[i]*100:>7.2f}% {cum_ratio[i]*100:>7.2f}%"
        )
    out.extend([
        "=" * 55,
        "\nLoadings:",
        _loadings_table(cols, result["loadings"]),
        "\nRun 'estat screeplot' for a scree plot.",
    ])
    return "\n".join(out)
92
+
93
+
94
@command("factor", usage="factor varlist [, n(k) method(pc|ml) --norotate]")
def cmd_factor(session: Session, args: str) -> str:
    """Factor analysis with optional varimax rotation."""
    df = session.require_data()
    positional, opts, flags = _stata_parse(args)
    cols = [c for c in positional if c in df.columns]
    if len(cols) < 2:
        return "factor requires at least 2 numeric variables."

    n_factors = int(opts.get("n", 2))
    method = opts.get("method", "pc").lower()
    rotate = "norotate" not in flags  # varimax unless suppressed

    result = fit_factor(df, cols, n_factors=n_factors, method=method, rotate=rotate)
    # Stash for the post-estimation commands (estat screeplot / loadings).
    session._last_model = result
    session._last_model_vars = (None, cols)

    k = result["n_factors"]
    rotation_note = " (varimax)" if rotate and k > 1 else ""

    out = [
        f"\nFactor Analysis — {method.upper()}{rotation_note}",
        "=" * 60,
        "\nLoadings:",
        _loadings_table(cols, result["loadings"]),
        "\n " + f"{'Variable':<15} {'Communality':>12} {'Uniqueness':>12}",
        " " + "-" * 42,
    ]
    for idx, name in enumerate(cols):
        out.append(
            f" {name:<15} {result['communalities'][idx]:>12.4f} {result['uniqueness'][idx]:>12.4f}"
        )
    out.append("\nRun 'estat loadings' or 'estat screeplot' for more detail.")
    return "\n".join(out)
126
+
127
+
128
def _estat_screeplot(session: Session) -> str:
    """Render an ASCII scree plot of the last PCA/factor eigenvalues and,
    when matplotlib is available, save a PNG alongside it (best effort)."""
    result = session._last_model
    # isinstance guard: `"x" in result` raises TypeError when _last_model is
    # a non-dict model object (e.g. after an ols fit).
    if not isinstance(result, dict) or "eigenvalues" not in result:
        return "No PCA/factor result in memory. Run 'pca' or 'factor' first."

    eigvals = result["eigenvalues"]
    lines = ["\nScree Plot", "=" * 50]
    max_e = max(eigvals) if eigvals else 1
    for i, e in enumerate(eigvals):
        bar = int(30 * e / max_e)
        lines.append(f" Comp{i+1:>2} {'█' * bar} {e:.3f}")
    lines.append("=" * 50)

    # Best-effort PNG: missing/broken matplotlib must not kill the command.
    try:
        import matplotlib.pyplot as plt

        os.makedirs(str(session.output_dir), exist_ok=True)
        fig, ax = plt.subplots(figsize=(6, 4))
        ax.plot(range(1, len(eigvals) + 1), eigvals, "o-")
        # Kaiser criterion reference line at eigenvalue = 1.
        ax.axhline(1, linestyle="--", color="red", linewidth=0.8)
        ax.set_xlabel("Component")
        ax.set_ylabel("Eigenvalue")
        ax.set_title("Scree Plot")
        path = str(session.output_dir / "screeplot.png")
        fig.savefig(path, dpi=100, bbox_inches="tight")
        plt.close(fig)
        session.plot_paths.append(path)
        lines.append(f"\nScree plot saved: {path}")
    except Exception:
        pass

    return "\n".join(lines)


def _estat_loadings(session: Session, opts: dict) -> str:
    """Print the loadings table of the last PCA/factor fit, blanking entries
    below the blanks() threshold."""
    result = session._last_model
    if not isinstance(result, dict) or "loadings" not in result:
        return "No PCA/factor result in memory."

    blanks = float(opts.get("blanks", 0.0))
    lines = ["\nFactor/Component Loadings"]
    lines.append(_loadings_table(result["cols"], result["loadings"], blanks=blanks))
    return "\n".join(lines)


@command("estat", usage="estat screeplot|loadings [, blanks(0.3)]")
def cmd_estat(session: Session, args: str) -> str:
    """Post-estimation: screeplot or loadings table."""
    positional, opts, _flags = _stata_parse(args)
    sub = positional[0].lower() if positional else ""

    if sub == "screeplot":
        return _estat_screeplot(session)
    if sub == "loadings":
        return _estat_loadings(session, opts)
    return f"Unknown estat subcommand: {sub}\nAvailable: screeplot, loadings"