openstat-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstat/__init__.py +3 -0
- openstat/__main__.py +4 -0
- openstat/backends/__init__.py +16 -0
- openstat/backends/duckdb_backend.py +70 -0
- openstat/backends/polars_backend.py +52 -0
- openstat/cli.py +92 -0
- openstat/commands/__init__.py +82 -0
- openstat/commands/adv_stat_cmds.py +1255 -0
- openstat/commands/advanced_ml_cmds.py +576 -0
- openstat/commands/advreg_cmds.py +207 -0
- openstat/commands/alias_cmds.py +135 -0
- openstat/commands/arch_cmds.py +82 -0
- openstat/commands/arules_cmds.py +111 -0
- openstat/commands/automodel_cmds.py +212 -0
- openstat/commands/backend_cmds.py +82 -0
- openstat/commands/base.py +170 -0
- openstat/commands/bayes_cmds.py +71 -0
- openstat/commands/causal_cmds.py +269 -0
- openstat/commands/cluster_cmds.py +152 -0
- openstat/commands/data_cmds.py +996 -0
- openstat/commands/datamanip_cmds.py +672 -0
- openstat/commands/dataquality_cmds.py +174 -0
- openstat/commands/datetime_cmds.py +176 -0
- openstat/commands/dimreduce_cmds.py +184 -0
- openstat/commands/discrete_cmds.py +149 -0
- openstat/commands/dsl_cmds.py +143 -0
- openstat/commands/epi_cmds.py +93 -0
- openstat/commands/equiv_tobit_cmds.py +94 -0
- openstat/commands/esttab_cmds.py +196 -0
- openstat/commands/export_beamer_cmds.py +142 -0
- openstat/commands/export_cmds.py +201 -0
- openstat/commands/export_extra_cmds.py +240 -0
- openstat/commands/factor_cmds.py +180 -0
- openstat/commands/groupby_cmds.py +155 -0
- openstat/commands/help_cmds.py +237 -0
- openstat/commands/i18n_cmds.py +43 -0
- openstat/commands/import_extra_cmds.py +561 -0
- openstat/commands/influence_cmds.py +134 -0
- openstat/commands/iv_cmds.py +106 -0
- openstat/commands/manova_cmds.py +105 -0
- openstat/commands/mediate_cmds.py +233 -0
- openstat/commands/meta_cmds.py +284 -0
- openstat/commands/mi_cmds.py +228 -0
- openstat/commands/mixed_cmds.py +79 -0
- openstat/commands/mixture_changepoint_cmds.py +166 -0
- openstat/commands/ml_adv_cmds.py +147 -0
- openstat/commands/ml_cmds.py +178 -0
- openstat/commands/model_eval_cmds.py +142 -0
- openstat/commands/network_cmds.py +288 -0
- openstat/commands/nlquery_cmds.py +161 -0
- openstat/commands/nonparam_cmds.py +149 -0
- openstat/commands/outreg_cmds.py +247 -0
- openstat/commands/panel_cmds.py +141 -0
- openstat/commands/pdf_cmds.py +226 -0
- openstat/commands/pipeline_cmds.py +319 -0
- openstat/commands/plot_cmds.py +189 -0
- openstat/commands/plugin_cmds.py +79 -0
- openstat/commands/posthoc_cmds.py +153 -0
- openstat/commands/power_cmds.py +172 -0
- openstat/commands/profile_cmds.py +246 -0
- openstat/commands/rbridge_cmds.py +81 -0
- openstat/commands/regex_cmds.py +104 -0
- openstat/commands/report_cmds.py +48 -0
- openstat/commands/repro_cmds.py +129 -0
- openstat/commands/resampling_cmds.py +109 -0
- openstat/commands/reshape_cmds.py +223 -0
- openstat/commands/sem_cmds.py +177 -0
- openstat/commands/stat_cmds.py +1040 -0
- openstat/commands/stata_import_cmds.py +215 -0
- openstat/commands/string_cmds.py +124 -0
- openstat/commands/surv_cmds.py +145 -0
- openstat/commands/survey_cmds.py +153 -0
- openstat/commands/textanalysis_cmds.py +192 -0
- openstat/commands/ts_adv_cmds.py +136 -0
- openstat/commands/ts_cmds.py +195 -0
- openstat/commands/tui_cmds.py +111 -0
- openstat/commands/ux_cmds.py +191 -0
- openstat/commands/validate_cmds.py +270 -0
- openstat/commands/viz_adv_cmds.py +312 -0
- openstat/commands/viz_extra_cmds.py +251 -0
- openstat/commands/watch_cmds.py +69 -0
- openstat/config.py +106 -0
- openstat/dsl/__init__.py +0 -0
- openstat/dsl/parser.py +332 -0
- openstat/dsl/tokenizer.py +105 -0
- openstat/i18n.py +120 -0
- openstat/io/__init__.py +0 -0
- openstat/io/loader.py +187 -0
- openstat/jupyter/__init__.py +18 -0
- openstat/jupyter/display.py +18 -0
- openstat/jupyter/magic.py +60 -0
- openstat/logging_config.py +59 -0
- openstat/plots/__init__.py +0 -0
- openstat/plots/plotter.py +437 -0
- openstat/plots/surv_plots.py +32 -0
- openstat/plots/ts_plots.py +59 -0
- openstat/plugins/__init__.py +5 -0
- openstat/plugins/manager.py +69 -0
- openstat/repl.py +457 -0
- openstat/reporting/__init__.py +0 -0
- openstat/reporting/eda.py +208 -0
- openstat/reporting/report.py +67 -0
- openstat/script_runner.py +319 -0
- openstat/session.py +133 -0
- openstat/stats/__init__.py +0 -0
- openstat/stats/advanced_regression.py +269 -0
- openstat/stats/arch_garch.py +84 -0
- openstat/stats/bayesian.py +103 -0
- openstat/stats/causal.py +258 -0
- openstat/stats/clustering.py +206 -0
- openstat/stats/discrete.py +311 -0
- openstat/stats/epidemiology.py +119 -0
- openstat/stats/equiv_tobit.py +163 -0
- openstat/stats/factor.py +174 -0
- openstat/stats/imputation.py +282 -0
- openstat/stats/influence.py +78 -0
- openstat/stats/iv.py +131 -0
- openstat/stats/manova.py +124 -0
- openstat/stats/mixed.py +128 -0
- openstat/stats/ml.py +275 -0
- openstat/stats/ml_advanced.py +117 -0
- openstat/stats/model_eval.py +183 -0
- openstat/stats/models.py +1342 -0
- openstat/stats/nonparametric.py +130 -0
- openstat/stats/panel.py +179 -0
- openstat/stats/power.py +295 -0
- openstat/stats/resampling.py +203 -0
- openstat/stats/survey.py +213 -0
- openstat/stats/survival.py +196 -0
- openstat/stats/timeseries.py +142 -0
- openstat/stats/ts_advanced.py +114 -0
- openstat/types.py +11 -0
- openstat/web/__init__.py +1 -0
- openstat/web/app.py +117 -0
- openstat/web/session_manager.py +73 -0
- openstat/web/static/app.js +117 -0
- openstat/web/static/index.html +38 -0
- openstat/web/static/style.css +103 -0
- openstat_cli-1.0.0.dist-info/METADATA +748 -0
- openstat_cli-1.0.0.dist-info/RECORD +143 -0
- openstat_cli-1.0.0.dist-info/WHEEL +4 -0
- openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
- openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
"""Advanced visualization: 3D, interactive, animated, missing, map."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from openstat.commands.base import command, CommandArgs, friendly_error
|
|
6
|
+
from openstat.session import Session
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@command("plot3d", usage="plot3d <x> <y> <z> [--color=<col>]")
def cmd_plot3d(session: Session, args: str) -> str:
    """3D scatter plot.

    Plots three columns of the session dataset against each other and saves
    the figure as ``scatter3d.png`` inside ``session.output_dir``. Rows with
    a null in any selected column are dropped first. With ``--color=<col>``
    each distinct value of that column (compared as text) gets its own colour
    and legend entry; a ``--color`` column that is not in the dataset is
    ignored.

    Returns a status message containing the saved path, a usage / "Column
    not found" message, or the text produced by ``friendly_error`` when
    anything raises.

    Examples:
      plot3d x y z
      plot3d age income educ --color=gender
    """
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
    import numpy as np
    import polars as pl

    ca = CommandArgs(args)
    if len(ca.positional) < 3:
        return "Usage: plot3d <x> <y> <z> [--color=<col>]"
    x_col, y_col, z_col = ca.positional[:3]
    color_col = ca.options.get("color")

    try:
        df = session.require_data()
        for c in (x_col, y_col, z_col):
            if c not in df.columns:
                return f"Column not found: {c}"

        cols = [x_col, y_col, z_col]
        use_color = bool(color_col) and color_col in df.columns
        # Selecting the same column twice would raise, so only add the colour
        # column when it is not already one of the axes.
        if use_color and color_col not in cols:
            cols.append(color_col)
        sub = df.select(cols).drop_nulls()

        x = sub[x_col].to_numpy().astype(float)
        y = sub[y_col].to_numpy().astype(float)
        z = sub[z_col].to_numpy().astype(float)

        fig = plt.figure(figsize=(9, 7))
        try:
            ax = fig.add_subplot(111, projection="3d")

            if use_color:
                cat_list = sub[color_col].cast(pl.Utf8).to_list()
                unique_cats = sorted(set(cat_list))
                # Object array so each category mask is one vectorised compare.
                cats = np.asarray(cat_list, dtype=object)
                cmap = plt.colormaps.get_cmap("tab10")
                n_cats = max(len(unique_cats), 1)
                for i, cat in enumerate(unique_cats):
                    mask = cats == cat
                    ax.scatter(x[mask], y[mask], z[mask],
                               label=str(cat), alpha=0.7, color=cmap(i / n_cats))
                ax.legend(title=color_col)
            else:
                ax.scatter(x, y, z, alpha=0.6, color="#4C72B0")

            ax.set_xlabel(x_col)
            ax.set_ylabel(y_col)
            ax.set_zlabel(z_col)
            ax.set_title(f"3D Scatter: {x_col} × {y_col} × {z_col}")
            fig.tight_layout()

            session.output_dir.mkdir(parents=True, exist_ok=True)
            path = session.output_dir / "scatter3d.png"
            fig.savefig(path, dpi=150)
        finally:
            # Always release the figure, even when drawing or saving fails,
            # so repeated failures do not accumulate open figures.
            plt.close(fig)

        session.plot_paths.append(str(path))
        return f"3D scatter plot saved: {path}"
    except Exception as e:
        return friendly_error(e, "plot3d")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@command("plotmissing", usage="plotmissing [cols...]")
def cmd_plotmissing(session: Session, args: str) -> str:
    """Missing data heatmap: visualise missingness patterns across columns.

    Shows rows (a random sample of 500 when the dataset is larger) × columns
    with missing/present colour coding, saved as ``missing_heatmap.png`` in
    ``session.output_dir``. Percentages in the axis labels and in the
    returned summary table are computed on the full dataset, not the sample.

    With no arguments every column is included. Returns the summary table,
    a "Columns not found" message, a notice when the dataset has no rows,
    or the text produced by ``friendly_error`` when anything raises.

    Examples:
      plotmissing
      plotmissing income age education
    """
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    import numpy as np

    ca = CommandArgs(args)
    try:
        df = session.require_data()
        cols = ca.positional if ca.positional else df.columns

        missing_cols = [c for c in cols if c not in df.columns]
        if missing_cols:
            return f"Columns not found: {', '.join(missing_cols)}"

        total = df.height
        if total == 0:
            # The percentages below divide by the row count.
            return "No rows to plot: the dataset is empty."

        # Sample up to 500 rows for readability
        MAX_ROWS = 500
        if total > MAX_ROWS:
            import random
            idx = sorted(random.sample(range(total), MAX_ROWS))
            sub = df[idx].select(cols)
        else:
            sub = df.select(cols)

        # Build missing matrix (1.0 = missing, 0.0 = present)
        mat = np.zeros((sub.height, len(cols)))
        for j, c in enumerate(cols):
            mat[:, j] = sub[c].is_null().to_numpy().astype(float)

        # Count nulls once; reused for the axis labels and the summary table.
        null_counts = {c: df[c].null_count() for c in cols}
        labels = [f"{c}\n({100 * null_counts[c] / total:.0f}%)" for c in cols]

        fig, ax = plt.subplots(figsize=(max(8, len(cols) * 0.5 + 2), 6))
        try:
            ax.imshow(mat.T, aspect="auto", cmap="RdYlGn_r", interpolation="nearest",
                      vmin=0, vmax=1)
            ax.set_yticks(range(len(cols)))
            ax.set_yticklabels(labels, fontsize=8)
            ax.set_xlabel("Rows")
            ax.set_title("Missingness Heatmap (red=missing, green=present)")
            fig.tight_layout()

            session.output_dir.mkdir(parents=True, exist_ok=True)
            path = session.output_dir / "missing_heatmap.png"
            fig.savefig(path, dpi=150)
        finally:
            # Release the figure even when drawing or saving fails.
            plt.close(fig)
        session.plot_paths.append(str(path))

        # Summary table
        lines = [f"Missing data heatmap saved: {path}", ""]
        lines.append(f" {'Column':<25} {'Missing':>8} {'%':>6}")
        lines.append(" " + "-" * 42)
        for c in cols:
            n = null_counts[c]
            p = 100 * n / total
            lines.append(f" {c:<25} {n:>8,} {p:>5.1f}%")
        return "\n".join(lines)
    except Exception as e:
        return friendly_error(e, "plotmissing")
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
@command("plotinteractive", usage="plotinteractive scatter|bar|line|hist|box|heatmap <args> [--out=plot.html]")
def cmd_plotinteractive(session: Session, args: str) -> str:
    """Interactive Plotly chart (saved as HTML — open in browser).

    Sub-commands: scatter, bar, line, hist, box, heatmap

    Options:
      --out=<path>    output HTML file (default: <output_dir>/interactive_plot.html)
      --title=<txt>   chart title (heatmap defaults to "Correlation Heatmap")
      --color=<col>   colour grouping column (scatter and line only)

    The heatmap sub-command takes no column arguments: it plots the
    correlation matrix of all numeric columns.

    Returns a status message with the output path, a usage message, or the
    text produced by ``friendly_error`` when anything raises.

    Examples:
      plotinteractive scatter y x
      plotinteractive hist income --out=income_dist.html
      plotinteractive bar category value
      plotinteractive line date price --title="Price Over Time"
    """
    try:
        import plotly.express as px
        import plotly.io as pio
    except ImportError:
        return "plotly required. Install: pip install plotly"

    from pathlib import Path

    ca = CommandArgs(args)
    if not ca.positional:
        return "Usage: plotinteractive scatter|bar|line|hist|box|heatmap <args>"

    subcmd = ca.positional[0].lower()
    out_path = ca.options.get("out", str(session.output_dir / "interactive_plot.html"))
    title = ca.options.get("title", f"Interactive {subcmd.title()}")

    try:
        df = session.require_data()
        pd_df = df.to_pandas()

        if subcmd == "scatter":
            if len(ca.positional) < 3:
                return "Usage: plotinteractive scatter <y> <x>"
            y_col, x_col = ca.positional[1], ca.positional[2]
            color_col = ca.options.get("color")
            fig = px.scatter(pd_df, x=x_col, y=y_col, color=color_col, title=title, trendline="ols")

        elif subcmd == "hist":
            if len(ca.positional) < 2:
                return "Usage: plotinteractive hist <col>"
            col = ca.positional[1]
            fig = px.histogram(pd_df, x=col, title=title, nbins=30)

        elif subcmd == "bar":
            if len(ca.positional) < 3:
                return "Usage: plotinteractive bar <category> <value>"
            cat_col, val_col = ca.positional[1], ca.positional[2]
            fig = px.bar(pd_df, x=cat_col, y=val_col, title=title)

        elif subcmd == "line":
            if len(ca.positional) < 3:
                return "Usage: plotinteractive line <x> <y>"
            x_col, y_col = ca.positional[1], ca.positional[2]
            color_col = ca.options.get("color")
            fig = px.line(pd_df, x=x_col, y=y_col, color=color_col, title=title)

        elif subcmd == "box":
            if len(ca.positional) < 2:
                return "Usage: plotinteractive box <col> [by <group>]"
            col = ca.positional[1]
            by_raw = ca.rest_after("by")
            group = by_raw.strip().split()[0] if by_raw else None
            fig = px.box(pd_df, y=col, x=group, title=title)

        elif subcmd == "heatmap":
            # Matched against the lower-cased dtype name; unsigned integers
            # are numeric too and belong in the correlation matrix.
            NUMERIC = {
                "float32", "float64",
                "int8", "int16", "int32", "int64",
                "uint8", "uint16", "uint32", "uint64",
            }
            num_cols = [c for c in df.columns if str(df[c].dtype).lower() in NUMERIC]
            if not num_cols:
                return "No numeric columns available for heatmap."
            corr = df.select(num_cols).to_pandas().corr()
            # Honour --title here as well, keeping the previous default text.
            heatmap_title = ca.options.get("title", "Correlation Heatmap")
            fig = px.imshow(corr, title=heatmap_title, color_continuous_scale="RdBu", zmin=-1, zmax=1)

        else:
            return f"Unknown sub-command: {subcmd}"

        Path(out_path).parent.mkdir(parents=True, exist_ok=True)
        pio.write_html(fig, out_path)
        return f"Interactive {subcmd} chart saved: {out_path}\nOpen in a web browser to view."
    except Exception as e:
        return friendly_error(e, "plotinteractive")
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
@command("plotanimated", usage="plotanimated <y> <x> <time_col> [--out=anim.gif]")
def cmd_plotanimated(session: Session, args: str) -> str:
    """Animated scatter plot over a time variable (saved as GIF).

    Shows how the relationship between x and y changes across time steps:
    one frame per distinct value of ``<time_col>``, in the column's own sort
    order, capped at the first 60 values. Rows with a null in any of the
    three columns are dropped.

    If the animation cannot be written, the first frame is saved as a PNG
    next to the requested path and the returned message says so.

    Returns a status message, a usage / "Column not found" message, or the
    text produced by ``friendly_error`` when anything raises.

    Examples:
      plotanimated sales month year --out=sales_trend.gif
      plotanimated price volume date --out=price_animation.gif
    """
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    import matplotlib.animation as animation
    import numpy as np
    import polars as pl
    from pathlib import Path

    ca = CommandArgs(args)
    if len(ca.positional) < 3:
        return "Usage: plotanimated <y> <x> <time_col> [--out=anim.gif]"

    y_col, x_col, t_col = ca.positional[0], ca.positional[1], ca.positional[2]
    out_path = ca.options.get("out", str(session.output_dir / "animated.gif"))

    def _padded(lo, hi):
        """Return (lo, hi) widened by 5% of the span, or a fixed margin when flat."""
        span = hi - lo
        pad = 0.05 * span if span > 0 else (abs(lo) * 0.05 or 1.0)
        return lo - pad, hi + pad

    try:
        df = session.require_data()
        for c in (y_col, x_col, t_col):
            if c not in df.columns:
                return f"Column not found: {c}"

        sub = df.select([t_col, x_col, y_col]).drop_nulls().sort(t_col)
        time_vals = sub[t_col].cast(pl.Utf8).to_list()
        # `sub` is already sorted on the native time values, so first-seen
        # order of the text labels is chronological/numeric order. Sorting
        # the labels themselves would put "10" before "2".
        unique_times = list(dict.fromkeys(time_vals))

        if len(unique_times) < 2:
            return f"Need at least 2 unique values in '{t_col}' for animation."
        if len(unique_times) > 60:
            unique_times = unique_times[:60]  # cap

        x_all = sub[x_col].to_numpy().astype(float)
        y_all = sub[y_col].to_numpy().astype(float)
        # Object array so each frame's row mask is one vectorised compare.
        time_labels = np.asarray(time_vals, dtype=object)

        fig, ax = plt.subplots(figsize=(7, 5))
        try:
            scatter = ax.scatter([], [], alpha=0.7, color="#4C72B0", s=30)
            ax.set_xlim(*_padded(x_all.min(), x_all.max()))
            ax.set_ylim(*_padded(y_all.min(), y_all.max()))
            ax.set_xlabel(x_col)
            ax.set_ylabel(y_col)
            title_obj = ax.set_title("")

            def update(frame):
                t = unique_times[frame]
                mask = time_labels == t
                scatter.set_offsets(np.column_stack([x_all[mask], y_all[mask]]))
                title_obj.set_text(f"{t_col}: {t}")
                return scatter, title_obj

            ani = animation.FuncAnimation(fig, update, frames=len(unique_times),
                                          interval=300, blit=False)

            Path(out_path).parent.mkdir(parents=True, exist_ok=True)
            try:
                ani.save(out_path, writer="pillow", fps=3)
                message = f"Animated plot saved: {out_path} ({len(unique_times)} frames)"
            except Exception as exc:
                # Best-effort fallback: keep a static first frame, but tell
                # the user that no animation was produced and why.
                out_path = str(Path(out_path).with_suffix(".png"))
                update(0)
                fig.savefig(out_path, dpi=120)
                message = (f"Could not write animation ({exc}); "
                           f"saved first frame instead: {out_path}")
        finally:
            # Release the figure even when drawing or saving fails.
            plt.close(fig)

        session.plot_paths.append(out_path)
        return message
    except Exception as e:
        return friendly_error(e, "plotanimated")
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"""Extra visualization commands: corrplot, pairplot, violin, qq, residplot."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from openstat.commands.base import command
|
|
8
|
+
from openstat.session import Session
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
    """Split a Stata-style argument string into bare tokens and options.

    Every ``name(value)`` group becomes an entry in the options dict, keyed
    by the lower-cased name; a repeated name keeps its last value. What is
    left after removing those groups is split on whitespace, each token is
    stripped of leading/trailing commas, and empty tokens are discarded.

    Returns ``(positional_tokens, options)``.
    """
    options: dict[str, str] = {
        found.group(1).lower(): found.group(2)
        for found in re.finditer(r'(\w+)\(([^)]*)\)', raw)
    }

    leftover = re.sub(r'\w+\([^)]*\)', '', raw)
    tokens: list[str] = []
    for piece in leftover.split():
        cleaned = piece.strip(',')
        if cleaned:
            tokens.append(cleaned)

    return tokens, options
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _save_or_show(fig, path: str | None, default_name: str) -> str:
|
|
21
|
+
import os
|
|
22
|
+
if path:
|
|
23
|
+
os.makedirs(os.path.dirname(path) if os.path.dirname(path) else ".", exist_ok=True)
|
|
24
|
+
fig.savefig(path, dpi=150, bbox_inches="tight")
|
|
25
|
+
return path
|
|
26
|
+
os.makedirs("outputs", exist_ok=True)
|
|
27
|
+
out = f"outputs/{default_name}"
|
|
28
|
+
fig.savefig(out, dpi=150, bbox_inches="tight")
|
|
29
|
+
return out
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@command("corrplot", usage="corrplot [var1 var2 ...] [saving(path.png)]")
def cmd_corrplot(session: Session, args: str) -> str:
    """Correlation matrix heatmap.

    Uses the named columns that exist in the dataset; names that do not
    exist are skipped. When none are given (or none exist), the first 12
    columns of dtype Float64/Float32/Int64/Int32 are used. Rows with a null
    in any selected column are dropped before the correlations are computed.

    The image is written to the ``saving(...)`` path if supplied, otherwise
    to the default location chosen by ``_save_or_show``. Returns a status
    message with the saved path or a ``corrplot error: ...`` message.
    """
    try:
        import matplotlib.pyplot as plt
        import numpy as np
    except ImportError:
        return "matplotlib not installed."
    import polars as pl

    df = session.require_data()
    positional, opts = _stata_opts(args)
    cols = [c for c in positional if c in df.columns]
    if not cols:
        numeric_types = (pl.Float64, pl.Float32, pl.Int64, pl.Int32)
        cols = [c for c in df.columns if df[c].dtype in numeric_types][:12]
    if len(cols) < 2:
        return "corrplot requires at least 2 numeric variables."

    fig = None
    try:
        data = df.select(cols).drop_nulls().to_numpy().astype(float)
        corr = np.corrcoef(data.T)
        fig, ax = plt.subplots(figsize=(max(6, len(cols)), max(5, len(cols) - 1)))
        im = ax.imshow(corr, cmap="RdBu_r", vmin=-1, vmax=1, aspect="auto")
        plt.colorbar(im, ax=ax, shrink=0.8)
        ax.set_xticks(range(len(cols)))
        ax.set_yticks(range(len(cols)))
        ax.set_xticklabels(cols, rotation=45, ha="right", fontsize=9)
        ax.set_yticklabels(cols, fontsize=9)
        for i in range(len(cols)):
            for j in range(len(cols)):
                # White text on the strongly coloured cells keeps it legible.
                ax.text(j, i, f"{corr[i,j]:.2f}", ha="center", va="center", fontsize=7,
                        color="white" if abs(corr[i, j]) > 0.6 else "black")
        ax.set_title("Correlation Matrix")
        fig.tight_layout()
        out = _save_or_show(fig, opts.get("saving"), "corrplot.png")
        return f"Correlation plot saved: {out}"
    except Exception as exc:
        return f"corrplot error: {exc}"
    finally:
        # Release the figure on every path, including failures.
        if fig is not None:
            plt.close(fig)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@command("pairplot", usage="pairplot [var1 var2 ...] [saving(path.png)]")
def cmd_pairplot(session: Session, args: str) -> str:
    """Scatter matrix (pairplot) of numeric variables.

    Uses the named columns that exist in the dataset; names that do not
    exist are skipped. When none are given (or none exist), the first 6
    columns of dtype Float64/Float32/Int64/Int32 are used. Rows with a null
    in any selected column are dropped. Diagonal cells show histograms,
    off-diagonal cells show pairwise scatters.

    The image is written to the ``saving(...)`` path if supplied, otherwise
    to the default location chosen by ``_save_or_show``. Returns a status
    message with the saved path or a ``pairplot error: ...`` message.
    """
    try:
        import matplotlib.pyplot as plt
        import numpy as np  # noqa: F401
    except ImportError:
        return "matplotlib not installed."

    df = session.require_data()
    positional, opts = _stata_opts(args)
    cols = [c for c in positional if c in df.columns]
    if not cols:
        import polars as pl
        numeric_types = (pl.Float64, pl.Float32, pl.Int64, pl.Int32)
        cols = [c for c in df.columns if df[c].dtype in numeric_types][:6]
    if len(cols) < 2:
        return "pairplot requires at least 2 numeric variables."

    fig = None
    try:
        data = df.select(cols).drop_nulls().to_numpy().astype(float)
        k = len(cols)
        fig, axes = plt.subplots(k, k, figsize=(2.5 * k, 2.5 * k))
        for i in range(k):
            for j in range(k):
                ax = axes[i, j]
                if i == j:
                    ax.hist(data[:, i], bins=20, color="steelblue", alpha=0.7)
                    ax.set_ylabel("")
                else:
                    ax.scatter(data[:, j], data[:, i], alpha=0.3, s=10, color="steelblue")
                # Only the outer edge of the grid carries variable names.
                if i == k - 1:
                    ax.set_xlabel(cols[j], fontsize=8)
                if j == 0:
                    ax.set_ylabel(cols[i], fontsize=8)
                ax.tick_params(labelsize=6)
        fig.suptitle("Scatter Matrix", fontsize=12)
        fig.tight_layout()
        out = _save_or_show(fig, opts.get("saving"), "pairplot.png")
        return f"Pair plot saved: {out}"
    except Exception as exc:
        return f"pairplot error: {exc}"
    finally:
        # Release the figure on every path, including failures.
        if fig is not None:
            plt.close(fig)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@command("violin", usage="violin var [by(groupvar)] [saving(path.png)]")
def cmd_violin(session: Session, args: str) -> str:
    """Violin plot for distribution visualization.

    Draws one violin for ``var``, or one per distinct non-null value of the
    ``by(...)`` column when that column exists in the dataset (a ``by``
    column that does not exist is ignored). Nulls in ``var`` are dropped;
    groups left with no values are skipped rather than aborting the plot.

    The image is written to the ``saving(...)`` path if supplied, otherwise
    to the default location chosen by ``_save_or_show``. Returns a status
    message with the saved path, a usage / not-found message, or a
    ``violin error: ...`` message.
    """
    try:
        import matplotlib.pyplot as plt
        import numpy as np  # noqa: F401
    except ImportError:
        return "matplotlib not installed."

    df = session.require_data()
    positional, opts = _stata_opts(args)
    if not positional:
        return "Usage: violin var [by(groupvar)] [saving(path.png)]"
    col = positional[0]
    if col not in df.columns:
        return f"Column '{col}' not found."
    by = opts.get("by")

    fig = None
    try:
        fig, ax = plt.subplots(figsize=(8, 5))
        if by and by in df.columns:
            import polars as pl

            groups = df[by].drop_nulls().unique().sort().to_list()
            labelled = []
            for g in groups:
                values = df.filter(pl.col(by) == g)[col].drop_nulls().to_numpy().astype(float)
                # violinplot raises on an empty dataset, so drop empty groups.
                if values.size:
                    labelled.append((str(g), values))
            if not labelled:
                return f"violin error: no non-missing values in '{col}'."
            ax.violinplot([values for _, values in labelled], showmedians=True)
            ax.set_xticks(range(1, len(labelled) + 1))
            ax.set_xticklabels([name for name, _ in labelled])
            ax.set_xlabel(str(by))
        else:
            data = df[col].drop_nulls().to_numpy().astype(float)
            if not data.size:
                return f"violin error: no non-missing values in '{col}'."
            ax.violinplot([data], showmedians=True)
            ax.set_xticks([1])
            ax.set_xticklabels([col])
        ax.set_ylabel(col)
        ax.set_title(f"Violin Plot: {col}")
        fig.tight_layout()
        out = _save_or_show(fig, opts.get("saving"), f"violin_{col}.png")
        return f"Violin plot saved: {out}"
    except Exception as exc:
        return f"violin error: {exc}"
    finally:
        # Release the figure on every path, including early returns.
        if fig is not None:
            plt.close(fig)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@command("qqplot", usage="qqplot var [saving(path.png)]")
def cmd_qqplot(session: Session, args: str) -> str:
    """Quantile-Quantile (Q-Q) normality plot.

    Plots the sample quantiles of ``var`` (nulls dropped) against normal
    theoretical quantiles with the fitted reference line from
    ``scipy.stats.probplot``. When the sample has between 3 and 5000
    observations, the Shapiro-Wilk p-value is annotated on the plot.

    The image is written to the ``saving(...)`` path if supplied, otherwise
    to the default location chosen by ``_save_or_show``. Returns a status
    message with the saved path and the R² of the probability-plot fit, a
    usage / not-found message, or a ``qqplot error: ...`` message.
    """
    try:
        import matplotlib.pyplot as plt
        import numpy as np
        from scipy import stats as sp_stats
    except ImportError:
        return "matplotlib or scipy not installed."

    df = session.require_data()
    positional, opts = _stata_opts(args)
    if not positional:
        return "Usage: qqplot var [saving(path.png)]"
    col = positional[0]
    if col not in df.columns:
        return f"Column '{col}' not found."

    fig = None
    try:
        data = df[col].drop_nulls().to_numpy().astype(float)
        fig, ax = plt.subplots(figsize=(6, 6))
        (osm, osr), (slope, intercept, r) = sp_stats.probplot(data, dist="norm")
        ax.scatter(osm, osr, alpha=0.5, s=15, color="steelblue", label="Data")
        x_line = np.array([osm.min(), osm.max()])
        ax.plot(x_line, slope * x_line + intercept, "r-", lw=2, label="Normal line")
        ax.set_xlabel("Theoretical quantiles")
        ax.set_ylabel("Sample quantiles")
        ax.set_title(f"Q-Q Plot: {col}")
        ax.legend()
        # Shapiro-Wilk stat: the test needs at least 3 observations, and the
        # annotation is skipped for samples above 5000.
        if 3 <= len(data) <= 5000:
            sw_stat, sw_p = sp_stats.shapiro(data)
            ax.text(0.05, 0.95, f"Shapiro-Wilk p = {sw_p:.4f}", transform=ax.transAxes,
                    fontsize=9, verticalalignment="top")
        fig.tight_layout()
        out = _save_or_show(fig, opts.get("saving"), f"qqplot_{col}.png")
        return f"Q-Q plot saved: {out} (R²={r**2:.4f})"
    except Exception as exc:
        return f"qqplot error: {exc}"
    finally:
        # Release the figure on every path, including failures.
        if fig is not None:
            plt.close(fig)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
@command("residplot", usage="residplot dep var1 [var2 ...] [saving(path.png)]")
def cmd_residplot(session: Session, args: str) -> str:
    """Residual vs fitted and scale-location plots for OLS.

    Fits ``dep`` on an intercept plus the listed predictors by least squares
    (rows with a null in any used column are dropped) and draws two panels:
    residuals against fitted values, and the square root of the absolute
    standardized residuals against fitted values.

    Predictor names that are not columns of the dataset are left out of the
    model; when that happens the returned message lists them.

    The image is written to the ``saving(...)`` path if supplied, otherwise
    to the default location chosen by ``_save_or_show``. Returns a status
    message with the saved path, a usage / not-found message, or a
    ``residplot error: ...`` message.
    """
    try:
        import matplotlib.pyplot as plt
        import numpy as np
    except ImportError:
        return "matplotlib not installed."

    df = session.require_data()
    positional, opts = _stata_opts(args)
    if len(positional) < 2:
        return "Usage: residplot dep var1 [var2 ...]"
    dep = positional[0]
    indeps = [c for c in positional[1:] if c in df.columns]
    ignored = [c for c in positional[1:] if c not in df.columns]
    if dep not in df.columns:
        return f"Column '{dep}' not found."
    if not indeps:
        return "No valid predictor variables."

    fig = None
    try:
        sub = df.select([dep] + indeps).drop_nulls()
        y = sub[dep].to_numpy().astype(float)
        X = np.column_stack([np.ones(len(y)), sub.select(indeps).to_numpy().astype(float)])
        beta = np.linalg.lstsq(X, y, rcond=None)[0]
        fitted = X @ beta
        resid = y - fitted
        # Floor the scale so a perfect fit does not divide by zero.
        std_resid = resid / max(resid.std(), 1e-10)

        fig, axes = plt.subplots(1, 2, figsize=(12, 5))

        # Residuals vs Fitted
        axes[0].scatter(fitted, resid, alpha=0.4, s=15, color="steelblue")
        axes[0].axhline(0, color="red", lw=1)
        axes[0].set_xlabel("Fitted values")
        axes[0].set_ylabel("Residuals")
        axes[0].set_title("Residuals vs Fitted")

        # Scale-Location
        axes[1].scatter(fitted, np.sqrt(np.abs(std_resid)), alpha=0.4, s=15, color="steelblue")
        axes[1].set_xlabel("Fitted values")
        axes[1].set_ylabel("√|Standardized residuals|")
        axes[1].set_title("Scale-Location")

        fig.tight_layout()
        out = _save_or_show(fig, opts.get("saving"), f"residplot_{dep}.png")
        message = f"Residual plots saved: {out}"
        if ignored:
            # A mistyped predictor changes the model, so say so.
            message += f" (ignored unknown columns: {', '.join(ignored)})"
        return message
    except Exception as exc:
        return f"residplot error: {exc}"
    finally:
        # Release the figure on every path, including failures.
        if fig is not None:
            plt.close(fig)
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Watch command: re-run a script automatically when the file changes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
import os
|
|
7
|
+
|
|
8
|
+
from openstat.commands.base import command, CommandArgs
|
|
9
|
+
from openstat.session import Session
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@command("watch", usage="watch <script.ost> [--interval=2]")
def cmd_watch(session: Session, args: str) -> str:
    """Watch a script file and re-run it whenever it changes.

    Runs the script once immediately, then polls the file's modification
    time every N seconds (default 2) and re-runs it when the time changes.
    A failing run, including the first one, is reported and watching
    continues so the script can be fixed in place. Watching ends when the
    file is removed or on Ctrl+C.

    ``--interval`` must be a positive, finite number of seconds.

    Returns a usage / validation / "File not found" message, or
    "Watch stopped: <path>" once watching ends.

    Examples:
      watch analysis.ost
      watch pipeline.ost --interval=5
    """
    import math

    ca = CommandArgs(args)
    if not ca.positional:
        return "Usage: watch <script.ost> [--interval=2]"

    script_path = ca.positional[0]
    try:
        interval = float(ca.options.get("interval", 2))
    except (TypeError, ValueError):
        return "Invalid --interval: expected a number of seconds."
    # time.sleep rejects negative and infinite values; zero would busy-poll.
    if not math.isfinite(interval) or interval <= 0:
        return "Invalid --interval: must be a positive number of seconds."

    if not os.path.exists(script_path):
        return f"File not found: {script_path}"

    from openstat.script_runner import run_script_advanced

    last_mtime = os.path.getmtime(script_path)

    print(f"Watching {script_path} (interval={interval}s). Press Ctrl+C to stop.")

    try:
        # Run once immediately; handle failures the same way as re-runs.
        try:
            run_script_advanced(script_path, session)
            print("[Initial run complete]")
        except Exception as exc:
            print(f"[{_timestamp()}] Error: {exc}")

        while True:
            time.sleep(interval)
            try:
                current_mtime = os.path.getmtime(script_path)
            except FileNotFoundError:
                print(f"File removed: {script_path}. Stopping watch.")
                break

            if current_mtime != last_mtime:
                last_mtime = current_mtime
                print(f"\n[{_timestamp()}] File changed — re-running {script_path}...")
                try:
                    run_script_advanced(script_path, session)
                    print(f"[{_timestamp()}] Done.")
                except Exception as exc:
                    print(f"[{_timestamp()}] Error: {exc}")

    except KeyboardInterrupt:
        # Ctrl+C is the documented way to stop watching.
        pass

    return f"Watch stopped: {script_path}"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _timestamp() -> str:
    """Return the current local wall-clock time formatted as ``HH:MM:SS``."""
    from datetime import datetime

    now = datetime.now()
    return format(now, "%H:%M:%S")
|