openstat-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstat/__init__.py +3 -0
- openstat/__main__.py +4 -0
- openstat/backends/__init__.py +16 -0
- openstat/backends/duckdb_backend.py +70 -0
- openstat/backends/polars_backend.py +52 -0
- openstat/cli.py +92 -0
- openstat/commands/__init__.py +82 -0
- openstat/commands/adv_stat_cmds.py +1255 -0
- openstat/commands/advanced_ml_cmds.py +576 -0
- openstat/commands/advreg_cmds.py +207 -0
- openstat/commands/alias_cmds.py +135 -0
- openstat/commands/arch_cmds.py +82 -0
- openstat/commands/arules_cmds.py +111 -0
- openstat/commands/automodel_cmds.py +212 -0
- openstat/commands/backend_cmds.py +82 -0
- openstat/commands/base.py +170 -0
- openstat/commands/bayes_cmds.py +71 -0
- openstat/commands/causal_cmds.py +269 -0
- openstat/commands/cluster_cmds.py +152 -0
- openstat/commands/data_cmds.py +996 -0
- openstat/commands/datamanip_cmds.py +672 -0
- openstat/commands/dataquality_cmds.py +174 -0
- openstat/commands/datetime_cmds.py +176 -0
- openstat/commands/dimreduce_cmds.py +184 -0
- openstat/commands/discrete_cmds.py +149 -0
- openstat/commands/dsl_cmds.py +143 -0
- openstat/commands/epi_cmds.py +93 -0
- openstat/commands/equiv_tobit_cmds.py +94 -0
- openstat/commands/esttab_cmds.py +196 -0
- openstat/commands/export_beamer_cmds.py +142 -0
- openstat/commands/export_cmds.py +201 -0
- openstat/commands/export_extra_cmds.py +240 -0
- openstat/commands/factor_cmds.py +180 -0
- openstat/commands/groupby_cmds.py +155 -0
- openstat/commands/help_cmds.py +237 -0
- openstat/commands/i18n_cmds.py +43 -0
- openstat/commands/import_extra_cmds.py +561 -0
- openstat/commands/influence_cmds.py +134 -0
- openstat/commands/iv_cmds.py +106 -0
- openstat/commands/manova_cmds.py +105 -0
- openstat/commands/mediate_cmds.py +233 -0
- openstat/commands/meta_cmds.py +284 -0
- openstat/commands/mi_cmds.py +228 -0
- openstat/commands/mixed_cmds.py +79 -0
- openstat/commands/mixture_changepoint_cmds.py +166 -0
- openstat/commands/ml_adv_cmds.py +147 -0
- openstat/commands/ml_cmds.py +178 -0
- openstat/commands/model_eval_cmds.py +142 -0
- openstat/commands/network_cmds.py +288 -0
- openstat/commands/nlquery_cmds.py +161 -0
- openstat/commands/nonparam_cmds.py +149 -0
- openstat/commands/outreg_cmds.py +247 -0
- openstat/commands/panel_cmds.py +141 -0
- openstat/commands/pdf_cmds.py +226 -0
- openstat/commands/pipeline_cmds.py +319 -0
- openstat/commands/plot_cmds.py +189 -0
- openstat/commands/plugin_cmds.py +79 -0
- openstat/commands/posthoc_cmds.py +153 -0
- openstat/commands/power_cmds.py +172 -0
- openstat/commands/profile_cmds.py +246 -0
- openstat/commands/rbridge_cmds.py +81 -0
- openstat/commands/regex_cmds.py +104 -0
- openstat/commands/report_cmds.py +48 -0
- openstat/commands/repro_cmds.py +129 -0
- openstat/commands/resampling_cmds.py +109 -0
- openstat/commands/reshape_cmds.py +223 -0
- openstat/commands/sem_cmds.py +177 -0
- openstat/commands/stat_cmds.py +1040 -0
- openstat/commands/stata_import_cmds.py +215 -0
- openstat/commands/string_cmds.py +124 -0
- openstat/commands/surv_cmds.py +145 -0
- openstat/commands/survey_cmds.py +153 -0
- openstat/commands/textanalysis_cmds.py +192 -0
- openstat/commands/ts_adv_cmds.py +136 -0
- openstat/commands/ts_cmds.py +195 -0
- openstat/commands/tui_cmds.py +111 -0
- openstat/commands/ux_cmds.py +191 -0
- openstat/commands/validate_cmds.py +270 -0
- openstat/commands/viz_adv_cmds.py +312 -0
- openstat/commands/viz_extra_cmds.py +251 -0
- openstat/commands/watch_cmds.py +69 -0
- openstat/config.py +106 -0
- openstat/dsl/__init__.py +0 -0
- openstat/dsl/parser.py +332 -0
- openstat/dsl/tokenizer.py +105 -0
- openstat/i18n.py +120 -0
- openstat/io/__init__.py +0 -0
- openstat/io/loader.py +187 -0
- openstat/jupyter/__init__.py +18 -0
- openstat/jupyter/display.py +18 -0
- openstat/jupyter/magic.py +60 -0
- openstat/logging_config.py +59 -0
- openstat/plots/__init__.py +0 -0
- openstat/plots/plotter.py +437 -0
- openstat/plots/surv_plots.py +32 -0
- openstat/plots/ts_plots.py +59 -0
- openstat/plugins/__init__.py +5 -0
- openstat/plugins/manager.py +69 -0
- openstat/repl.py +457 -0
- openstat/reporting/__init__.py +0 -0
- openstat/reporting/eda.py +208 -0
- openstat/reporting/report.py +67 -0
- openstat/script_runner.py +319 -0
- openstat/session.py +133 -0
- openstat/stats/__init__.py +0 -0
- openstat/stats/advanced_regression.py +269 -0
- openstat/stats/arch_garch.py +84 -0
- openstat/stats/bayesian.py +103 -0
- openstat/stats/causal.py +258 -0
- openstat/stats/clustering.py +206 -0
- openstat/stats/discrete.py +311 -0
- openstat/stats/epidemiology.py +119 -0
- openstat/stats/equiv_tobit.py +163 -0
- openstat/stats/factor.py +174 -0
- openstat/stats/imputation.py +282 -0
- openstat/stats/influence.py +78 -0
- openstat/stats/iv.py +131 -0
- openstat/stats/manova.py +124 -0
- openstat/stats/mixed.py +128 -0
- openstat/stats/ml.py +275 -0
- openstat/stats/ml_advanced.py +117 -0
- openstat/stats/model_eval.py +183 -0
- openstat/stats/models.py +1342 -0
- openstat/stats/nonparametric.py +130 -0
- openstat/stats/panel.py +179 -0
- openstat/stats/power.py +295 -0
- openstat/stats/resampling.py +203 -0
- openstat/stats/survey.py +213 -0
- openstat/stats/survival.py +196 -0
- openstat/stats/timeseries.py +142 -0
- openstat/stats/ts_advanced.py +114 -0
- openstat/types.py +11 -0
- openstat/web/__init__.py +1 -0
- openstat/web/app.py +117 -0
- openstat/web/session_manager.py +73 -0
- openstat/web/static/app.js +117 -0
- openstat/web/static/index.html +38 -0
- openstat/web/static/style.css +103 -0
- openstat_cli-1.0.0.dist-info/METADATA +748 -0
- openstat_cli-1.0.0.dist-info/RECORD +143 -0
- openstat_cli-1.0.0.dist-info/WHEEL +4 -0
- openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
- openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""Automatic model selection: automodel command."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import itertools
|
|
6
|
+
from typing import NamedTuple
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import polars as pl
|
|
10
|
+
|
|
11
|
+
from openstat.commands.base import command
|
|
12
|
+
from openstat.session import Session
|
|
13
|
+
from openstat.dsl.parser import parse_formula, ParseError
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class _Candidate(NamedTuple):
|
|
17
|
+
formula: str
|
|
18
|
+
model_type: str
|
|
19
|
+
aic: float
|
|
20
|
+
bic: float
|
|
21
|
+
r2: float | None
|
|
22
|
+
n: int
|
|
23
|
+
k: int # number of predictors
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _fit_candidate(
|
|
27
|
+
df: pl.DataFrame,
|
|
28
|
+
dep: str,
|
|
29
|
+
indeps: list[str],
|
|
30
|
+
model_type: str,
|
|
31
|
+
) -> _Candidate | None:
|
|
32
|
+
"""Fit a single candidate model, return metrics or None on failure."""
|
|
33
|
+
try:
|
|
34
|
+
if model_type == "ols":
|
|
35
|
+
from openstat.stats.models import fit_ols
|
|
36
|
+
result, _ = fit_ols(df, dep, indeps)
|
|
37
|
+
formula = f"{dep} ~ {' + '.join(indeps)}"
|
|
38
|
+
return _Candidate(
|
|
39
|
+
formula=formula, model_type="OLS",
|
|
40
|
+
aic=result.aic or float("inf"), bic=result.bic or float("inf"),
|
|
41
|
+
r2=result.r_squared, n=result.n_obs, k=len(indeps),
|
|
42
|
+
)
|
|
43
|
+
elif model_type == "logit":
|
|
44
|
+
from openstat.stats.models import fit_logit
|
|
45
|
+
result, _ = fit_logit(df, dep, indeps)
|
|
46
|
+
formula = f"{dep} ~ {' + '.join(indeps)}"
|
|
47
|
+
return _Candidate(
|
|
48
|
+
formula=formula, model_type="Logit",
|
|
49
|
+
aic=result.aic or float("inf"), bic=result.bic or float("inf"),
|
|
50
|
+
r2=result.pseudo_r2, n=result.n_obs, k=len(indeps),
|
|
51
|
+
)
|
|
52
|
+
elif model_type == "poisson":
|
|
53
|
+
from openstat.stats.models import fit_poisson
|
|
54
|
+
result, _ = fit_poisson(df, dep, indeps)
|
|
55
|
+
formula = f"{dep} ~ {' + '.join(indeps)}"
|
|
56
|
+
return _Candidate(
|
|
57
|
+
formula=formula, model_type="Poisson",
|
|
58
|
+
aic=result.aic or float("inf"), bic=result.bic or float("inf"),
|
|
59
|
+
r2=result.pseudo_r2, n=result.n_obs, k=len(indeps),
|
|
60
|
+
)
|
|
61
|
+
except Exception:
|
|
62
|
+
return None
|
|
63
|
+
return None
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@command("automodel", usage="automodel <depvar> ~ <x1> <x2> ... [--ols|--logit|--poisson] [--criterion=aic|bic] [--maxvars=N]")
def cmd_automodel(session: Session, args: str) -> str:
    """Automatic model selection: fits all variable subsets and ranks by AIC/BIC.

    Uses exhaustive search for ≤ 8 predictors, forward stepwise for more.

    Examples:
      automodel score ~ age income education
      automodel employed ~ age income score region --logit --criterion=bic
      automodel score ~ age income education region --criterion=aic --maxvars=3
    """
    import re

    df = session.require_data()

    # ── Flags ────────────────────────────────────────────────────────────
    if "--logit" in args:
        model_type = "logit"
    elif "--poisson" in args:
        model_type = "poisson"
    else:
        model_type = "ols"

    m_crit = re.search(r"--criterion[= ](\w+)", args)
    criterion = m_crit.group(1).lower() if m_crit else "aic"
    if criterion not in ("aic", "bic"):
        criterion = "aic"  # unknown criterion falls back to the default

    m_max = re.search(r"--maxvars[= ](\d+)", args)
    # 0 is normalised to "no limit" so both search strategies agree.
    max_vars = (int(m_max.group(1)) or None) if m_max else None

    # Remove flags from the formula text. Only --criterion/--maxvars take a
    # value; a bare flag such as --logit must not swallow the word after it.
    formula_str = re.sub(
        r"--(?:criterion|maxvars)[= ]\w+|--\w+(?:=\w+)?", "", args
    ).strip()
    if "~" not in formula_str:
        return (
            "Usage: automodel <depvar> ~ <x1> <x2> ... [--ols|--logit|--poisson]\n"
            "Example: automodel score ~ age income education"
        )

    # Normalize: allow space-separated predictors (convert to + separated)
    lhs, rhs = formula_str.split("~", 1)
    if "+" not in rhs:
        rhs = " + ".join(rhs.split())
    formula_str = f"{lhs.strip()} ~ {rhs.strip()}"

    try:
        dep, indeps = parse_formula(formula_str)
    except ParseError as e:
        return f"Formula error: {e}"

    if dep not in df.columns:
        return f"Dependent variable not found: {dep}"
    missing = [x for x in indeps if x not in df.columns]
    if missing:
        return f"Predictors not found: {', '.join(missing)}"

    # Every requested predictor stays in play: --maxvars limits model size,
    # not which variables may be considered.
    k = len(indeps)
    strategy = "exhaustive" if k <= 8 else "forward stepwise"

    # ── Build candidates ─────────────────────────────────────────────────
    candidates: list[_Candidate] = []

    if strategy == "exhaustive":
        largest = k if max_vars is None else min(k, max_vars)
        for r in range(1, largest + 1):
            for subset in itertools.combinations(indeps, r):
                c = _fit_candidate(df, dep, list(subset), model_type)
                if c is not None:
                    candidates.append(c)
    else:
        # Forward stepwise: at each step add the variable whose model scores
        # best on the criterion, and record that step's model.
        current: list[str] = []
        remaining = list(indeps)
        while remaining and (max_vars is None or len(current) < max_vars):
            step_best: _Candidate | None = None
            step_var: str | None = None
            for var in remaining:
                c = _fit_candidate(df, dep, current + [var], model_type)
                if c is None:
                    continue
                if step_best is None or getattr(c, criterion) < getattr(step_best, criterion):
                    step_best, step_var = c, var
            if step_best is None:
                break  # nothing could be fitted at this step
            current.append(step_var)
            remaining = [v for v in remaining if v != step_var]
            candidates.append(step_best)

    if not candidates:
        return "No valid models found. Check your data and variable names."

    # Sort by criterion (ascending: lower is better)
    candidates.sort(key=lambda c: getattr(c, criterion))
    top_n = min(10, len(candidates))
    top = candidates[:top_n]

    # ── Store best model result in session ───────────────────────────────
    best = candidates[0]
    load_error: str | None = None
    try:
        dep2, indeps2 = parse_formula(best.formula)
        if model_type == "ols":
            from openstat.stats.models import fit_ols
            result, raw = fit_ols(df, dep2, indeps2)
        elif model_type == "logit":
            from openstat.stats.models import fit_logit
            result, raw = fit_logit(df, dep2, indeps2)
        else:
            from openstat.stats.models import fit_poisson
            result, raw = fit_poisson(df, dep2, indeps2)
        session._last_model = raw
        session._last_model_vars = (dep2, indeps2)
        session._last_fit_result = result
        session._last_fit_kwargs = {}
    except Exception as exc:
        # Best-effort: the ranking is still useful, but the user must be told
        # the model was not loaded instead of being promised diagnostics.
        load_error = str(exc)

    crit_label = criterion.upper()
    lines = [
        f"Dependent: {dep} Candidates: {len(candidates)} Strategy: {strategy}",
        f"Model type: {model_type.upper()} Selection criterion: {crit_label}",
        "",
        f"Top {top_n} models by {crit_label}:",
        f" {'#':<3} {'AIC':>9} {'BIC':>9} {'R²/PseudoR²':>11} k Formula",
        " " + "-" * 76,
    ]
    for i, c in enumerate(top, 1):
        r2_str = f"{c.r2:.4f}" if c.r2 is not None else " —"
        marker = " ← best" if i == 1 else ""
        lines.append(
            f" {i:<3} {c.aic:>9.2f} {c.bic:>9.2f} "
            f"{r2_str:>11} {c.k} {c.formula}{marker}"
        )

    if load_error is None:
        status = "Best model loaded. Use 'estimates', 'vif', 'residuals', 'plot coef' for diagnostics."
    else:
        status = f"Best model could not be loaded into the session: {load_error}"

    lines += [
        "",
        f"Best model: {best.formula}",
        f" AIC = {best.aic:.2f} BIC = {best.bic:.2f}",
        "",
        status,
    ]

    return "\n" + "=" * 60 + "\nAutomatic Model Selection\n" + "=" * 60 + "\n" + "\n".join(lines) + "\n" + "=" * 60
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Backend management commands: set backend, sql."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from rich.console import Console
|
|
6
|
+
from rich.table import Table
|
|
7
|
+
|
|
8
|
+
from openstat.session import Session
|
|
9
|
+
from openstat.commands.base import command, CommandArgs, rich_to_str, friendly_error
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@command("set", usage="set seed <N> | set backend polars|duckdb")
def cmd_set(session: Session, args: str) -> str:
    """Change settings: random seed, backend."""
    tokens = CommandArgs(args).positional
    if not tokens:
        return "Usage: set seed <N> | set backend polars|duckdb"

    setting = tokens[0].lower()
    has_value = len(tokens) > 1

    if setting == "seed":
        # Without a value, report the seed stored on the session (if any).
        if not has_value:
            stored = getattr(session, "_repro_seed", None)
            if stored is None:
                return "No seed set."
            return f"Current seed: {stored}"

        try:
            seed = int(tokens[1])
        except ValueError:
            return f"Invalid seed: {tokens[1]}. Must be an integer."

        import numpy as np
        import random as _random

        # Seed both NumPy's global generator and the stdlib one.
        np.random.seed(seed)
        _random.seed(seed)
        session._repro_seed = seed  # type: ignore[attr-defined]
        return f"Seed set to {seed}. Reproducible random operations enabled."

    if setting != "backend":
        return f"Unknown setting: {setting}. Available: seed, backend"

    backend_name = tokens[1].lower() if has_value else ""

    if backend_name == "polars":
        session._backend = "polars"
        session._backend_obj = None
        return "Backend set to: polars"

    if backend_name == "duckdb":
        try:
            from openstat.backends.duckdb_backend import DuckDBBackend

            session._backend_obj = DuckDBBackend()
            session._backend = "duckdb"
            # If data already loaded, register it under the table name "data"
            if session.df is not None:
                session._backend_obj._conn.register("data", session.df.to_pandas())
                session._backend_obj._table_loaded = True
            return "Backend set to: duckdb"
        except ImportError as e:
            # Surface the import problem as the command's output.
            return str(e)

    return f"Unknown backend: {backend_name}. Use 'polars' or 'duckdb'."
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@command("sql", usage='sql "SELECT * FROM data WHERE ..."')
def cmd_sql(session: Session, args: str) -> str:
    """Execute SQL query on the loaded dataset (DuckDB backend recommended)."""
    query = args.strip()
    # Remove exactly one pair of matching wrapper quotes. Stripping every
    # quote character from both ends would also eat the closing quote of a
    # trailing SQL string literal (e.g. ... WHERE name = 'bob').
    if len(query) >= 2 and query[0] == query[-1] and query[0] in "\"'":
        query = query[1:-1].strip()
    if not query:
        return 'Usage: sql "SELECT * FROM data WHERE ..."'

    try:
        if session._backend == "duckdb" and session._backend_obj is not None:
            result_df = session._backend_obj.sql(query)
        elif session.df is not None:
            # Use Polars SQL context as fallback; the frame is exposed as "data"
            import polars as pl
            ctx = pl.SQLContext({"data": session.df})
            result_df = ctx.execute(query).collect()
        else:
            return "No data loaded."

        # Snapshot first, then replace the dataset with the query result.
        session.snapshot()
        session.df = result_df
        return f"Query returned {session.shape_str}"
    except Exception as e:
        return friendly_error(e, "sql")
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""Command registration infrastructure.
|
|
2
|
+
|
|
3
|
+
Provides a @command decorator that auto-registers handler functions,
|
|
4
|
+
and a CommandArgs helper for standardized argument parsing.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from openstat.commands.base import command
|
|
8
|
+
|
|
9
|
+
@command("mycommand", usage="mycommand <arg>")
|
|
10
|
+
def cmd_mycommand(session, args):
|
|
11
|
+
'''One-line description shown in help.'''
|
|
12
|
+
...
|
|
13
|
+
return "result text"
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import io
|
|
19
|
+
import re
|
|
20
|
+
from typing import Callable
|
|
21
|
+
|
|
22
|
+
from rich.console import Console
|
|
23
|
+
|
|
24
|
+
from openstat.session import Session
|
|
25
|
+
from openstat.logging_config import get_logger
|
|
26
|
+
|
|
27
|
+
# Module logger, obtained from the project's logging helper.
log = get_logger("commands")

# Type alias for command handlers: called as handler(session, args_string)
# and returns the text to show to the user.
Handler = Callable[[Session, str], str]

# Global registry — populated by @command decorator
_REGISTRY: dict[str, Handler] = {}  # command name -> handler
_USAGE: dict[str, str] = {}  # command name -> usage line
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class CommandArgs:
    """Standardized argument parser for commands.

    Handles: positional args, --flags, key=value options.

    Usage:
        ca = CommandArgs(args)
        ca.positional                    # list of positional tokens
        ca.has_flag("--robust")          # True/False
        ca.get_option("how", "inner")    # key=value with default
        ca.rest_after("on")              # everything after keyword "on"
    """

    def __init__(self, raw: str) -> None:
        """Split *raw* on whitespace and classify every token."""
        self.raw = raw
        self._tokens = raw.split()
        self.flags: set[str] = set()  # stored with their leading dashes
        self.options: dict[str, str] = {}  # keys stored without leading dashes
        self.positional: list[str] = []

        for tok in self._tokens:
            if self._is_positional(tok):
                self.positional.append(tok)
            elif "=" in tok:
                # Covers both "--key=value" and "key=value".
                k, v = tok.split("=", 1)
                self.options[k.lstrip("-") if tok.startswith("--") else k] = v
            else:
                self.flags.add(tok)

    @staticmethod
    def _is_positional(tok: str) -> bool:
        """Return True when *tok* is neither a --flag nor a key=value option.

        A token that starts with a quote character is positional even if it
        contains "=".
        """
        if tok.startswith("--"):
            return False
        return "=" not in tok or tok.startswith(('"', "'"))

    def has_flag(self, flag: str) -> bool:
        """Return True if *flag* (spelled with its dashes) was given."""
        return flag in self.flags

    def get_option(self, key: str, default: str | None = None) -> str | None:
        """Return the value of option *key*, or *default* when absent."""
        return self.options.get(key, default)

    def get_option_float(self, key: str, default: float) -> float:
        """Return option *key* as a float; *default* when absent or not numeric."""
        val = self.options.get(key)
        if val is None:
            return default
        try:
            return float(val)
        except ValueError:
            return default

    def rest_after(self, keyword: str) -> str | None:
        """Return everything after a keyword (case-insensitive).

        The keyword is matched literally and only as a whole word. Returns
        None when the keyword does not occur.
        """
        # re.escape keeps regex metacharacters in the keyword literal; the
        # lookarounds act like \b but also work when the keyword starts or
        # ends with a non-word character.
        pattern = rf"(?<!\w){re.escape(keyword)}(?!\w)"
        parts = re.split(pattern, self.raw, maxsplit=1, flags=re.IGNORECASE)
        if len(parts) < 2:
            return None
        return parts[1].strip()

    def strip_flags_and_options(self) -> str:
        """Return raw string with all --flags and key=value removed.

        Removal is done per whitespace-delimited token, so one flag can never
        clobber part of another token (e.g. "--r" inside "--robust").
        Whitespace between the surviving tokens is left as it was.
        """
        def _keep_positional(match: re.Match) -> str:
            tok = match.group(0)
            return tok if self._is_positional(tok) else ""

        return re.sub(r"\S+", _keep_positional, self.raw).strip()

    def __bool__(self) -> bool:
        """Truthy when the raw argument string contains any non-whitespace."""
        return bool(self.raw.strip())
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def command(name: str, *, usage: str = "") -> Callable[[Handler], Handler]:
    """Build a decorator that registers a handler under *name*.

    Args:
        name: Command name as typed by the user.
        usage: One-line usage example shown in help; "<name> ..." is stored
            when it is empty.

    Returns:
        A decorator that stores the function in the registry and hands it
        back unchanged.
    """
    usage_text = usage if usage else f"{name} ..."

    def _register(fn: Handler) -> Handler:
        # A second registration under the same name wins; it is only logged.
        already_known = name in _REGISTRY
        if already_known:
            log.warning("Command '%s' re-registered (overriding previous)", name)
        _REGISTRY[name] = fn
        _USAGE[name] = usage_text
        return fn

    return _register
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def get_registry() -> dict[str, Handler]:
    """Return a live read-only view of the command registry.

    The mapping wraps the registry instead of copying it, so commands
    registered after this call (e.g. by plugins) show up in it as well.
    """
    import types

    view = types.MappingProxyType(_REGISTRY)
    return view  # type: ignore[return-value]
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def get_usage(name: str) -> str:
    """Return the usage line registered for *name*, or "" when there is none."""
    try:
        return _USAGE[name]
    except KeyError:
        return ""
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def run_command(session, line: str) -> str:
    """Run a command line string against the session. Used by DSL loops.

    The first whitespace-delimited word (lower-cased) selects the handler;
    the remainder of the line is passed to it as the argument string.
    Returns "" for a blank line or when the handler returns nothing.
    """
    stripped = line.strip()
    if not stripped:
        return ""

    head, *tail = stripped.split(None, 1)
    cmd_name = head.lower()

    handler = _REGISTRY.get(cmd_name)
    if handler is None:
        return f"Unknown command: {cmd_name}"

    output = handler(session, tail[0] if tail else "")
    return output or ""
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def rich_to_str(fn) -> str:
    """Capture Rich output as plain text (no stdout side-effect).

    *fn* is called with a recording Console that writes into an in-memory
    buffer; the exported text is returned with trailing whitespace removed.
    """
    recorder = Console(file=io.StringIO(), width=120, record=True)
    fn(recorder)
    captured = recorder.export_text()
    return captured.rstrip()
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def friendly_error(e: Exception, context: str) -> str:
    """Convert common Polars/statsmodels errors to user-friendly messages.

    The exception text and class name are checked against a few known
    patterns in order; anything unrecognised is logged at debug level and
    returned with the original message.
    """
    msg = str(e)
    lowered = msg.lower()
    etype = type(e).__name__

    column_missing = "not found" in lowered or "ColumnNotFoundError" in etype
    if column_missing:
        return f"[red]Error:[/red] {context}: Column not found. Check column names with 'describe'."

    mentions_text_type = any(word in lowered for word in ("str", "string"))
    if "type" in lowered and mentions_text_type:
        return f"[red]Error:[/red] {context}: Type mismatch — cannot use arithmetic on text columns."

    if any(word in lowered for word in ("singular", "linalg")):
        return f"[red]Error:[/red] {context}: Matrix is singular — check for perfect multicollinearity or constant columns."

    log.debug("Unhandled error in %s: %s: %s", context, etype, msg)
    return f"[red]Error:[/red] {context}: {e}"
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Bayesian regression commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from openstat.commands.base import command
|
|
8
|
+
from openstat.session import Session
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
|
|
12
|
+
opts: dict[str, str] = {}
|
|
13
|
+
for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
|
|
14
|
+
opts[m.group(1).lower()] = m.group(2)
|
|
15
|
+
rest = re.sub(r'\w+\([^)]*\)', '', raw)
|
|
16
|
+
positional = [t.strip(',') for t in rest.split() if t.strip(',')]
|
|
17
|
+
return positional, opts
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@command("bayes", usage="bayes: ols depvar indepvars [, samples(4000) priorscale(10) ci(0.95)]")
def cmd_bayes(session: Session, args: str) -> str:
    """Bayesian OLS with conjugate Normal-Inverse-Gamma prior (no MCMC required)."""
    df = session.require_data()

    # strip "ols" or ": ols" prefix
    clean = re.sub(r'^\s*:?\s*ols\s+', '', args, flags=re.IGNORECASE)
    positional, opts = _stata_opts(clean)

    dep = positional[0] if positional else ""
    # Tokens that are not column names are dropped from the predictor list.
    indeps = [c for c in positional[1:] if c in df.columns]

    if not dep or not indeps:
        return "Usage: bayes: ols depvar indepvar1 indepvar2 ... [, samples(4000)]"

    # A non-numeric option value is reported to the user rather than raised.
    try:
        n_samples = int(opts.get("samples", 4000))
        prior_scale = float(opts.get("priorscale", 10.0))
        ci = float(opts.get("ci", 0.95))
    except ValueError as exc:
        return f"bayes error: invalid option value ({exc})"

    try:
        from openstat.stats.bayesian import bayes_ols
        result = bayes_ols(
            df, dep, indeps,
            n_samples=n_samples,
            prior_scale=prior_scale,
            credible_interval=ci,
        )
    except Exception as exc:
        return f"bayes error: {exc}"

    # NOTE(review): the ci_<pct>_lo / ci_<pct>_hi keys read below are assumed
    # to be produced by bayes_ols with this same int(ci * 100) value — confirm.
    ci_pct = int(ci * 100)
    lo_key = f"ci_{ci_pct}_lo"
    hi_key = f"ci_{ci_pct}_hi"
    # Built separately: inside a nested string literal the placeholder would
    # be printed verbatim instead of being interpolated.
    ci_lo_label = f"CI Lo ({ci_pct}%)"

    lines = [f"\n{result['model']}", "=" * 70]
    lines.append(f" Dependent: {dep} N = {result['n_obs']} "
                 f"Draws = {n_samples} R² ≈ {result['r_squared']:.4f}")
    lines.append(f" σ̂ = {result['sigma_mean']:.4f} (±{result['sigma_std']:.4f})")
    lines.append("")
    lines.append(
        f" {'Variable':<20} {'Post. Mean':>12} {'Post. SD':>10} "
        f"{ci_lo_label:>12} {'CI Hi':>12} {'P(β>0)':>8}"
    )
    lines.append(" " + "-" * 66)
    for name, stats in result["coefficients"].items():
        lines.append(
            f" {name:<20} {stats['mean']:>12.4f} {stats['std']:>10.4f} "
            f" {stats[lo_key]:>12.4f} {stats[hi_key]:>12.4f} {stats['prob_positive']:>8.4f}"
        )
    lines.append("=" * 70)
    lines.append(f" Prior: Normal(0, {prior_scale}²) on coefficients | IG(0.001, 0.001) on σ²")
    # Keep the result available on the session as the last fitted model.
    session._last_model = result
    return "\n".join(lines)
|