openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. openstat/__init__.py +3 -0
  2. openstat/__main__.py +4 -0
  3. openstat/backends/__init__.py +16 -0
  4. openstat/backends/duckdb_backend.py +70 -0
  5. openstat/backends/polars_backend.py +52 -0
  6. openstat/cli.py +92 -0
  7. openstat/commands/__init__.py +82 -0
  8. openstat/commands/adv_stat_cmds.py +1255 -0
  9. openstat/commands/advanced_ml_cmds.py +576 -0
  10. openstat/commands/advreg_cmds.py +207 -0
  11. openstat/commands/alias_cmds.py +135 -0
  12. openstat/commands/arch_cmds.py +82 -0
  13. openstat/commands/arules_cmds.py +111 -0
  14. openstat/commands/automodel_cmds.py +212 -0
  15. openstat/commands/backend_cmds.py +82 -0
  16. openstat/commands/base.py +170 -0
  17. openstat/commands/bayes_cmds.py +71 -0
  18. openstat/commands/causal_cmds.py +269 -0
  19. openstat/commands/cluster_cmds.py +152 -0
  20. openstat/commands/data_cmds.py +996 -0
  21. openstat/commands/datamanip_cmds.py +672 -0
  22. openstat/commands/dataquality_cmds.py +174 -0
  23. openstat/commands/datetime_cmds.py +176 -0
  24. openstat/commands/dimreduce_cmds.py +184 -0
  25. openstat/commands/discrete_cmds.py +149 -0
  26. openstat/commands/dsl_cmds.py +143 -0
  27. openstat/commands/epi_cmds.py +93 -0
  28. openstat/commands/equiv_tobit_cmds.py +94 -0
  29. openstat/commands/esttab_cmds.py +196 -0
  30. openstat/commands/export_beamer_cmds.py +142 -0
  31. openstat/commands/export_cmds.py +201 -0
  32. openstat/commands/export_extra_cmds.py +240 -0
  33. openstat/commands/factor_cmds.py +180 -0
  34. openstat/commands/groupby_cmds.py +155 -0
  35. openstat/commands/help_cmds.py +237 -0
  36. openstat/commands/i18n_cmds.py +43 -0
  37. openstat/commands/import_extra_cmds.py +561 -0
  38. openstat/commands/influence_cmds.py +134 -0
  39. openstat/commands/iv_cmds.py +106 -0
  40. openstat/commands/manova_cmds.py +105 -0
  41. openstat/commands/mediate_cmds.py +233 -0
  42. openstat/commands/meta_cmds.py +284 -0
  43. openstat/commands/mi_cmds.py +228 -0
  44. openstat/commands/mixed_cmds.py +79 -0
  45. openstat/commands/mixture_changepoint_cmds.py +166 -0
  46. openstat/commands/ml_adv_cmds.py +147 -0
  47. openstat/commands/ml_cmds.py +178 -0
  48. openstat/commands/model_eval_cmds.py +142 -0
  49. openstat/commands/network_cmds.py +288 -0
  50. openstat/commands/nlquery_cmds.py +161 -0
  51. openstat/commands/nonparam_cmds.py +149 -0
  52. openstat/commands/outreg_cmds.py +247 -0
  53. openstat/commands/panel_cmds.py +141 -0
  54. openstat/commands/pdf_cmds.py +226 -0
  55. openstat/commands/pipeline_cmds.py +319 -0
  56. openstat/commands/plot_cmds.py +189 -0
  57. openstat/commands/plugin_cmds.py +79 -0
  58. openstat/commands/posthoc_cmds.py +153 -0
  59. openstat/commands/power_cmds.py +172 -0
  60. openstat/commands/profile_cmds.py +246 -0
  61. openstat/commands/rbridge_cmds.py +81 -0
  62. openstat/commands/regex_cmds.py +104 -0
  63. openstat/commands/report_cmds.py +48 -0
  64. openstat/commands/repro_cmds.py +129 -0
  65. openstat/commands/resampling_cmds.py +109 -0
  66. openstat/commands/reshape_cmds.py +223 -0
  67. openstat/commands/sem_cmds.py +177 -0
  68. openstat/commands/stat_cmds.py +1040 -0
  69. openstat/commands/stata_import_cmds.py +215 -0
  70. openstat/commands/string_cmds.py +124 -0
  71. openstat/commands/surv_cmds.py +145 -0
  72. openstat/commands/survey_cmds.py +153 -0
  73. openstat/commands/textanalysis_cmds.py +192 -0
  74. openstat/commands/ts_adv_cmds.py +136 -0
  75. openstat/commands/ts_cmds.py +195 -0
  76. openstat/commands/tui_cmds.py +111 -0
  77. openstat/commands/ux_cmds.py +191 -0
  78. openstat/commands/validate_cmds.py +270 -0
  79. openstat/commands/viz_adv_cmds.py +312 -0
  80. openstat/commands/viz_extra_cmds.py +251 -0
  81. openstat/commands/watch_cmds.py +69 -0
  82. openstat/config.py +106 -0
  83. openstat/dsl/__init__.py +0 -0
  84. openstat/dsl/parser.py +332 -0
  85. openstat/dsl/tokenizer.py +105 -0
  86. openstat/i18n.py +120 -0
  87. openstat/io/__init__.py +0 -0
  88. openstat/io/loader.py +187 -0
  89. openstat/jupyter/__init__.py +18 -0
  90. openstat/jupyter/display.py +18 -0
  91. openstat/jupyter/magic.py +60 -0
  92. openstat/logging_config.py +59 -0
  93. openstat/plots/__init__.py +0 -0
  94. openstat/plots/plotter.py +437 -0
  95. openstat/plots/surv_plots.py +32 -0
  96. openstat/plots/ts_plots.py +59 -0
  97. openstat/plugins/__init__.py +5 -0
  98. openstat/plugins/manager.py +69 -0
  99. openstat/repl.py +457 -0
  100. openstat/reporting/__init__.py +0 -0
  101. openstat/reporting/eda.py +208 -0
  102. openstat/reporting/report.py +67 -0
  103. openstat/script_runner.py +319 -0
  104. openstat/session.py +133 -0
  105. openstat/stats/__init__.py +0 -0
  106. openstat/stats/advanced_regression.py +269 -0
  107. openstat/stats/arch_garch.py +84 -0
  108. openstat/stats/bayesian.py +103 -0
  109. openstat/stats/causal.py +258 -0
  110. openstat/stats/clustering.py +206 -0
  111. openstat/stats/discrete.py +311 -0
  112. openstat/stats/epidemiology.py +119 -0
  113. openstat/stats/equiv_tobit.py +163 -0
  114. openstat/stats/factor.py +174 -0
  115. openstat/stats/imputation.py +282 -0
  116. openstat/stats/influence.py +78 -0
  117. openstat/stats/iv.py +131 -0
  118. openstat/stats/manova.py +124 -0
  119. openstat/stats/mixed.py +128 -0
  120. openstat/stats/ml.py +275 -0
  121. openstat/stats/ml_advanced.py +117 -0
  122. openstat/stats/model_eval.py +183 -0
  123. openstat/stats/models.py +1342 -0
  124. openstat/stats/nonparametric.py +130 -0
  125. openstat/stats/panel.py +179 -0
  126. openstat/stats/power.py +295 -0
  127. openstat/stats/resampling.py +203 -0
  128. openstat/stats/survey.py +213 -0
  129. openstat/stats/survival.py +196 -0
  130. openstat/stats/timeseries.py +142 -0
  131. openstat/stats/ts_advanced.py +114 -0
  132. openstat/types.py +11 -0
  133. openstat/web/__init__.py +1 -0
  134. openstat/web/app.py +117 -0
  135. openstat/web/session_manager.py +73 -0
  136. openstat/web/static/app.js +117 -0
  137. openstat/web/static/index.html +38 -0
  138. openstat/web/static/style.css +103 -0
  139. openstat_cli-1.0.0.dist-info/METADATA +748 -0
  140. openstat_cli-1.0.0.dist-info/RECORD +143 -0
  141. openstat_cli-1.0.0.dist-info/WHEEL +4 -0
  142. openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
  143. openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,212 @@
1
+ """Automatic model selection: automodel command."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import itertools
6
+ from typing import NamedTuple
7
+
8
+ import numpy as np
9
+ import polars as pl
10
+
11
+ from openstat.commands.base import command
12
+ from openstat.session import Session
13
+ from openstat.dsl.parser import parse_formula, ParseError
14
+
15
+
16
+ class _Candidate(NamedTuple):
17
+ formula: str
18
+ model_type: str
19
+ aic: float
20
+ bic: float
21
+ r2: float | None
22
+ n: int
23
+ k: int # number of predictors
24
+
25
+
26
+ def _fit_candidate(
27
+ df: pl.DataFrame,
28
+ dep: str,
29
+ indeps: list[str],
30
+ model_type: str,
31
+ ) -> _Candidate | None:
32
+ """Fit a single candidate model, return metrics or None on failure."""
33
+ try:
34
+ if model_type == "ols":
35
+ from openstat.stats.models import fit_ols
36
+ result, _ = fit_ols(df, dep, indeps)
37
+ formula = f"{dep} ~ {' + '.join(indeps)}"
38
+ return _Candidate(
39
+ formula=formula, model_type="OLS",
40
+ aic=result.aic or float("inf"), bic=result.bic or float("inf"),
41
+ r2=result.r_squared, n=result.n_obs, k=len(indeps),
42
+ )
43
+ elif model_type == "logit":
44
+ from openstat.stats.models import fit_logit
45
+ result, _ = fit_logit(df, dep, indeps)
46
+ formula = f"{dep} ~ {' + '.join(indeps)}"
47
+ return _Candidate(
48
+ formula=formula, model_type="Logit",
49
+ aic=result.aic or float("inf"), bic=result.bic or float("inf"),
50
+ r2=result.pseudo_r2, n=result.n_obs, k=len(indeps),
51
+ )
52
+ elif model_type == "poisson":
53
+ from openstat.stats.models import fit_poisson
54
+ result, _ = fit_poisson(df, dep, indeps)
55
+ formula = f"{dep} ~ {' + '.join(indeps)}"
56
+ return _Candidate(
57
+ formula=formula, model_type="Poisson",
58
+ aic=result.aic or float("inf"), bic=result.bic or float("inf"),
59
+ r2=result.pseudo_r2, n=result.n_obs, k=len(indeps),
60
+ )
61
+ except Exception:
62
+ return None
63
+ return None
64
+
65
+
66
@command("automodel", usage="automodel <depvar> ~ <x1> <x2> ... [--ols|--logit|--poisson] [--criterion=aic|bic] [--maxvars=N]")
def cmd_automodel(session: Session, args: str) -> str:
    """Automatic model selection: fits all variable subsets and ranks by AIC/BIC.

    Uses exhaustive search for ≤ 8 predictors, forward stepwise for more.

    Examples:
      automodel score ~ age income education
      automodel employed ~ age income score region --logit --criterion=bic
      automodel score ~ age income education region --criterion=aic --maxvars=3

    Args:
        session: Active session; its data is read and, when the refit of the
            winning model succeeds, its ``_last_model*`` / ``_last_fit_*``
            attributes are overwritten.
        args: Raw argument string (formula plus optional ``--`` flags).

    Returns:
        A formatted report of the top models, or a one-line error/usage
        message.
    """
    import re
    df = session.require_data()

    # Parse flags
    if "--logit" in args:
        model_type = "logit"
    elif "--poisson" in args:
        model_type = "poisson"
    else:
        model_type = "ols"

    m_crit = re.search(r"--criterion[= ](\w+)", args)
    criterion = m_crit.group(1).lower() if m_crit else "aic"
    if criterion not in ("aic", "bic"):
        criterion = "aic"

    m_max = re.search(r"--maxvars[= ](\d+)", args)
    max_vars = int(m_max.group(1)) if m_max else None
    if max_vars is not None and max_vars < 1:
        return "--maxvars must be at least 1."

    # Clean flags from formula. Only the value-taking flags may consume a
    # following word; a boolean flag such as --logit must not swallow the
    # variable name that happens to come after it.
    formula_str = re.sub(r"--(?:criterion|maxvars)(?:[= ]\w+)?", "", args)
    formula_str = re.sub(r"--\w+(?:=\w+)?", "", formula_str).strip()
    if "~" not in formula_str:
        return (
            "Usage: automodel <depvar> ~ <x1> <x2> ... [--ols|--logit|--poisson]\n"
            "Example: automodel score ~ age income education"
        )

    # Normalize: allow space-separated predictors (convert to + separated)
    lhs, rhs = formula_str.split("~", 1)
    if "+" not in rhs:
        rhs = " + ".join(rhs.split())
    formula_str = f"{lhs.strip()} ~ {rhs.strip()}"

    try:
        dep, indeps = parse_formula(formula_str)
    except ParseError as e:
        return f"Formula error: {e}"

    if dep not in df.columns:
        return f"Dependent variable not found: {dep}"
    missing = [x for x in indeps if x not in df.columns]
    if missing:
        return f"Predictors not found: {', '.join(missing)}"

    # A predictor listed twice would only produce degenerate duplicate models.
    indeps = list(dict.fromkeys(indeps))

    k = len(indeps)
    strategy = "exhaustive" if k <= 8 else "forward stepwise"

    # Build candidates
    candidates: list[_Candidate] = []

    if strategy == "exhaustive":
        largest = k if max_vars is None else min(k, max_vars)
        for size in range(1, largest + 1):
            for subset in itertools.combinations(indeps, size):
                fitted = _fit_candidate(df, dep, list(subset), model_type)
                if fitted is not None:
                    candidates.append(fitted)
    else:
        # Forward stepwise: at each step add the single variable that gives
        # the lowest criterion, and record that step's model as a candidate.
        current: list[str] = []
        remaining = list(indeps)
        while remaining and (max_vars is None or len(current) < max_vars):
            step_best: _Candidate | None = None
            step_var: str | None = None
            for var in remaining:
                fitted = _fit_candidate(df, dep, current + [var], model_type)
                if fitted is None:
                    continue
                if step_best is None or getattr(fitted, criterion) < getattr(step_best, criterion):
                    step_best, step_var = fitted, var
            if step_best is None:
                break
            current.append(step_var)
            remaining.remove(step_var)
            candidates.append(step_best)

    if not candidates:
        return "No valid models found. Check your data and variable names."

    # Sort by criterion
    candidates.sort(key=lambda c: getattr(c, criterion))
    top_n = min(10, len(candidates))
    top = candidates[:top_n]

    # Store best model result in session
    best = candidates[0]
    load_error: Exception | None = None
    try:
        from openstat.stats import models as _models

        fitters = {"ols": _models.fit_ols, "logit": _models.fit_logit}
        fit = fitters.get(model_type, _models.fit_poisson)
        dep2, indeps2 = parse_formula(best.formula)
        result, raw = fit(df, dep2, indeps2)
        session._last_model = raw
        session._last_model_vars = (dep2, indeps2)
        session._last_fit_result = result
        session._last_fit_kwargs = {}
    except Exception as exc:
        # Best-effort: the ranking is still useful even if the refit fails,
        # but the report must not claim the model was loaded.
        load_error = exc

    crit_label = criterion.upper()
    lines = [
        f"Dependent: {dep} Candidates: {len(candidates)} Strategy: {strategy}",
        f"Model type: {model_type.upper()} Selection criterion: {crit_label}",
        "",
        f"Top {top_n} models by {crit_label}:",
        f" {'#':<3} {'AIC':>9} {'BIC':>9} {'R²/PseudoR²':>11} k Formula",
        " " + "-" * 76,
    ]
    for i, c in enumerate(top, 1):
        r2_str = f"{c.r2:.4f}" if c.r2 is not None else " —"
        marker = " ← best" if i == 1 else ""
        lines.append(
            f" {i:<3} {c.aic:>9.2f} {c.bic:>9.2f} "
            f"{r2_str:>11} {c.k} {c.formula}{marker}"
        )

    if load_error is None:
        status = "Best model loaded. Use 'estimates', 'vif', 'residuals', 'plot coef' for diagnostics."
    else:
        status = f"Best model could not be loaded into the session: {load_error}"

    lines += [
        "",
        f"Best model: {best.formula}",
        f" AIC = {best.aic:.2f} BIC = {best.bic:.2f}",
        "",
        status,
    ]

    return "\n" + "=" * 60 + "\nAutomatic Model Selection\n" + "=" * 60 + "\n" + "\n".join(lines) + "\n" + "=" * 60
@@ -0,0 +1,82 @@
1
+ """Backend management commands: set backend, sql."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from rich.console import Console
6
+ from rich.table import Table
7
+
8
+ from openstat.session import Session
9
+ from openstat.commands.base import command, CommandArgs, rich_to_str, friendly_error
10
+
11
+
12
@command("set", usage="set seed <N> | set backend polars|duckdb")
def cmd_set(session: Session, args: str) -> str:
    """Change settings: random seed, backend.

    ``set seed`` with no value reports the current seed; ``set seed <N>``
    seeds both NumPy's and the stdlib's global generators. ``set backend``
    switches the session between the polars and duckdb backends.

    Returns:
        A confirmation, usage or error message.
    """
    ca = CommandArgs(args)
    if not ca.positional:
        return "Usage: set seed <N> | set backend polars|duckdb"

    subcmd = ca.positional[0].lower()
    value = ca.positional[1] if len(ca.positional) > 1 else None

    if subcmd == "seed":
        if value is None:
            seed = getattr(session, "_repro_seed", None)
            return f"Current seed: {seed}" if seed is not None else "No seed set."
        try:
            seed = int(value)
        except ValueError:
            return f"Invalid seed: {value}. Must be an integer."
        # np.random.seed only accepts values that fit an unsigned 32-bit int
        # and raises ValueError otherwise; reject those up front.
        if not 0 <= seed <= 2**32 - 1:
            return f"Invalid seed: {seed}. Must be between 0 and {2**32 - 1}."
        import numpy as np
        import random as _random
        np.random.seed(seed)
        _random.seed(seed)
        session._repro_seed = seed  # type: ignore[attr-defined]
        return f"Seed set to {seed}. Reproducible random operations enabled."

    if subcmd == "backend":
        backend_name = (value or "").lower()
        if not backend_name:
            return "Usage: set backend polars|duckdb"
        if backend_name == "polars":
            session._backend = "polars"
            session._backend_obj = None
            return "Backend set to: polars"
        if backend_name == "duckdb":
            try:
                from openstat.backends.duckdb_backend import DuckDBBackend
                backend = DuckDBBackend()
                # If data already loaded, register it
                if session.df is not None:
                    backend._conn.register("data", session.df.to_pandas())
                    backend._table_loaded = True
            except ImportError as e:
                return str(e)
            # Commit only after everything succeeded, so a missing optional
            # dependency cannot leave the session half-switched.
            session._backend_obj = backend
            session._backend = "duckdb"
            return "Backend set to: duckdb"
        return f"Unknown backend: {backend_name}. Use 'polars' or 'duckdb'."

    return f"Unknown setting: {subcmd}. Available: seed, backend"
58
+
59
+
60
@command("sql", usage='sql "SELECT * FROM data WHERE ..."')
def cmd_sql(session: Session, args: str) -> str:
    """Execute SQL query on the loaded dataset (DuckDB backend recommended).

    The dataset is exposed to the query as the table ``data``. On success
    the session is snapshotted and its data is replaced by the query result.

    Returns:
        A summary of the result's shape, or a usage/error message.
    """
    query = args.strip()
    quotes = "\"'"
    if len(query) >= 2 and query[0] in quotes and query[0] == query[-1]:
        # The whole statement is wrapped in one pair of quotes: unwrap it.
        query = query[1:-1].strip()
    elif query[:1] in quotes and query:
        # Stray opening quote with no partner; no statement starts with one.
        query = query[1:].strip()
    # A trailing quote on an unwrapped statement is left alone: it closes a
    # SQL literal, as in: SELECT * FROM data WHERE name = 'bob'
    if not query:
        return 'Usage: sql "SELECT * FROM data WHERE ..."'

    try:
        if session._backend == "duckdb" and session._backend_obj is not None:
            result_df = session._backend_obj.sql(query)
        elif session.df is not None:
            # Use Polars SQL context as fallback
            import polars as pl
            ctx = pl.SQLContext({"data": session.df})
            result_df = ctx.execute(query).collect()
        else:
            return "No data loaded."

        # Snapshot only once the query has succeeded, right before replacing.
        session.snapshot()
        session.df = result_df
        return f"Query returned {session.shape_str}"
    except Exception as e:
        return friendly_error(e, "sql")
@@ -0,0 +1,170 @@
1
+ """Command registration infrastructure.
2
+
3
+ Provides a @command decorator that auto-registers handler functions,
4
+ and a CommandArgs helper for standardized argument parsing.
5
+
6
+ Usage:
7
+ from openstat.commands.base import command
8
+
9
+ @command("mycommand", usage="mycommand <arg>")
10
+ def cmd_mycommand(session, args):
11
+ '''One-line description shown in help.'''
12
+ ...
13
+ return "result text"
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import io
19
+ import re
20
+ from typing import Callable
21
+
22
+ from rich.console import Console
23
+
24
+ from openstat.session import Session
25
+ from openstat.logging_config import get_logger
26
+
27
+ log = get_logger("commands")
28
+
29
+ # Type alias for command handlers
30
+ Handler = Callable[[Session, str], str]
31
+
32
+ # Global registry — populated by @command decorator
33
+ _REGISTRY: dict[str, Handler] = {}
34
+ _USAGE: dict[str, str] = {}
35
+
36
+
37
class CommandArgs:
    """Standardized argument parser for commands.

    Handles: positional args, --flags, key=value options.

    The raw string is split on whitespace and each token is classified as:
      * flag       -- starts with "--" and has no "="
      * option     -- "--key=value" (leading dashes dropped from the key) or
                      "key=value" that does not start with a quote
      * positional -- everything else

    Usage:
        ca = CommandArgs(args)
        ca.positional                  # list of positional tokens
        ca.has_flag("--robust")        # True/False
        ca.get_option("how", "inner")  # key=value with default
        ca.rest_after("on")            # everything after keyword "on"
    """

    def __init__(self, raw: str) -> None:
        self.raw = raw
        self._tokens = raw.split()
        self.flags: set[str] = set()
        self.options: dict[str, str] = {}
        self.positional: list[str] = []

        for tok in self._tokens:
            kind = self._classify(tok)
            if kind == "flag":
                self.flags.add(tok)
            elif kind == "option":
                k, v = tok.split("=", 1)
                # Only "--key=value" has its dashes removed from the key.
                self.options[k.lstrip("-") if tok.startswith("--") else k] = v
            else:
                self.positional.append(tok)

    @staticmethod
    def _classify(tok: str) -> str:
        """Return "flag", "option" or "positional" for a single token."""
        if tok.startswith("--"):
            return "option" if "=" in tok else "flag"
        if "=" in tok and not tok.startswith(('"', "'")):
            return "option"
        return "positional"

    def has_flag(self, flag: str) -> bool:
        """Return True if *flag* (including its leading "--") was given."""
        return flag in self.flags

    def get_option(self, key: str, default: str | None = None) -> str | None:
        """Return the value of option *key*, or *default* when absent."""
        return self.options.get(key, default)

    def get_option_float(self, key: str, default: float) -> float:
        """Return option *key* as a float; *default* if absent or unparsable."""
        val = self.options.get(key)
        if val is None:
            return default
        try:
            return float(val)
        except ValueError:
            return default

    def rest_after(self, keyword: str) -> str | None:
        """Return everything after a keyword (case-insensitive).

        The keyword is matched literally as a whole word; returns None when
        it does not occur.
        """
        # Escape the keyword so characters like "." or "(" are not treated as
        # regex syntax.
        pattern = rf"\b{re.escape(keyword)}\b"
        parts = re.split(pattern, self.raw, maxsplit=1, flags=re.IGNORECASE)
        if len(parts) < 2:
            return None
        return parts[1].strip()

    def strip_flags_and_options(self) -> str:
        """Return raw string with all --flags and key=value removed.

        Removal is per whole token, so a flag that is a prefix of another
        token ("--log" vs "--logit") cannot corrupt it. Whitespace between
        the surviving tokens is left as it was in the raw string.
        """
        def keep_positional(match: re.Match) -> str:
            tok = match.group()
            return tok if self._classify(tok) == "positional" else ""

        return re.sub(r"\S+", keep_positional, self.raw).strip()

    def __bool__(self) -> bool:
        return bool(self.raw.strip())
104
+
105
+
106
def command(name: str, *, usage: str = "") -> Callable[[Handler], Handler]:
    """Build a decorator that registers a handler under *name*.

    Args:
        name: Command name as typed by the user.
        usage: One-line usage example shown in help; when empty,
            "<name> ..." is stored instead.

    Returns:
        A decorator that records the handler in the global registry and
        hands it back unchanged.
    """
    def register(handler: Handler) -> Handler:
        already_present = name in _REGISTRY
        if already_present:
            log.warning("Command '%s' re-registered (overriding previous)", name)
        # Last registration wins.
        _REGISTRY[name] = handler
        _USAGE[name] = usage if usage else f"{name} ..."
        return handler

    return register
120
+
121
+
122
def get_registry() -> dict[str, Handler]:
    """Expose the command registry as a live, read-only mapping.

    The proxy wraps the registry itself rather than a copy, so commands
    registered later (e.g. by plugins) show up through a proxy obtained
    earlier.
    """
    import types

    read_only_view = types.MappingProxyType(_REGISTRY)
    return read_only_view  # type: ignore[return-value]
131
+
132
+
133
def get_usage(name: str) -> str:
    """Return the usage string registered for *name*, or "" if there is none."""
    try:
        return _USAGE[name]
    except KeyError:
        return ""
135
+
136
+
137
def run_command(session, line: str) -> str:
    """Dispatch one command line to its registered handler. Used by DSL loops.

    The first whitespace-delimited word, lower-cased, is the command name;
    the remainder is passed to the handler as its argument string. Returns
    "" for a blank line or a handler that yields nothing, and an
    "Unknown command" message when the name is not registered.
    """
    stripped = line.strip()
    if not stripped:
        return ""

    head, *tail = stripped.split(None, 1)
    cmd_name = head.lower()

    handler = _REGISTRY.get(cmd_name)
    if handler is None:
        return f"Unknown command: {cmd_name}"

    args = tail[0] if tail else ""
    output = handler(session, args)
    return output or ""
149
+
150
+
151
def rich_to_str(fn) -> str:
    """Render Rich output to plain text instead of printing it.

    *fn* is called with a recording Console, 120 columns wide, that writes
    into an in-memory buffer; the recorded text is returned with trailing
    whitespace removed.
    """
    recorder = Console(file=io.StringIO(), width=120, record=True)
    fn(recorder)
    captured = recorder.export_text()
    return captured.rstrip()
157
+
158
+
159
def friendly_error(e: Exception, context: str) -> str:
    """Map common Polars/statsmodels errors onto user-friendly messages.

    The exception text is matched, case-insensitively, against a few known
    fragments (checked in order: missing column, text/type mismatch,
    singular matrix). Anything else is logged at debug level and returned
    with the raw exception text. All results carry Rich ``[red]`` markup.
    """
    msg = str(e)
    etype = type(e).__name__
    lowered = msg.lower()
    prefix = f"[red]Error:[/red] {context}: "

    if "ColumnNotFoundError" in etype or "not found" in lowered:
        return prefix + "Column not found. Check column names with 'describe'."

    if "type" in lowered and any(tok in lowered for tok in ("str", "string")):
        return prefix + "Type mismatch — cannot use arithmetic on text columns."

    if any(tok in lowered for tok in ("singular", "linalg")):
        return prefix + "Matrix is singular — check for perfect multicollinearity or constant columns."

    log.debug("Unhandled error in %s: %s: %s", context, etype, msg)
    return prefix + f"{e}"
@@ -0,0 +1,71 @@
1
+ """Bayesian regression commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from openstat.commands.base import command
8
+ from openstat.session import Session
9
+
10
+
11
+ def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
12
+ opts: dict[str, str] = {}
13
+ for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
14
+ opts[m.group(1).lower()] = m.group(2)
15
+ rest = re.sub(r'\w+\([^)]*\)', '', raw)
16
+ positional = [t.strip(',') for t in rest.split() if t.strip(',')]
17
+ return positional, opts
18
+
19
+
20
@command("bayes", usage="bayes: ols depvar indepvars [, samples(4000) priorscale(10) ci(0.95)]")
def cmd_bayes(session: Session, args: str) -> str:
    """Bayesian OLS with conjugate Normal-Inverse-Gamma prior (no MCMC required).

    Options (Stata style, after the variable list):
      samples(N)     number of draws, default 4000
      priorscale(S)  prior scale passed to the estimator, default 10
      ci(P)          credible-interval level, default 0.95

    On success the result dict is stored in ``session._last_model``.

    Returns:
        A formatted coefficient table, or a usage/error message.
    """
    df = session.require_data()

    # strip "ols" or ": ols" prefix
    clean = re.sub(r'^\s*:?\s*ols\s+', '', args, flags=re.IGNORECASE)
    positional, opts = _stata_opts(clean)

    dep = positional[0] if positional else ""
    # Tokens that are not column names are dropped rather than reported.
    indeps = [c for c in positional[1:] if c in df.columns]

    if not dep or not indeps:
        return "Usage: bayes: ols depvar indepvar1 indepvar2 ... [, samples(4000)]"

    try:
        n_samples = int(opts.get("samples", 4000))
        prior_scale = float(opts.get("priorscale", 10.0))
        ci = float(opts.get("ci", 0.95))
    except ValueError as exc:
        # A malformed option such as samples(abc) should not crash the REPL.
        return f"bayes error: invalid option value ({exc})"

    try:
        from openstat.stats.bayesian import bayes_ols
        result = bayes_ols(
            df, dep, indeps,
            n_samples=n_samples,
            prior_scale=prior_scale,
            credible_interval=ci,
        )
    except Exception as exc:
        return f"bayes error: {exc}"

    # NOTE(review): the interval keys read below are assumed to be named by
    # bayes_ols with this same int(ci * 100) truncation -- confirm there.
    ci_pct = int(ci * 100)
    lo_key = f"ci_{ci_pct}_lo"
    hi_key = f"ci_{ci_pct}_hi"
    # Built separately: a quoted string inside an f-string replacement field
    # is a plain literal, so "{ci_pct}" written there is never substituted.
    ci_lo_label = f"CI Lo ({ci_pct}%)"

    lines = [f"\n{result['model']}", "=" * 70]
    lines.append(f" Dependent: {dep} N = {result['n_obs']} "
                 f"Draws = {n_samples} R² ≈ {result['r_squared']:.4f}")
    lines.append(f" σ̂ = {result['sigma_mean']:.4f} (±{result['sigma_std']:.4f})")
    lines.append("")
    lines.append(
        f" {'Variable':<20} {'Post. Mean':>12} {'Post. SD':>10} "
        f"{ci_lo_label:>12} {'CI Hi':>12} {'P(β>0)':>8}"
    )
    lines.append(" " + "-" * 66)
    for name, stats in result["coefficients"].items():
        lines.append(
            f" {name:<20} {stats['mean']:>12.4f} {stats['std']:>10.4f} "
            f" {stats[lo_key]:>12.4f} {stats[hi_key]:>12.4f} {stats['prob_positive']:>8.4f}"
        )
    lines.append("=" * 70)
    lines.append(f" Prior: Normal(0, {prior_scale}²) on coefficients | IG(0.001, 0.001) on σ²")
    session._last_model = result
    return "\n".join(lines)