openstat-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstat/__init__.py +3 -0
- openstat/__main__.py +4 -0
- openstat/backends/__init__.py +16 -0
- openstat/backends/duckdb_backend.py +70 -0
- openstat/backends/polars_backend.py +52 -0
- openstat/cli.py +92 -0
- openstat/commands/__init__.py +82 -0
- openstat/commands/adv_stat_cmds.py +1255 -0
- openstat/commands/advanced_ml_cmds.py +576 -0
- openstat/commands/advreg_cmds.py +207 -0
- openstat/commands/alias_cmds.py +135 -0
- openstat/commands/arch_cmds.py +82 -0
- openstat/commands/arules_cmds.py +111 -0
- openstat/commands/automodel_cmds.py +212 -0
- openstat/commands/backend_cmds.py +82 -0
- openstat/commands/base.py +170 -0
- openstat/commands/bayes_cmds.py +71 -0
- openstat/commands/causal_cmds.py +269 -0
- openstat/commands/cluster_cmds.py +152 -0
- openstat/commands/data_cmds.py +996 -0
- openstat/commands/datamanip_cmds.py +672 -0
- openstat/commands/dataquality_cmds.py +174 -0
- openstat/commands/datetime_cmds.py +176 -0
- openstat/commands/dimreduce_cmds.py +184 -0
- openstat/commands/discrete_cmds.py +149 -0
- openstat/commands/dsl_cmds.py +143 -0
- openstat/commands/epi_cmds.py +93 -0
- openstat/commands/equiv_tobit_cmds.py +94 -0
- openstat/commands/esttab_cmds.py +196 -0
- openstat/commands/export_beamer_cmds.py +142 -0
- openstat/commands/export_cmds.py +201 -0
- openstat/commands/export_extra_cmds.py +240 -0
- openstat/commands/factor_cmds.py +180 -0
- openstat/commands/groupby_cmds.py +155 -0
- openstat/commands/help_cmds.py +237 -0
- openstat/commands/i18n_cmds.py +43 -0
- openstat/commands/import_extra_cmds.py +561 -0
- openstat/commands/influence_cmds.py +134 -0
- openstat/commands/iv_cmds.py +106 -0
- openstat/commands/manova_cmds.py +105 -0
- openstat/commands/mediate_cmds.py +233 -0
- openstat/commands/meta_cmds.py +284 -0
- openstat/commands/mi_cmds.py +228 -0
- openstat/commands/mixed_cmds.py +79 -0
- openstat/commands/mixture_changepoint_cmds.py +166 -0
- openstat/commands/ml_adv_cmds.py +147 -0
- openstat/commands/ml_cmds.py +178 -0
- openstat/commands/model_eval_cmds.py +142 -0
- openstat/commands/network_cmds.py +288 -0
- openstat/commands/nlquery_cmds.py +161 -0
- openstat/commands/nonparam_cmds.py +149 -0
- openstat/commands/outreg_cmds.py +247 -0
- openstat/commands/panel_cmds.py +141 -0
- openstat/commands/pdf_cmds.py +226 -0
- openstat/commands/pipeline_cmds.py +319 -0
- openstat/commands/plot_cmds.py +189 -0
- openstat/commands/plugin_cmds.py +79 -0
- openstat/commands/posthoc_cmds.py +153 -0
- openstat/commands/power_cmds.py +172 -0
- openstat/commands/profile_cmds.py +246 -0
- openstat/commands/rbridge_cmds.py +81 -0
- openstat/commands/regex_cmds.py +104 -0
- openstat/commands/report_cmds.py +48 -0
- openstat/commands/repro_cmds.py +129 -0
- openstat/commands/resampling_cmds.py +109 -0
- openstat/commands/reshape_cmds.py +223 -0
- openstat/commands/sem_cmds.py +177 -0
- openstat/commands/stat_cmds.py +1040 -0
- openstat/commands/stata_import_cmds.py +215 -0
- openstat/commands/string_cmds.py +124 -0
- openstat/commands/surv_cmds.py +145 -0
- openstat/commands/survey_cmds.py +153 -0
- openstat/commands/textanalysis_cmds.py +192 -0
- openstat/commands/ts_adv_cmds.py +136 -0
- openstat/commands/ts_cmds.py +195 -0
- openstat/commands/tui_cmds.py +111 -0
- openstat/commands/ux_cmds.py +191 -0
- openstat/commands/validate_cmds.py +270 -0
- openstat/commands/viz_adv_cmds.py +312 -0
- openstat/commands/viz_extra_cmds.py +251 -0
- openstat/commands/watch_cmds.py +69 -0
- openstat/config.py +106 -0
- openstat/dsl/__init__.py +0 -0
- openstat/dsl/parser.py +332 -0
- openstat/dsl/tokenizer.py +105 -0
- openstat/i18n.py +120 -0
- openstat/io/__init__.py +0 -0
- openstat/io/loader.py +187 -0
- openstat/jupyter/__init__.py +18 -0
- openstat/jupyter/display.py +18 -0
- openstat/jupyter/magic.py +60 -0
- openstat/logging_config.py +59 -0
- openstat/plots/__init__.py +0 -0
- openstat/plots/plotter.py +437 -0
- openstat/plots/surv_plots.py +32 -0
- openstat/plots/ts_plots.py +59 -0
- openstat/plugins/__init__.py +5 -0
- openstat/plugins/manager.py +69 -0
- openstat/repl.py +457 -0
- openstat/reporting/__init__.py +0 -0
- openstat/reporting/eda.py +208 -0
- openstat/reporting/report.py +67 -0
- openstat/script_runner.py +319 -0
- openstat/session.py +133 -0
- openstat/stats/__init__.py +0 -0
- openstat/stats/advanced_regression.py +269 -0
- openstat/stats/arch_garch.py +84 -0
- openstat/stats/bayesian.py +103 -0
- openstat/stats/causal.py +258 -0
- openstat/stats/clustering.py +206 -0
- openstat/stats/discrete.py +311 -0
- openstat/stats/epidemiology.py +119 -0
- openstat/stats/equiv_tobit.py +163 -0
- openstat/stats/factor.py +174 -0
- openstat/stats/imputation.py +282 -0
- openstat/stats/influence.py +78 -0
- openstat/stats/iv.py +131 -0
- openstat/stats/manova.py +124 -0
- openstat/stats/mixed.py +128 -0
- openstat/stats/ml.py +275 -0
- openstat/stats/ml_advanced.py +117 -0
- openstat/stats/model_eval.py +183 -0
- openstat/stats/models.py +1342 -0
- openstat/stats/nonparametric.py +130 -0
- openstat/stats/panel.py +179 -0
- openstat/stats/power.py +295 -0
- openstat/stats/resampling.py +203 -0
- openstat/stats/survey.py +213 -0
- openstat/stats/survival.py +196 -0
- openstat/stats/timeseries.py +142 -0
- openstat/stats/ts_advanced.py +114 -0
- openstat/types.py +11 -0
- openstat/web/__init__.py +1 -0
- openstat/web/app.py +117 -0
- openstat/web/session_manager.py +73 -0
- openstat/web/static/app.js +117 -0
- openstat/web/static/index.html +38 -0
- openstat/web/static/style.css +103 -0
- openstat_cli-1.0.0.dist-info/METADATA +748 -0
- openstat_cli-1.0.0.dist-info/RECORD +143 -0
- openstat_cli-1.0.0.dist-info/WHEEL +4 -0
- openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
- openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""Advanced regression commands: nls, betareg, zip, zinb, hurdle, sureg."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
import re
|
|
5
|
+
from openstat.commands.base import command
|
|
6
|
+
from openstat.session import Session
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
|
|
10
|
+
opts: dict[str, str] = {}
|
|
11
|
+
for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
|
|
12
|
+
opts[m.group(1).lower()] = m.group(2)
|
|
13
|
+
rest = re.sub(r'\w+\([^)]*\)', '', raw)
|
|
14
|
+
positional = [t.strip(',') for t in rest.split() if t.strip(',')]
|
|
15
|
+
return positional, opts
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _parse_eq(args: str, df_cols: list[str]) -> tuple[str, list[str], dict]:
|
|
19
|
+
positional, opts = _stata_opts(args)
|
|
20
|
+
dep = positional[0] if positional else ""
|
|
21
|
+
indeps = [c for c in positional[1:] if c in df_cols]
|
|
22
|
+
return dep, indeps, opts
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _coef_table(params: dict, se: dict = None, pvals: dict = None) -> str:
|
|
26
|
+
lines = [f" {'Variable':<22} {'Coef.':>10}"]
|
|
27
|
+
if se:
|
|
28
|
+
lines[0] += f" {'Std.Err.':>10}"
|
|
29
|
+
if pvals:
|
|
30
|
+
lines[0] += f" {'p-value':>8}"
|
|
31
|
+
lines.append(" " + "-" * 55)
|
|
32
|
+
for k, v in params.items():
|
|
33
|
+
row = f" {k:<22} {v:>10.4f}"
|
|
34
|
+
if se and k in se:
|
|
35
|
+
row += f" {se[k]:>10.4f}"
|
|
36
|
+
if pvals and k in pvals:
|
|
37
|
+
row += f" {pvals[k]:>8.4f}"
|
|
38
|
+
lines.append(row)
|
|
39
|
+
return "\n".join(lines)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@command("nls", usage="nls depvar indepvars [, p0(a,b,c) fn(power|exp|log)]")
def cmd_nls(session: Session, args: str) -> str:
    """Nonlinear least squares with built-in functional forms.

    Options:
        fn(name)  functional form: power (default), exp, or log
        p0(a,b)   comma-separated starting values (padded/truncated to arity)
    """
    # Fix: the original imported numpy *after* defining lambdas that use it,
    # relying on late closure binding. Import dependencies up front instead.
    import inspect

    import numpy as np

    df = session.require_data()
    dep, indeps, opts = _parse_eq(args, df.columns)
    if not dep or not indeps:
        return "Usage: nls depvar indepvar1 ... [, fn(power) p0(1,1)]"

    fn_name = opts.get("fn", "power")
    p0_raw = opts.get("p0", "1,1")
    try:
        p0 = [float(x) for x in p0_raw.split(",")]
    except ValueError:
        p0 = [1.0, 1.0]  # fall back to neutral starting values on bad input

    built_in = {
        "power": lambda X, a, b: a * X[:, 0] ** b,
        "exp": lambda X, a, b: a * np.exp(b * X[:, 0]),
        # Clamp to avoid log(0)/log(negative) blowing up the optimizer.
        "log": lambda X, a, b: a + b * np.log(np.maximum(X[:, 0], 1e-15)),
    }

    fn = built_in.get(fn_name)
    if fn is None:
        return f"Unknown function '{fn_name}'. Use: power, exp, log"

    # Pad or truncate p0 so its length matches the function arity (minus X).
    n_params = len(inspect.signature(fn).parameters) - 1
    p0 = (p0 + [1.0] * n_params)[:n_params]

    try:
        from openstat.stats.advanced_regression import fit_nls
        result = fit_nls(df, dep, indeps, fn, p0)
        session._last_model = result
        lines = [f"\nNLS ({fn_name}): {dep}", "=" * 50]
        lines.append(f" {'N obs':<22} {result['n_obs']:>10}")
        lines.append(f" {'R²':<22} {result['r_squared']:>10.4f}")
        lines.append(f" {'Converged':<22} {result['converged']!s:>10}")
        lines.append("\nParameters:")
        lines.append(_coef_table(result["params"], result["std_errors"]))
        lines.append("=" * 50)
        return "\n".join(lines)
    except Exception as exc:
        return f"nls error: {exc}"
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@command("betareg", usage="betareg depvar indepvars")
def cmd_betareg(session: Session, args: str) -> str:
    """Beta regression for (0,1) bounded outcomes."""
    df = session.require_data()
    dep, indeps, opts = _parse_eq(args, df.columns)
    if not (dep and indeps):
        return "Usage: betareg depvar indepvar1 ..."
    try:
        from openstat.stats.advanced_regression import fit_betareg
        result = fit_betareg(df, dep, indeps)
        session._last_model = result
        # Assemble the report in one pass: header, fit stats, coefficients.
        header = f"\n{result['method']}: {dep}"
        summary = f" N = {result['n_obs']} AIC = {result['aic']:.4f} Pseudo-R² = {result['pseudo_r2']:.4f}"
        table = _coef_table(result["params"], result["std_errors"], result["p_values"])
        return "\n".join([header, "=" * 55, summary, "\nCoefficients:", table, "=" * 55])
    except Exception as exc:
        return f"betareg error: {exc}"
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
@command("zip", usage="zip depvar indepvars")
def cmd_zip(session: Session, args: str) -> str:
    """Zero-Inflated Poisson regression."""
    df = session.require_data()
    dep, indeps, opts = _parse_eq(args, df.columns)
    if not (dep and indeps):
        return "Usage: zip depvar indepvar1 ..."
    try:
        from openstat.stats.advanced_regression import fit_zip
        result = fit_zip(df, dep, indeps)
        session._last_model = result
        # Header, fit statistics, then the coefficient table.
        header = f"\n{result['method']}: {dep}"
        summary = f" N = {result['n_obs']} AIC = {result['aic']:.4f} LL = {result['log_likelihood']:.4f}"
        table = _coef_table(result["params"], result["std_errors"], result["p_values"])
        return "\n".join([header, "=" * 55, summary, "\nCoefficients:", table, "=" * 55])
    except Exception as exc:
        return f"zip error: {exc}"
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
@command("zinb", usage="zinb depvar indepvars")
def cmd_zinb(session: Session, args: str) -> str:
    """Zero-Inflated Negative Binomial regression."""
    df = session.require_data()
    dep, indeps, opts = _parse_eq(args, df.columns)
    if not (dep and indeps):
        return "Usage: zinb depvar indepvar1 ..."
    try:
        from openstat.stats.advanced_regression import fit_zinb
        result = fit_zinb(df, dep, indeps)
        session._last_model = result
        # Header, fit statistics, then the coefficient table.
        header = f"\n{result['method']}: {dep}"
        summary = f" N = {result['n_obs']} AIC = {result['aic']:.4f} LL = {result['log_likelihood']:.4f}"
        table = _coef_table(result["params"], result["std_errors"], result["p_values"])
        return "\n".join([header, "=" * 55, summary, "\nCoefficients:", table, "=" * 55])
    except Exception as exc:
        return f"zinb error: {exc}"
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
@command("hurdle", usage="hurdle depvar indepvars")
def cmd_hurdle(session: Session, args: str) -> str:
    """Hurdle model: Logit for zeros + Poisson for positives."""
    df = session.require_data()
    dep, indeps, opts = _parse_eq(args, df.columns)
    if not (dep and indeps):
        return "Usage: hurdle depvar indepvar1 ..."
    try:
        from openstat.stats.advanced_regression import fit_hurdle
        result = fit_hurdle(df, dep, indeps)
        session._last_model = result
        # Two-part report: the zero/positive split, then one coefficient
        # table per model stage.
        parts = [
            f"\nHurdle Model: {dep}",
            "=" * 55,
            f" N = {result['n_obs']} Zeros = {result['n_zeros']} Positive = {result['n_positive']}",
            "\nPart 1 — Logit (P(y > 0)):",
            _coef_table(result["logit_params"], pvals=result["logit_pvalues"]),
            "\nPart 2 — Poisson (E[y | y > 0]):",
            _coef_table(result["count_params"], pvals=result["count_pvalues"]),
            "=" * 55,
        ]
        return "\n".join(parts)
    except Exception as exc:
        return f"hurdle error: {exc}"
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
@command("sureg", usage="sureg (dep1 x1 x2) (dep2 x3 x4)")
def cmd_sureg(session: Session, args: str) -> str:
    """Seemingly Unrelated Regression."""
    df = session.require_data()
    # Each parenthesized group is one equation: (dep x1 x2)
    eq_specs = re.findall(r'\(([^)]+)\)', args)
    if not eq_specs:
        return "Usage: sureg (dep1 x1 x2) (dep2 x3 x4)"

    equations = []
    for spec in eq_specs:
        tokens = spec.split()
        if len(tokens) < 2:
            continue  # an equation needs a depvar plus at least one regressor
        dep, candidates = tokens[0], tokens[1:]
        indeps = [name for name in candidates if name in df.columns]
        if dep in df.columns and indeps:
            equations.append((dep, indeps))

    if not equations:
        return "No valid equations found."
    try:
        from openstat.stats.advanced_regression import fit_sur
        result = fit_sur(df, equations)
        session._last_model = result
        out = [f"\nSUR: {result['n_equations']} equations", "=" * 60]
        for eq in result["equations"]:
            out.append(f"\nEquation {eq['equation']}: {eq['dep']} R² = {eq['r_squared']:.4f} N = {eq['n_obs']}")
            out.append(_coef_table(eq["params"], eq["std_errors"]))
        out.append("=" * 60)
        return "\n".join(out)
    except Exception as exc:
        return f"sureg error: {exc}"
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Alias and theme commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from openstat.commands.base import command, CommandArgs, get_registry, friendly_error
|
|
6
|
+
from openstat.session import Session
|
|
7
|
+
|
|
8
|
+
# Module-level alias store
|
|
9
|
+
_ALIASES: dict[str, str] = {}
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def get_aliases() -> dict[str, str]:
    """Return the live module-level alias mapping (name -> expansion).

    The returned dict is the store itself, not a copy: mutations affect
    alias resolution directly.
    """
    return _ALIASES
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def resolve_alias(line: str) -> str:
    """If *line* starts with a known alias, expand it.

    Only the first whitespace-delimited token is checked; the rest of the
    line is appended to the expansion unchanged. Unknown tokens (and empty
    lines) are returned as-is. Expansion is single-pass: an alias whose
    expansion is itself an alias is not expanded again.
    """
    # Fixes two issues in the original: line.split() was called twice, and
    # the remainder was taken as line[len(token):], which sliced at the
    # wrong position whenever the line had leading whitespace.
    parts = line.split(None, 1)
    token = parts[0] if parts else ""
    if token in _ALIASES:
        rest = parts[1].strip() if len(parts) > 1 else ""
        expanded = _ALIASES[token]
        return f"{expanded} {rest}".strip() if rest else expanded
    return line
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@command("alias", usage="alias [<name> <expansion>] | alias list | alias rm <name>")
def cmd_alias(session: Session, args: str) -> str:
    """Define or manage command aliases.

    Examples:
        alias reg ols — 'reg y x' → 'ols y x'
        alias desc describe
        alias list — show all aliases
        alias rm reg — remove alias
    """
    tokens = args.strip().split(None, 2)

    # No arguments, or explicit "list": show the alias table.
    if not tokens or tokens[0] == "list":
        if not _ALIASES:
            return "No aliases defined. Use: alias <name> <expansion>"
        rows = ["Aliases:", " {:<15} {}".format("Name", "Expansion"), "-" * 40]
        rows.extend(f" {name:<15} {target}" for name, target in sorted(_ALIASES.items()))
        return "\n".join(rows)

    # "rm <name>": delete a single alias if it exists.
    if tokens[0] == "rm":
        if len(tokens) < 2:
            return "Usage: alias rm <name>"
        name = tokens[1]
        if name not in _ALIASES:
            return f"Alias '{name}' not found."
        del _ALIASES[name]
        return f"Alias '{name}' removed."

    if len(tokens) < 2:
        return "Usage: alias <name> <expansion>"

    name = tokens[0]
    expansion = " ".join(tokens[1:])

    # Refuse a direct self-reference, which would expand to itself.
    if name == expansion.split()[0]:
        return f"Cannot alias '{name}' to itself."

    _ALIASES[name] = expansion
    return f"Alias set: {name} → {expansion}"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ── theme ─────────────────────────────────────────────────────────────────────
|
|
70
|
+
|
|
71
|
+
# Named color themes. Each theme maps a UI role (prompt/output/error/info)
# to a color-style string; consumers look styles up via get_active_theme().
_THEMES: dict[str, dict[str, str]] = {
    "dark": {
        "prompt": "bold cyan",
        "output": "white",
        "error": "bold red",
        "info": "bright_blue",
    },
    "light": {
        "prompt": "bold blue",
        "output": "black",
        "error": "bold red",
        "info": "blue",
    },
    "solarized": {
        "prompt": "bold yellow",
        "output": "bright_white",
        "error": "bold magenta",
        "info": "cyan",
    },
    "matrix": {
        "prompt": "bold green",
        "output": "green",
        "error": "bold red",
        "info": "bright_green",
    },
}

# Name of the currently selected theme; reassigned by cmd_theme().
_ACTIVE_THEME: str = "dark"
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def get_active_theme() -> dict[str, str]:
    """Return the style mapping of the active theme.

    Falls back to the "dark" theme if _ACTIVE_THEME names an unknown theme.
    """
    return _THEMES.get(_ACTIVE_THEME, _THEMES["dark"])
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@command("theme", usage="theme [<name>] | theme list")
def cmd_theme(session: Session, args: str) -> str:
    """Set or list color themes.

    Available themes: dark (default), light, solarized, matrix.

    Examples:
        theme — show current theme
        theme list — list all themes
        theme solarized — switch to solarized
    """
    # Fix: the original re-imported this module under an alias just to
    # assign the module-level _ACTIVE_THEME; `global` does the same thing
    # directly and without depending on the module's import path.
    global _ACTIVE_THEME

    tokens = args.strip().split()

    if not tokens:
        return f"Current theme: {_ACTIVE_THEME}"

    if tokens[0] == "list":
        lines = ["Available themes:"]
        for name in _THEMES:
            marker = " (active)" if name == _ACTIVE_THEME else ""
            lines.append(f" {name}{marker}")
        return "\n".join(lines)

    name = tokens[0].lower()
    if name not in _THEMES:
        available = ", ".join(_THEMES)
        return f"Unknown theme: {name}. Available: {available}"

    _ACTIVE_THEME = name
    return f"Theme set to: {name}"
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""ARCH/GARCH volatility model commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from openstat.commands.base import command
|
|
8
|
+
from openstat.session import Session
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
|
|
12
|
+
opts: dict[str, str] = {}
|
|
13
|
+
for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
|
|
14
|
+
opts[m.group(1).lower()] = m.group(2)
|
|
15
|
+
rest = re.sub(r'\w+\([^)]*\)', '', raw)
|
|
16
|
+
positional = [t.strip(',') for t in rest.split() if t.strip(',')]
|
|
17
|
+
return positional, opts
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _arch_table(result: dict) -> str:
|
|
21
|
+
lines = [f"\n{result['model']}: {result['var']}", "=" * 55]
|
|
22
|
+
lines.append(f" {'N observations':<25} {result['n_obs']}")
|
|
23
|
+
lines.append(f" {'Log-likelihood':<25} {result['log_likelihood']:.4f}")
|
|
24
|
+
lines.append(f" {'AIC':<25} {result['aic']:.4f}")
|
|
25
|
+
lines.append(f" {'BIC':<25} {result['bic']:.4f}")
|
|
26
|
+
lines.append("\nParameters:")
|
|
27
|
+
for name, val in result["params"].items():
|
|
28
|
+
lines.append(f" {name:<25} {val:>12.6f}")
|
|
29
|
+
if "cond_volatility_last5" in result:
|
|
30
|
+
lines.append("\nConditional volatility (last 5 obs):")
|
|
31
|
+
for v in result["cond_volatility_last5"]:
|
|
32
|
+
lines.append(f" {v:.4f}")
|
|
33
|
+
lines.append("=" * 55)
|
|
34
|
+
return "\n".join(lines)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@command("arch", usage="arch var [, p(1) dist(normal|t)]")
def cmd_arch(session: Session, args: str) -> str:
    """ARCH(p) model for volatility clustering."""
    df = session.require_data()
    positional, opts = _stata_opts(args)
    if not positional:
        return "Usage: arch var [, p(1) dist(normal)]"

    var = positional[0]
    # Option defaults: one ARCH lag, normal innovations.
    lag_p = int(opts.get("p", 1))
    dist = opts.get("dist", "normal")

    try:
        from openstat.stats.arch_garch import fit_arch
        result = fit_arch(df, var, p=lag_p, dist=dist)
        session._last_model = result
        return _arch_table(result)
    except ImportError as e:
        # The backend surfaces a helpful install message; pass it through.
        return str(e)
    except Exception as exc:
        return f"arch error: {exc}"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@command("garch", usage="garch var [, p(1) q(1) model(GARCH|EGARCH|GJR-GARCH) dist(normal)]")
def cmd_garch(session: Session, args: str) -> str:
    """GARCH(p,q) or variant volatility model."""
    df = session.require_data()
    positional, opts = _stata_opts(args)
    if not positional:
        return "Usage: garch var [, p(1) q(1) model(GARCH) dist(normal)]"

    var = positional[0]
    # Option defaults: GARCH(1,1) with normal innovations.
    lag_p = int(opts.get("p", 1))
    lag_q = int(opts.get("q", 1))
    dist = opts.get("dist", "normal")
    model = opts.get("model", "GARCH")

    try:
        from openstat.stats.arch_garch import fit_garch
        result = fit_garch(df, var, p=lag_p, q=lag_q, dist=dist, model=model)
        session._last_model = result
        return _arch_table(result)
    except ImportError as e:
        # The backend surfaces a helpful install message; pass it through.
        return str(e)
    except Exception as exc:
        return f"garch error: {exc}"
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Association rules: Apriori / FP-Growth."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
from openstat.commands.base import command, CommandArgs, friendly_error
|
|
5
|
+
from openstat.session import Session
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@command("arules", usage="arules <item_col> [<id_col>] [--minsup=0.05] [--minconf=0.5] [--algo=fpgrowth]")
def cmd_arules(session: Session, args: str) -> str:
    """Association rule mining (Apriori / FP-Growth).

    Mines frequent itemsets and generates association rules.
    Expects one-row-per-transaction with an item column, OR
    a transaction-id column + item column.

    Options:
        --minsup=<f>   minimum support (0–1, default: 0.05)
        --minconf=<f>  minimum confidence (0–1, default: 0.5)
        --minlift=<f>  minimum lift (default: 1.0)
        --algo=<a>     apriori or fpgrowth (default: fpgrowth)
        --top=<n>      show top N rules (default: 20)

    Examples:
        arules item transaction_id --minsup=0.1 --minconf=0.6
        arules product --minsup=0.05 --algo=apriori --top=10
    """
    # mlxtend is an optional dependency; bail out with an install hint.
    try:
        from mlxtend.frequent_patterns import fpgrowth, apriori, association_rules
        from mlxtend.preprocessing import TransactionEncoder
    except ImportError:
        return "mlxtend required. Install: pip install mlxtend"

    # polars holds the session data; pandas is what mlxtend consumes.
    import polars as pl
    import pandas as pd

    ca = CommandArgs(args)
    if not ca.positional:
        return "Usage: arules <item_col> [<id_col>]"

    item_col = ca.positional[0]
    id_col = ca.positional[1] if len(ca.positional) > 1 else None
    # NOTE(review): these conversions run outside the try below, so a
    # malformed numeric option (e.g. --minsup=abc) raises instead of
    # producing a friendly error — confirm whether that is intended.
    min_sup = float(ca.options.get("minsup", 0.05))
    min_conf = float(ca.options.get("minconf", 0.5))
    min_lift = float(ca.options.get("minlift", 1.0))
    algo = ca.options.get("algo", "fpgrowth").lower()
    top_n = int(ca.options.get("top", 20))

    try:
        df = session.require_data()
        if item_col not in df.columns:
            return f"Column not found: {item_col}"
        if id_col and id_col not in df.columns:
            return f"Column not found: {id_col}"

        # Build transactions
        if id_col:
            # Group items by transaction id: one list of item strings per id.
            transactions = (
                df.select([id_col, item_col])
                .group_by(id_col)
                .agg(pl.col(item_col).cast(pl.Utf8).alias("items"))
                ["items"].to_list()
            )
        else:
            # Each row is a transaction; split by comma if needed
            col_vals = df[item_col].cast(pl.Utf8).to_list()
            transactions = [[v.strip() for v in row.split(",") if v.strip()]
                            for row in col_vals if row]

        if len(transactions) < 2:
            return "Need at least 2 transactions."

        # One-hot encode transactions into the boolean basket matrix
        # mlxtend's miners expect.
        te = TransactionEncoder()
        te_array = te.fit(transactions).transform(transactions)
        basket_df = pd.DataFrame(te_array, columns=te.columns_)

        # Mine frequent itemsets with the requested algorithm
        # (fpgrowth is the default; any other value also falls through to it).
        if algo == "apriori":
            frequent = apriori(basket_df, min_support=min_sup, use_colnames=True)
        else:
            frequent = fpgrowth(basket_df, min_support=min_sup, use_colnames=True)

        if frequent.empty:
            return f"No frequent itemsets found at min_support={min_sup}. Try lowering --minsup."

        # Derive rules by confidence, then filter by lift and rank best-first.
        rules = association_rules(frequent, metric="confidence", min_threshold=min_conf)
        rules = rules[rules["lift"] >= min_lift].sort_values("lift", ascending=False)

        lines = [
            f"Association Rules ({algo.upper()}) — {item_col}",
            f" Transactions: {len(transactions)}, Items: {len(te.columns_)}",
            f" min_support={min_sup}, min_confidence={min_conf}, min_lift={min_lift}",
            f" Frequent itemsets: {len(frequent)}, Rules: {len(rules)}",
            "",
        ]

        if rules.empty:
            lines.append(" No rules found. Try lowering --minconf or --minlift.")
        else:
            lines.append(f" Top {min(top_n, len(rules))} rules by lift:")
            lines.append(f" {'Antecedent':<30} {'Consequent':<20} {'Sup':>7} {'Conf':>7} {'Lift':>7}")
            lines.append(" " + "-" * 75)
            for _, row in rules.head(top_n).iterrows():
                # antecedents/consequents are frozensets; render sorted and
                # truncated to the column widths.
                ant = ", ".join(sorted(row["antecedents"]))[:28]
                con = ", ".join(sorted(row["consequents"]))[:18]
                lines.append(
                    f" {ant:<30} {con:<20} {row['support']:>7.3f} {row['confidence']:>7.3f} {row['lift']:>7.3f}"
                )

        return "\n".join(lines)
    except Exception as e:
        return friendly_error(e, "arules")
|