openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. openstat/__init__.py +3 -0
  2. openstat/__main__.py +4 -0
  3. openstat/backends/__init__.py +16 -0
  4. openstat/backends/duckdb_backend.py +70 -0
  5. openstat/backends/polars_backend.py +52 -0
  6. openstat/cli.py +92 -0
  7. openstat/commands/__init__.py +82 -0
  8. openstat/commands/adv_stat_cmds.py +1255 -0
  9. openstat/commands/advanced_ml_cmds.py +576 -0
  10. openstat/commands/advreg_cmds.py +207 -0
  11. openstat/commands/alias_cmds.py +135 -0
  12. openstat/commands/arch_cmds.py +82 -0
  13. openstat/commands/arules_cmds.py +111 -0
  14. openstat/commands/automodel_cmds.py +212 -0
  15. openstat/commands/backend_cmds.py +82 -0
  16. openstat/commands/base.py +170 -0
  17. openstat/commands/bayes_cmds.py +71 -0
  18. openstat/commands/causal_cmds.py +269 -0
  19. openstat/commands/cluster_cmds.py +152 -0
  20. openstat/commands/data_cmds.py +996 -0
  21. openstat/commands/datamanip_cmds.py +672 -0
  22. openstat/commands/dataquality_cmds.py +174 -0
  23. openstat/commands/datetime_cmds.py +176 -0
  24. openstat/commands/dimreduce_cmds.py +184 -0
  25. openstat/commands/discrete_cmds.py +149 -0
  26. openstat/commands/dsl_cmds.py +143 -0
  27. openstat/commands/epi_cmds.py +93 -0
  28. openstat/commands/equiv_tobit_cmds.py +94 -0
  29. openstat/commands/esttab_cmds.py +196 -0
  30. openstat/commands/export_beamer_cmds.py +142 -0
  31. openstat/commands/export_cmds.py +201 -0
  32. openstat/commands/export_extra_cmds.py +240 -0
  33. openstat/commands/factor_cmds.py +180 -0
  34. openstat/commands/groupby_cmds.py +155 -0
  35. openstat/commands/help_cmds.py +237 -0
  36. openstat/commands/i18n_cmds.py +43 -0
  37. openstat/commands/import_extra_cmds.py +561 -0
  38. openstat/commands/influence_cmds.py +134 -0
  39. openstat/commands/iv_cmds.py +106 -0
  40. openstat/commands/manova_cmds.py +105 -0
  41. openstat/commands/mediate_cmds.py +233 -0
  42. openstat/commands/meta_cmds.py +284 -0
  43. openstat/commands/mi_cmds.py +228 -0
  44. openstat/commands/mixed_cmds.py +79 -0
  45. openstat/commands/mixture_changepoint_cmds.py +166 -0
  46. openstat/commands/ml_adv_cmds.py +147 -0
  47. openstat/commands/ml_cmds.py +178 -0
  48. openstat/commands/model_eval_cmds.py +142 -0
  49. openstat/commands/network_cmds.py +288 -0
  50. openstat/commands/nlquery_cmds.py +161 -0
  51. openstat/commands/nonparam_cmds.py +149 -0
  52. openstat/commands/outreg_cmds.py +247 -0
  53. openstat/commands/panel_cmds.py +141 -0
  54. openstat/commands/pdf_cmds.py +226 -0
  55. openstat/commands/pipeline_cmds.py +319 -0
  56. openstat/commands/plot_cmds.py +189 -0
  57. openstat/commands/plugin_cmds.py +79 -0
  58. openstat/commands/posthoc_cmds.py +153 -0
  59. openstat/commands/power_cmds.py +172 -0
  60. openstat/commands/profile_cmds.py +246 -0
  61. openstat/commands/rbridge_cmds.py +81 -0
  62. openstat/commands/regex_cmds.py +104 -0
  63. openstat/commands/report_cmds.py +48 -0
  64. openstat/commands/repro_cmds.py +129 -0
  65. openstat/commands/resampling_cmds.py +109 -0
  66. openstat/commands/reshape_cmds.py +223 -0
  67. openstat/commands/sem_cmds.py +177 -0
  68. openstat/commands/stat_cmds.py +1040 -0
  69. openstat/commands/stata_import_cmds.py +215 -0
  70. openstat/commands/string_cmds.py +124 -0
  71. openstat/commands/surv_cmds.py +145 -0
  72. openstat/commands/survey_cmds.py +153 -0
  73. openstat/commands/textanalysis_cmds.py +192 -0
  74. openstat/commands/ts_adv_cmds.py +136 -0
  75. openstat/commands/ts_cmds.py +195 -0
  76. openstat/commands/tui_cmds.py +111 -0
  77. openstat/commands/ux_cmds.py +191 -0
  78. openstat/commands/validate_cmds.py +270 -0
  79. openstat/commands/viz_adv_cmds.py +312 -0
  80. openstat/commands/viz_extra_cmds.py +251 -0
  81. openstat/commands/watch_cmds.py +69 -0
  82. openstat/config.py +106 -0
  83. openstat/dsl/__init__.py +0 -0
  84. openstat/dsl/parser.py +332 -0
  85. openstat/dsl/tokenizer.py +105 -0
  86. openstat/i18n.py +120 -0
  87. openstat/io/__init__.py +0 -0
  88. openstat/io/loader.py +187 -0
  89. openstat/jupyter/__init__.py +18 -0
  90. openstat/jupyter/display.py +18 -0
  91. openstat/jupyter/magic.py +60 -0
  92. openstat/logging_config.py +59 -0
  93. openstat/plots/__init__.py +0 -0
  94. openstat/plots/plotter.py +437 -0
  95. openstat/plots/surv_plots.py +32 -0
  96. openstat/plots/ts_plots.py +59 -0
  97. openstat/plugins/__init__.py +5 -0
  98. openstat/plugins/manager.py +69 -0
  99. openstat/repl.py +457 -0
  100. openstat/reporting/__init__.py +0 -0
  101. openstat/reporting/eda.py +208 -0
  102. openstat/reporting/report.py +67 -0
  103. openstat/script_runner.py +319 -0
  104. openstat/session.py +133 -0
  105. openstat/stats/__init__.py +0 -0
  106. openstat/stats/advanced_regression.py +269 -0
  107. openstat/stats/arch_garch.py +84 -0
  108. openstat/stats/bayesian.py +103 -0
  109. openstat/stats/causal.py +258 -0
  110. openstat/stats/clustering.py +206 -0
  111. openstat/stats/discrete.py +311 -0
  112. openstat/stats/epidemiology.py +119 -0
  113. openstat/stats/equiv_tobit.py +163 -0
  114. openstat/stats/factor.py +174 -0
  115. openstat/stats/imputation.py +282 -0
  116. openstat/stats/influence.py +78 -0
  117. openstat/stats/iv.py +131 -0
  118. openstat/stats/manova.py +124 -0
  119. openstat/stats/mixed.py +128 -0
  120. openstat/stats/ml.py +275 -0
  121. openstat/stats/ml_advanced.py +117 -0
  122. openstat/stats/model_eval.py +183 -0
  123. openstat/stats/models.py +1342 -0
  124. openstat/stats/nonparametric.py +130 -0
  125. openstat/stats/panel.py +179 -0
  126. openstat/stats/power.py +295 -0
  127. openstat/stats/resampling.py +203 -0
  128. openstat/stats/survey.py +213 -0
  129. openstat/stats/survival.py +196 -0
  130. openstat/stats/timeseries.py +142 -0
  131. openstat/stats/ts_advanced.py +114 -0
  132. openstat/types.py +11 -0
  133. openstat/web/__init__.py +1 -0
  134. openstat/web/app.py +117 -0
  135. openstat/web/session_manager.py +73 -0
  136. openstat/web/static/app.js +117 -0
  137. openstat/web/static/index.html +38 -0
  138. openstat/web/static/style.css +103 -0
  139. openstat_cli-1.0.0.dist-info/METADATA +748 -0
  140. openstat_cli-1.0.0.dist-info/RECORD +143 -0
  141. openstat_cli-1.0.0.dist-info/WHEEL +4 -0
  142. openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
  143. openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,207 @@
1
+ """Advanced regression commands: nls, betareg, zip, zinb, hurdle, sureg."""
2
+
3
+ from __future__ import annotations
4
+ import re
5
+ from openstat.commands.base import command
6
+ from openstat.session import Session
7
+
8
+
9
+ def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
10
+ opts: dict[str, str] = {}
11
+ for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
12
+ opts[m.group(1).lower()] = m.group(2)
13
+ rest = re.sub(r'\w+\([^)]*\)', '', raw)
14
+ positional = [t.strip(',') for t in rest.split() if t.strip(',')]
15
+ return positional, opts
16
+
17
+
18
def _parse_eq(args: str, df_cols: list[str]) -> tuple[str, list[str], dict]:
    """Parse 'depvar indepvars [, options]' into (dep, valid indeps, options)."""
    tokens, parsed_opts = _stata_opts(args)
    dep_var = tokens[0] if tokens else ""
    # Only keep regressors that actually exist in the loaded data.
    regressors = [col for col in tokens[1:] if col in df_cols]
    return dep_var, regressors, parsed_opts
23
+
24
+
25
+ def _coef_table(params: dict, se: dict = None, pvals: dict = None) -> str:
26
+ lines = [f" {'Variable':<22} {'Coef.':>10}"]
27
+ if se:
28
+ lines[0] += f" {'Std.Err.':>10}"
29
+ if pvals:
30
+ lines[0] += f" {'p-value':>8}"
31
+ lines.append(" " + "-" * 55)
32
+ for k, v in params.items():
33
+ row = f" {k:<22} {v:>10.4f}"
34
+ if se and k in se:
35
+ row += f" {se[k]:>10.4f}"
36
+ if pvals and k in pvals:
37
+ row += f" {pvals[k]:>8.4f}"
38
+ lines.append(row)
39
+ return "\n".join(lines)
40
+
41
+
42
@command("nls", usage="nls depvar indepvars [, p0(a,b,c) fn(power|exp|log)]")
def cmd_nls(session: Session, args: str) -> str:
    """Nonlinear least squares with built-in functional forms.

    Options:
        fn(name)  functional form: power (default), exp, or log
        p0(a,b)   comma-separated starting values (default: 1,1)
    """
    # Hoisted above the lambdas: the originals defined np-referencing
    # lambdas *before* `import numpy as np`, which only worked because
    # closures resolve names at call time — fragile ordering.
    import inspect
    import numpy as np

    df = session.require_data()
    dep, indeps, opts = _parse_eq(args, df.columns)
    if not dep or not indeps:
        return "Usage: nls depvar indepvar1 ... [, fn(power) p0(1,1)]"

    fn_name = opts.get("fn", "power")
    p0_raw = opts.get("p0", "1,1")
    try:
        p0 = [float(x) for x in p0_raw.split(",")]
    except ValueError:
        # Bad starting values fall back to neutral defaults rather than erroring.
        p0 = [1.0, 1.0]

    # Each form takes the design matrix X plus its scalar parameters.
    built_in = {
        "power": lambda X, a, b: a * X[:, 0] ** b,
        "exp": lambda X, a, b: a * np.exp(b * X[:, 0]),
        "log": lambda X, a, b: a + b * np.log(np.maximum(X[:, 0], 1e-15)),
    }

    fn = built_in.get(fn_name)
    if fn is None:
        return f"Unknown function '{fn_name}'. Use: power, exp, log"

    # Adjust p0 to match function arity (first parameter is the data matrix X).
    n_params = len(inspect.signature(fn).parameters) - 1
    p0 = (p0 + [1.0] * n_params)[:n_params]

    try:
        from openstat.stats.advanced_regression import fit_nls
        result = fit_nls(df, dep, indeps, fn, p0)
        session._last_model = result
        lines = [f"\nNLS ({fn_name}): {dep}", "=" * 50]
        lines.append(f" {'N obs':<22} {result['n_obs']:>10}")
        lines.append(f" {'R²':<22} {result['r_squared']:>10.4f}")
        lines.append(f" {'Converged':<22} {result['converged']!s:>10}")
        lines.append("\nParameters:")
        lines.append(_coef_table(result["params"], result["std_errors"]))
        lines.append("=" * 50)
        return "\n".join(lines)
    except Exception as exc:
        return f"nls error: {exc}"
87
+
88
+
89
@command("betareg", usage="betareg depvar indepvars")
def cmd_betareg(session: Session, args: str) -> str:
    """Beta regression for (0,1) bounded outcomes."""
    df = session.require_data()
    dep, indeps, opts = _parse_eq(args, df.columns)
    if not (dep and indeps):
        return "Usage: betareg depvar indepvar1 ..."
    try:
        from openstat.stats.advanced_regression import fit_betareg
        result = fit_betareg(df, dep, indeps)
        session._last_model = result
        divider = "=" * 55
        report = [
            f"\n{result['method']}: {dep}",
            divider,
            f" N = {result['n_obs']} AIC = {result['aic']:.4f} Pseudo-R² = {result['pseudo_r2']:.4f}",
            "\nCoefficients:",
            _coef_table(result["params"], result["std_errors"], result["p_values"]),
            divider,
        ]
        return "\n".join(report)
    except Exception as exc:
        return f"betareg error: {exc}"
108
+
109
+
110
@command("zip", usage="zip depvar indepvars")
def cmd_zip(session: Session, args: str) -> str:
    """Zero-Inflated Poisson regression."""
    df = session.require_data()
    dep, indeps, opts = _parse_eq(args, df.columns)
    if not (dep and indeps):
        return "Usage: zip depvar indepvar1 ..."
    try:
        from openstat.stats.advanced_regression import fit_zip
        result = fit_zip(df, dep, indeps)
        session._last_model = result
        divider = "=" * 55
        report = [
            f"\n{result['method']}: {dep}",
            divider,
            f" N = {result['n_obs']} AIC = {result['aic']:.4f} LL = {result['log_likelihood']:.4f}",
            "\nCoefficients:",
            _coef_table(result["params"], result["std_errors"], result["p_values"]),
            divider,
        ]
        return "\n".join(report)
    except Exception as exc:
        return f"zip error: {exc}"
129
+
130
+
131
@command("zinb", usage="zinb depvar indepvars")
def cmd_zinb(session: Session, args: str) -> str:
    """Zero-Inflated Negative Binomial regression."""
    df = session.require_data()
    dep, indeps, opts = _parse_eq(args, df.columns)
    if not (dep and indeps):
        return "Usage: zinb depvar indepvar1 ..."
    try:
        from openstat.stats.advanced_regression import fit_zinb
        result = fit_zinb(df, dep, indeps)
        session._last_model = result
        divider = "=" * 55
        report = [
            f"\n{result['method']}: {dep}",
            divider,
            f" N = {result['n_obs']} AIC = {result['aic']:.4f} LL = {result['log_likelihood']:.4f}",
            "\nCoefficients:",
            _coef_table(result["params"], result["std_errors"], result["p_values"]),
            divider,
        ]
        return "\n".join(report)
    except Exception as exc:
        return f"zinb error: {exc}"
150
+
151
+
152
@command("hurdle", usage="hurdle depvar indepvars")
def cmd_hurdle(session: Session, args: str) -> str:
    """Hurdle model: Logit for zeros + Poisson for positives."""
    df = session.require_data()
    dep, indeps, opts = _parse_eq(args, df.columns)
    if not (dep and indeps):
        return "Usage: hurdle depvar indepvar1 ..."
    try:
        from openstat.stats.advanced_regression import fit_hurdle
        result = fit_hurdle(df, dep, indeps)
        session._last_model = result
        bar = "=" * 55
        # Two-part model: report the zero/positive split, then each stage.
        report = [f"\nHurdle Model: {dep}", bar]
        report.append(f" N = {result['n_obs']} Zeros = {result['n_zeros']} Positive = {result['n_positive']}")
        report.append("\nPart 1 — Logit (P(y > 0)):")
        report.append(_coef_table(result["logit_params"], pvals=result["logit_pvalues"]))
        report.append("\nPart 2 — Poisson (E[y | y > 0]):")
        report.append(_coef_table(result["count_params"], pvals=result["count_pvalues"]))
        report.append(bar)
        return "\n".join(report)
    except Exception as exc:
        return f"hurdle error: {exc}"
173
+
174
+
175
@command("sureg", usage="sureg (dep1 x1 x2) (dep2 x3 x4)")
def cmd_sureg(session: Session, args: str) -> str:
    """Seemingly Unrelated Regression."""
    df = session.require_data()
    # Each parenthesized group is one equation: (dep x1 x2) (dep2 x3 x4)
    groups = re.findall(r'\(([^)]+)\)', args)
    if not groups:
        return "Usage: sureg (dep1 x1 x2) (dep2 x3 x4)"

    equations = []
    for spec in groups:
        words = spec.split()
        if len(words) < 2:
            continue
        outcome = words[0]
        predictors = [w for w in words[1:] if w in df.columns]
        if outcome in df.columns and predictors:
            equations.append((outcome, predictors))

    if not equations:
        return "No valid equations found."
    try:
        from openstat.stats.advanced_regression import fit_sur
        result = fit_sur(df, equations)
        session._last_model = result
        out = [f"\nSUR: {result['n_equations']} equations", "=" * 60]
        for eq in result["equations"]:
            out.append(f"\nEquation {eq['equation']}: {eq['dep']} R² = {eq['r_squared']:.4f} N = {eq['n_obs']}")
            out.append(_coef_table(eq["params"], eq["std_errors"]))
        out.append("=" * 60)
        return "\n".join(out)
    except Exception as exc:
        return f"sureg error: {exc}"
@@ -0,0 +1,135 @@
1
+ """Alias and theme commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from openstat.commands.base import command, CommandArgs, get_registry, friendly_error
6
+ from openstat.session import Session
7
+
8
# Module-level alias store
# Maps alias name -> replacement command text. Process-lifetime only; no
# persistence of aliases is visible in this module.
_ALIASES: dict[str, str] = {}
10
+
11
+
12
def get_aliases() -> dict[str, str]:
    """Return the live alias mapping (name -> expansion); mutations affect the store."""
    return _ALIASES
14
+
15
+
16
def resolve_alias(line: str) -> str:
    """If line starts with a known alias, expand it.

    Returns the input unchanged when the first token is not an alias.
    Fix vs. original: the old `line[len(token):]` slice assumed the token
    started at column 0, so a line with leading whitespace produced a
    garbled remainder; splitting once with a maxsplit avoids that (and the
    original's double call to split()).
    """
    parts = line.split(None, 1)
    if not parts:
        return line
    token = parts[0]
    if token not in _ALIASES:
        return line
    expanded = _ALIASES[token]
    # Carry over any arguments that followed the alias name.
    rest = parts[1].strip() if len(parts) > 1 else ""
    return f"{expanded} {rest}".strip() if rest else expanded
24
+
25
+
26
@command("alias", usage="alias [<name> <expansion>] | alias list | alias rm <name>")
def cmd_alias(session: Session, args: str) -> str:
    """Define or manage command aliases.

    Examples:
        alias reg ols — 'reg y x' → 'ols y x'
        alias desc describe
        alias list — show all aliases
        alias rm reg — remove alias
    """
    words = args.strip().split(None, 2)

    # No arguments, or explicit "list": show the current table.
    if not words or words[0] == "list":
        if not _ALIASES:
            return "No aliases defined. Use: alias <name> <expansion>"
        out = ["Aliases:", " {:<15} {}".format("Name", "Expansion"), "-" * 40]
        out.extend(f" {k:<15} {v}" for k, v in sorted(_ALIASES.items()))
        return "\n".join(out)

    # Removal subcommand.
    if words[0] == "rm":
        if len(words) < 2:
            return "Usage: alias rm <name>"
        target = words[1]
        if target not in _ALIASES:
            return f"Alias '{target}' not found."
        del _ALIASES[target]
        return f"Alias '{target}' removed."

    if len(words) < 2:
        return "Usage: alias <name> <expansion>"

    alias_name = words[0]
    replacement = " ".join(words[1:])

    # Prevent aliasing built-in if it would shadow itself
    if alias_name == replacement.split()[0]:
        return f"Cannot alias '{alias_name}' to itself."

    _ALIASES[alias_name] = replacement
    return f"Alias set: {alias_name} → {replacement}"
67
+
68
+
69
+ # ── theme ─────────────────────────────────────────────────────────────────────
70
+
71
# Built-in color themes: theme name -> role -> style string.
# The style strings look like rich-markup styles ("bold cyan") — presumably
# consumed by the REPL's rich console; confirm against openstat/repl.py.
_THEMES: dict[str, dict[str, str]] = {
    "dark": {
        "prompt": "bold cyan",
        "output": "white",
        "error": "bold red",
        "info": "bright_blue",
    },
    "light": {
        "prompt": "bold blue",
        "output": "black",
        "error": "bold red",
        "info": "blue",
    },
    "solarized": {
        "prompt": "bold yellow",
        "output": "bright_white",
        "error": "bold magenta",
        "info": "cyan",
    },
    "matrix": {
        "prompt": "bold green",
        "output": "green",
        "error": "bold red",
        "info": "bright_green",
    },
}

# Name of the currently selected theme; rebound by cmd_theme.
_ACTIVE_THEME: str = "dark"
99
+
100
+
101
def get_active_theme() -> dict[str, str]:
    """Return the style mapping for the active theme, falling back to 'dark'."""
    return _THEMES.get(_ACTIVE_THEME, _THEMES["dark"])
103
+
104
+
105
@command("theme", usage="theme [<name>] | theme list")
def cmd_theme(session: Session, args: str) -> str:
    """Set or list color themes.

    Available themes: dark (default), light, solarized, matrix.

    Examples:
        theme — show current theme
        theme list — list all themes
        theme solarized — switch to solarized
    """
    # The original rebound the module global via a self-import
    # (`import openstat.commands.alias_cmds as _self`); `global` is the
    # idiomatic, equivalent way to do the same thing.
    global _ACTIVE_THEME

    tokens = args.strip().split()

    if not tokens:
        return f"Current theme: {_ACTIVE_THEME}"

    if tokens[0] == "list":
        lines = ["Available themes:"]
        for name in _THEMES:
            marker = " (active)" if name == _ACTIVE_THEME else ""
            lines.append(f" {name}{marker}")
        return "\n".join(lines)

    name = tokens[0].lower()
    if name not in _THEMES:
        available = ", ".join(_THEMES)
        return f"Unknown theme: {name}. Available: {available}"

    _ACTIVE_THEME = name
    return f"Theme set to: {name}"
@@ -0,0 +1,82 @@
1
+ """ARCH/GARCH volatility model commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from openstat.commands.base import command
8
+ from openstat.session import Session
9
+
10
+
11
+ def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
12
+ opts: dict[str, str] = {}
13
+ for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
14
+ opts[m.group(1).lower()] = m.group(2)
15
+ rest = re.sub(r'\w+\([^)]*\)', '', raw)
16
+ positional = [t.strip(',') for t in rest.split() if t.strip(',')]
17
+ return positional, opts
18
+
19
+
20
+ def _arch_table(result: dict) -> str:
21
+ lines = [f"\n{result['model']}: {result['var']}", "=" * 55]
22
+ lines.append(f" {'N observations':<25} {result['n_obs']}")
23
+ lines.append(f" {'Log-likelihood':<25} {result['log_likelihood']:.4f}")
24
+ lines.append(f" {'AIC':<25} {result['aic']:.4f}")
25
+ lines.append(f" {'BIC':<25} {result['bic']:.4f}")
26
+ lines.append("\nParameters:")
27
+ for name, val in result["params"].items():
28
+ lines.append(f" {name:<25} {val:>12.6f}")
29
+ if "cond_volatility_last5" in result:
30
+ lines.append("\nConditional volatility (last 5 obs):")
31
+ for v in result["cond_volatility_last5"]:
32
+ lines.append(f" {v:.4f}")
33
+ lines.append("=" * 55)
34
+ return "\n".join(lines)
35
+
36
+
37
@command("arch", usage="arch var [, p(1) dist(normal|t)]")
def cmd_arch(session: Session, args: str) -> str:
    """ARCH(p) model for volatility clustering.

    Options:
        p(n)     ARCH lag order (default: 1)
        dist(d)  innovation distribution, e.g. normal or t (default: normal)
    """
    df = session.require_data()
    positional, opts = _stata_opts(args)
    if not positional:
        return "Usage: arch var [, p(1) dist(normal)]"

    var = positional[0]
    # Validate the numeric option here: in the original, int() ran outside
    # the try block, so `p(abc)` raised an unhandled ValueError.
    try:
        p = int(opts.get("p", 1))
    except ValueError:
        return f"arch error: p() must be an integer, got '{opts.get('p')}'"
    dist = opts.get("dist", "normal")

    try:
        from openstat.stats.arch_garch import fit_arch
        result = fit_arch(df, var, p=p, dist=dist)
        session._last_model = result
        return _arch_table(result)
    except ImportError as e:
        return str(e)
    except Exception as exc:
        return f"arch error: {exc}"
58
+
59
+
60
@command("garch", usage="garch var [, p(1) q(1) model(GARCH|EGARCH|GJR-GARCH) dist(normal)]")
def cmd_garch(session: Session, args: str) -> str:
    """GARCH(p,q) or variant volatility model.

    Options:
        p(n)      GARCH lag order (default: 1)
        q(n)      ARCH lag order (default: 1)
        model(m)  GARCH, EGARCH, or GJR-GARCH (default: GARCH)
        dist(d)   innovation distribution (default: normal)
    """
    df = session.require_data()
    positional, opts = _stata_opts(args)
    if not positional:
        return "Usage: garch var [, p(1) q(1) model(GARCH) dist(normal)]"

    var = positional[0]
    # Validate numeric options here: in the original, int() ran outside the
    # try block, so `p(abc)` / `q(abc)` raised an unhandled ValueError.
    try:
        p = int(opts.get("p", 1))
        q = int(opts.get("q", 1))
    except ValueError:
        return "garch error: p() and q() must be integers"
    dist = opts.get("dist", "normal")
    model = opts.get("model", "GARCH")

    try:
        from openstat.stats.arch_garch import fit_garch
        result = fit_garch(df, var, p=p, q=q, dist=dist, model=model)
        session._last_model = result
        return _arch_table(result)
    except ImportError as e:
        return str(e)
    except Exception as exc:
        return f"garch error: {exc}"
@@ -0,0 +1,111 @@
1
+ """Association rules: Apriori / FP-Growth."""
2
+
3
+ from __future__ import annotations
4
+ from openstat.commands.base import command, CommandArgs, friendly_error
5
+ from openstat.session import Session
6
+
7
+
8
@command("arules", usage="arules <item_col> [<id_col>] [--minsup=0.05] [--minconf=0.5] [--algo=fpgrowth]")
def cmd_arules(session: Session, args: str) -> str:
    """Association rule mining (Apriori / FP-Growth).

    Mines frequent itemsets and generates association rules.
    Expects one-row-per-transaction with an item column, OR
    a transaction-id column + item column.

    Options:
        --minsup=<f>   minimum support (0–1, default: 0.05)
        --minconf=<f>  minimum confidence (0–1, default: 0.5)
        --minlift=<f>  minimum lift (default: 1.0)
        --algo=<a>     apriori or fpgrowth (default: fpgrowth)
        --top=<n>      show top N rules (default: 20)

    Examples:
        arules item transaction_id --minsup=0.1 --minconf=0.6
        arules product --minsup=0.05 --algo=apriori --top=10
    """
    # mlxtend is an optional dependency: fail with an install hint, not a traceback.
    try:
        from mlxtend.frequent_patterns import fpgrowth, apriori, association_rules
        from mlxtend.preprocessing import TransactionEncoder
    except ImportError:
        return "mlxtend required. Install: pip install mlxtend"

    import polars as pl
    import pandas as pd

    ca = CommandArgs(args)
    if not ca.positional:
        return "Usage: arules <item_col> [<id_col>]"

    item_col = ca.positional[0]
    id_col = ca.positional[1] if len(ca.positional) > 1 else None
    # NOTE(review): bad numeric option values (e.g. --minsup=abc) raise here,
    # outside the try below — they surface as an unhandled ValueError.
    min_sup = float(ca.options.get("minsup", 0.05))
    min_conf = float(ca.options.get("minconf", 0.5))
    min_lift = float(ca.options.get("minlift", 1.0))
    algo = ca.options.get("algo", "fpgrowth").lower()
    top_n = int(ca.options.get("top", 20))

    try:
        df = session.require_data()
        if item_col not in df.columns:
            return f"Column not found: {item_col}"
        if id_col and id_col not in df.columns:
            return f"Column not found: {id_col}"

        # Build transactions
        if id_col:
            # Group items by transaction id
            transactions = (
                df.select([id_col, item_col])
                .group_by(id_col)
                .agg(pl.col(item_col).cast(pl.Utf8).alias("items"))
                ["items"].to_list()
            )
        else:
            # Each row is a transaction; split by comma if needed
            col_vals = df[item_col].cast(pl.Utf8).to_list()
            transactions = [[v.strip() for v in row.split(",") if v.strip()]
                            for row in col_vals if row]

        if len(transactions) < 2:
            return "Need at least 2 transactions."

        # One-hot encode the transactions into the boolean basket matrix
        # (pandas DataFrame) that mlxtend's miners require.
        te = TransactionEncoder()
        te_array = te.fit(transactions).transform(transactions)
        basket_df = pd.DataFrame(te_array, columns=te.columns_)

        if algo == "apriori":
            frequent = apriori(basket_df, min_support=min_sup, use_colnames=True)
        else:
            # Any value other than "apriori" (including typos) falls through to fpgrowth.
            frequent = fpgrowth(basket_df, min_support=min_sup, use_colnames=True)

        if frequent.empty:
            return f"No frequent itemsets found at min_support={min_sup}. Try lowering --minsup."

        rules = association_rules(frequent, metric="confidence", min_threshold=min_conf)
        # Apply the lift floor and show the strongest rules first.
        rules = rules[rules["lift"] >= min_lift].sort_values("lift", ascending=False)

        lines = [
            f"Association Rules ({algo.upper()}) — {item_col}",
            f" Transactions: {len(transactions)}, Items: {len(te.columns_)}",
            f" min_support={min_sup}, min_confidence={min_conf}, min_lift={min_lift}",
            f" Frequent itemsets: {len(frequent)}, Rules: {len(rules)}",
            "",
        ]

        if rules.empty:
            lines.append(" No rules found. Try lowering --minconf or --minlift.")
        else:
            lines.append(f" Top {min(top_n, len(rules))} rules by lift:")
            lines.append(f" {'Antecedent':<30} {'Consequent':<20} {'Sup':>7} {'Conf':>7} {'Lift':>7}")
            lines.append(" " + "-" * 75)
            for _, row in rules.head(top_n).iterrows():
                # antecedents/consequents are frozensets; render sorted, truncated to column width.
                ant = ", ".join(sorted(row["antecedents"]))[:28]
                con = ", ".join(sorted(row["consequents"]))[:18]
                lines.append(
                    f" {ant:<30} {con:<20} {row['support']:>7.3f} {row['confidence']:>7.3f} {row['lift']:>7.3f}"
                )

        return "\n".join(lines)
    except Exception as e:
        return friendly_error(e, "arules")