openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. openstat/__init__.py +3 -0
  2. openstat/__main__.py +4 -0
  3. openstat/backends/__init__.py +16 -0
  4. openstat/backends/duckdb_backend.py +70 -0
  5. openstat/backends/polars_backend.py +52 -0
  6. openstat/cli.py +92 -0
  7. openstat/commands/__init__.py +82 -0
  8. openstat/commands/adv_stat_cmds.py +1255 -0
  9. openstat/commands/advanced_ml_cmds.py +576 -0
  10. openstat/commands/advreg_cmds.py +207 -0
  11. openstat/commands/alias_cmds.py +135 -0
  12. openstat/commands/arch_cmds.py +82 -0
  13. openstat/commands/arules_cmds.py +111 -0
  14. openstat/commands/automodel_cmds.py +212 -0
  15. openstat/commands/backend_cmds.py +82 -0
  16. openstat/commands/base.py +170 -0
  17. openstat/commands/bayes_cmds.py +71 -0
  18. openstat/commands/causal_cmds.py +269 -0
  19. openstat/commands/cluster_cmds.py +152 -0
  20. openstat/commands/data_cmds.py +996 -0
  21. openstat/commands/datamanip_cmds.py +672 -0
  22. openstat/commands/dataquality_cmds.py +174 -0
  23. openstat/commands/datetime_cmds.py +176 -0
  24. openstat/commands/dimreduce_cmds.py +184 -0
  25. openstat/commands/discrete_cmds.py +149 -0
  26. openstat/commands/dsl_cmds.py +143 -0
  27. openstat/commands/epi_cmds.py +93 -0
  28. openstat/commands/equiv_tobit_cmds.py +94 -0
  29. openstat/commands/esttab_cmds.py +196 -0
  30. openstat/commands/export_beamer_cmds.py +142 -0
  31. openstat/commands/export_cmds.py +201 -0
  32. openstat/commands/export_extra_cmds.py +240 -0
  33. openstat/commands/factor_cmds.py +180 -0
  34. openstat/commands/groupby_cmds.py +155 -0
  35. openstat/commands/help_cmds.py +237 -0
  36. openstat/commands/i18n_cmds.py +43 -0
  37. openstat/commands/import_extra_cmds.py +561 -0
  38. openstat/commands/influence_cmds.py +134 -0
  39. openstat/commands/iv_cmds.py +106 -0
  40. openstat/commands/manova_cmds.py +105 -0
  41. openstat/commands/mediate_cmds.py +233 -0
  42. openstat/commands/meta_cmds.py +284 -0
  43. openstat/commands/mi_cmds.py +228 -0
  44. openstat/commands/mixed_cmds.py +79 -0
  45. openstat/commands/mixture_changepoint_cmds.py +166 -0
  46. openstat/commands/ml_adv_cmds.py +147 -0
  47. openstat/commands/ml_cmds.py +178 -0
  48. openstat/commands/model_eval_cmds.py +142 -0
  49. openstat/commands/network_cmds.py +288 -0
  50. openstat/commands/nlquery_cmds.py +161 -0
  51. openstat/commands/nonparam_cmds.py +149 -0
  52. openstat/commands/outreg_cmds.py +247 -0
  53. openstat/commands/panel_cmds.py +141 -0
  54. openstat/commands/pdf_cmds.py +226 -0
  55. openstat/commands/pipeline_cmds.py +319 -0
  56. openstat/commands/plot_cmds.py +189 -0
  57. openstat/commands/plugin_cmds.py +79 -0
  58. openstat/commands/posthoc_cmds.py +153 -0
  59. openstat/commands/power_cmds.py +172 -0
  60. openstat/commands/profile_cmds.py +246 -0
  61. openstat/commands/rbridge_cmds.py +81 -0
  62. openstat/commands/regex_cmds.py +104 -0
  63. openstat/commands/report_cmds.py +48 -0
  64. openstat/commands/repro_cmds.py +129 -0
  65. openstat/commands/resampling_cmds.py +109 -0
  66. openstat/commands/reshape_cmds.py +223 -0
  67. openstat/commands/sem_cmds.py +177 -0
  68. openstat/commands/stat_cmds.py +1040 -0
  69. openstat/commands/stata_import_cmds.py +215 -0
  70. openstat/commands/string_cmds.py +124 -0
  71. openstat/commands/surv_cmds.py +145 -0
  72. openstat/commands/survey_cmds.py +153 -0
  73. openstat/commands/textanalysis_cmds.py +192 -0
  74. openstat/commands/ts_adv_cmds.py +136 -0
  75. openstat/commands/ts_cmds.py +195 -0
  76. openstat/commands/tui_cmds.py +111 -0
  77. openstat/commands/ux_cmds.py +191 -0
  78. openstat/commands/validate_cmds.py +270 -0
  79. openstat/commands/viz_adv_cmds.py +312 -0
  80. openstat/commands/viz_extra_cmds.py +251 -0
  81. openstat/commands/watch_cmds.py +69 -0
  82. openstat/config.py +106 -0
  83. openstat/dsl/__init__.py +0 -0
  84. openstat/dsl/parser.py +332 -0
  85. openstat/dsl/tokenizer.py +105 -0
  86. openstat/i18n.py +120 -0
  87. openstat/io/__init__.py +0 -0
  88. openstat/io/loader.py +187 -0
  89. openstat/jupyter/__init__.py +18 -0
  90. openstat/jupyter/display.py +18 -0
  91. openstat/jupyter/magic.py +60 -0
  92. openstat/logging_config.py +59 -0
  93. openstat/plots/__init__.py +0 -0
  94. openstat/plots/plotter.py +437 -0
  95. openstat/plots/surv_plots.py +32 -0
  96. openstat/plots/ts_plots.py +59 -0
  97. openstat/plugins/__init__.py +5 -0
  98. openstat/plugins/manager.py +69 -0
  99. openstat/repl.py +457 -0
  100. openstat/reporting/__init__.py +0 -0
  101. openstat/reporting/eda.py +208 -0
  102. openstat/reporting/report.py +67 -0
  103. openstat/script_runner.py +319 -0
  104. openstat/session.py +133 -0
  105. openstat/stats/__init__.py +0 -0
  106. openstat/stats/advanced_regression.py +269 -0
  107. openstat/stats/arch_garch.py +84 -0
  108. openstat/stats/bayesian.py +103 -0
  109. openstat/stats/causal.py +258 -0
  110. openstat/stats/clustering.py +206 -0
  111. openstat/stats/discrete.py +311 -0
  112. openstat/stats/epidemiology.py +119 -0
  113. openstat/stats/equiv_tobit.py +163 -0
  114. openstat/stats/factor.py +174 -0
  115. openstat/stats/imputation.py +282 -0
  116. openstat/stats/influence.py +78 -0
  117. openstat/stats/iv.py +131 -0
  118. openstat/stats/manova.py +124 -0
  119. openstat/stats/mixed.py +128 -0
  120. openstat/stats/ml.py +275 -0
  121. openstat/stats/ml_advanced.py +117 -0
  122. openstat/stats/model_eval.py +183 -0
  123. openstat/stats/models.py +1342 -0
  124. openstat/stats/nonparametric.py +130 -0
  125. openstat/stats/panel.py +179 -0
  126. openstat/stats/power.py +295 -0
  127. openstat/stats/resampling.py +203 -0
  128. openstat/stats/survey.py +213 -0
  129. openstat/stats/survival.py +196 -0
  130. openstat/stats/timeseries.py +142 -0
  131. openstat/stats/ts_advanced.py +114 -0
  132. openstat/types.py +11 -0
  133. openstat/web/__init__.py +1 -0
  134. openstat/web/app.py +117 -0
  135. openstat/web/session_manager.py +73 -0
  136. openstat/web/static/app.js +117 -0
  137. openstat/web/static/index.html +38 -0
  138. openstat/web/static/style.css +103 -0
  139. openstat_cli-1.0.0.dist-info/METADATA +748 -0
  140. openstat_cli-1.0.0.dist-info/RECORD +143 -0
  141. openstat_cli-1.0.0.dist-info/WHEEL +4 -0
  142. openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
  143. openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,192 @@
1
+ """Text analysis: TF-IDF, word frequency, topic modeling (LDA)."""
2
+
3
+ from __future__ import annotations
4
+ from openstat.commands.base import command, CommandArgs, friendly_error
5
+ from openstat.session import Session
6
+
7
+
8
@command("textfreq", usage="textfreq <col> [--top=20] [--stopwords]")
def cmd_textfreq(session: Session, args: str) -> str:
    """Word frequency analysis for a text column.

    Options:
      --top=<n>      show top N words (default: 20)
      --stopwords    remove common English stopwords
      --min=<n>      minimum word length (default: 2)

    Examples:
      textfreq review_text --top=30 --stopwords
      textfreq title --min=4

    Returns the formatted frequency table, or a usage / error message.
    Only runs of ASCII letters are counted as words; text is lower-cased
    before counting.
    """
    import re
    from collections import Counter
    import polars as pl

    ca = CommandArgs(args)
    if not ca.positional:
        return "Usage: textfreq <col> [--top=20]"

    col = ca.positional[0]
    use_stopwords = "stopwords" in ca.flags

    STOPWORDS = {
        "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
        "of", "with", "by", "from", "is", "was", "are", "were", "be", "been",
        "have", "has", "had", "do", "does", "did", "will", "would", "could",
        "should", "may", "might", "this", "that", "these", "those", "it", "its",
        "i", "we", "you", "he", "she", "they", "my", "your", "our", "their",
        "not", "no", "as", "if", "so", "than", "then", "when", "where", "how",
        "what", "which", "who",
    }

    try:
        # Parse numeric options inside the try so a malformed value such as
        # --top=abc is reported via friendly_error instead of escaping as an
        # uncaught ValueError.
        top_n = int(ca.options.get("top", 20))
        min_len = int(ca.options.get("min", 2))
        if top_n < 1:
            return "--top must be a positive integer."

        df = session.require_data()
        if col not in df.columns:
            return f"Column not found: {col}"

        texts = df[col].drop_nulls().cast(pl.Utf8).to_list()
        word_re = re.compile(r"[a-zA-Z]+")

        # Feed the counter one document at a time rather than materialising
        # every token of the column in a single list first.
        counter = Counter()
        for text in texts:
            words = word_re.findall(text.lower())
            if min_len > 1:
                words = [w for w in words if len(w) >= min_len]
            if use_stopwords:
                words = [w for w in words if w not in STOPWORDS]
            counter.update(words)

        if not counter:
            return "No words found after filtering."

        total = sum(counter.values())
        lines = [f"Word Frequency — {col} (docs={len(texts)}, unique_words={len(counter)})", ""]
        lines.append(f" {'Rank':<6} {'Word':<25} {'Count':>8} {'%':>7}")
        lines.append(" " + "-" * 50)
        for rank, (word, cnt) in enumerate(counter.most_common(top_n), 1):
            lines.append(f" {rank:<6} {word:<25} {cnt:>8,} {100*cnt/total:>6.2f}%")
        return "\n".join(lines)
    except Exception as e:
        return friendly_error(e, "textfreq")
74
+
75
+
76
@command("tfidf", usage="tfidf <col> [--top=20] [--max_features=1000]")
def cmd_tfidf(session: Session, args: str) -> str:
    """TF-IDF analysis: identify most distinctive terms in a text column.

    Options:
      --top=<n>            top N terms by mean TF-IDF score (default: 20)
      --max_features=<n>   vocabulary size limit (default: 1000)
      --ngram_min=<n>      minimum n-gram size (default: 1)
      --ngram_max=<n>      maximum n-gram size (default: 1)

    Examples:
      tfidf review_text --top=30
      tfidf comments --ngram_min=2 --ngram_max=2 --top=15

    Returns the ranked term table, or a usage / error message. Terms are
    ranked by their TF-IDF score averaged over all non-null documents.
    """
    try:
        from sklearn.feature_extraction.text import TfidfVectorizer
    except ImportError:
        return "scikit-learn required. Install: pip install scikit-learn"

    import polars as pl
    import numpy as np

    ca = CommandArgs(args)
    if not ca.positional:
        return "Usage: tfidf <col> [--top=20]"

    col = ca.positional[0]

    try:
        # Numeric options are parsed inside the try so malformed values are
        # routed through friendly_error rather than raising out of the command.
        top_n = int(ca.options.get("top", 20))
        max_features = int(ca.options.get("max_features", 1000))
        ngram_min = int(ca.options.get("ngram_min", 1))
        ngram_max = int(ca.options.get("ngram_max", 1))
        # A negative value would make the [:top_n] slice below return
        # "everything except the last N" instead of the top N.
        if top_n < 1:
            return "--top must be a positive integer."

        df = session.require_data()
        if col not in df.columns:
            return f"Column not found: {col}"

        texts = df[col].drop_nulls().cast(pl.Utf8).to_list()
        if len(texts) < 2:
            return "Need at least 2 documents for TF-IDF."

        vec = TfidfVectorizer(max_features=max_features, stop_words="english",
                              ngram_range=(ngram_min, ngram_max))
        tfidf_matrix = vec.fit_transform(texts)
        feature_names = vec.get_feature_names_out()
        # Column-wise mean over documents, flattened to a 1-D score per term.
        mean_scores = np.asarray(tfidf_matrix.mean(axis=0)).flatten()
        top_idx = mean_scores.argsort()[::-1][:top_n]

        lines = [f"TF-IDF Analysis — {col} (docs={len(texts)}, vocab={len(feature_names)})", ""]
        lines.append(f" {'Rank':<6} {'Term':<30} {'Mean TF-IDF':>12}")
        lines.append(" " + "-" * 52)
        for rank, idx in enumerate(top_idx, 1):
            lines.append(f" {rank:<6} {feature_names[idx]:<30} {mean_scores[idx]:>12.5f}")
        return "\n".join(lines)
    except Exception as e:
        return friendly_error(e, "tfidf")
132
+
133
+
134
@command("lda", usage="lda <col> [--topics=5] [--words=10] [--iter=10]")
def cmd_lda(session: Session, args: str) -> str:
    """Latent Dirichlet Allocation (LDA) topic modeling.

    Options:
      --topics=<n>         number of topics (default: 5)
      --words=<n>          top words per topic to show (default: 10)
      --iter=<n>           max iterations (default: 10)
      --max_features=<n>   vocabulary size (default: 1000)

    Examples:
      lda review_text --topics=5
      lda abstract --topics=8 --words=12

    Returns one line per topic listing its highest-weighted words, or a
    usage / error message. The model is fitted with a fixed random_state so
    repeated runs on the same data give the same topics.
    """
    try:
        from sklearn.decomposition import LatentDirichletAllocation
        from sklearn.feature_extraction.text import CountVectorizer
    except ImportError:
        return "scikit-learn required. Install: pip install scikit-learn"

    import polars as pl
    import numpy as np

    ca = CommandArgs(args)
    if not ca.positional:
        return "Usage: lda <col> [--topics=5]"

    col = ca.positional[0]

    try:
        # Parsed inside the try so a malformed option value is reported via
        # friendly_error instead of raising out of the command.
        n_topics = int(ca.options.get("topics", 5))
        n_words = int(ca.options.get("words", 10))
        n_iter = int(ca.options.get("iter", 10))
        max_features = int(ca.options.get("max_features", 1000))
        # A negative value would turn the [:n_words] slice below into
        # "all but the last N" rather than the top N.
        if n_words < 1:
            return "--words must be a positive integer."

        df = session.require_data()
        if col not in df.columns:
            return f"Column not found: {col}"

        texts = df[col].drop_nulls().cast(pl.Utf8).to_list()
        if len(texts) < n_topics:
            return f"Need at least {n_topics} documents (got {len(texts)})."

        vec = CountVectorizer(max_features=max_features, stop_words="english", min_df=2)
        dtm = vec.fit_transform(texts)
        feature_names = vec.get_feature_names_out()

        lda_model = LatentDirichletAllocation(
            n_components=n_topics, max_iter=n_iter, random_state=42
        )
        lda_model.fit(dtm)

        lines = [f"LDA Topic Modeling — {col} (docs={len(texts)}, topics={n_topics})", ""]
        for topic_idx, topic in enumerate(lda_model.components_):
            # Highest component weights first.
            top_words_idx = topic.argsort()[::-1][:n_words]
            top_words = [feature_names[i] for i in top_words_idx]
            lines.append(f" Topic {topic_idx + 1}: {', '.join(top_words)}")
        return "\n".join(lines)
    except Exception as e:
        return friendly_error(e, "lda")
@@ -0,0 +1,136 @@
1
+ """Advanced time-series commands: granger, johansen, vecm, stl, tssmooth."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from openstat.commands.base import command
8
+ from openstat.session import Session
9
+
10
+
11
+ def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
12
+ opts: dict[str, str] = {}
13
+ for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
14
+ opts[m.group(1).lower()] = m.group(2)
15
+ rest = re.sub(r'\w+\([^)]*\)', '', raw)
16
+ positional = [t.strip(',') for t in rest.split() if t.strip(',')]
17
+ return positional, opts
18
+
19
+
20
@command("granger", usage="granger dep cause [maxlag(4)]")
def cmd_granger(session: Session, args: str) -> str:
    """Granger causality test.

    Expects two column names (dependent variable, then the candidate cause)
    and an optional ``maxlag(n)`` option (default 4). Returns the per-lag
    p-values, the minimum p-value with its lag, and the 5% decision, or a
    usage / error message.
    """
    from openstat.stats.ts_advanced import granger_causality
    df = session.require_data()
    positional, opts = _stata_opts(args)
    if len(positional) < 2:
        return "Usage: granger dep cause [maxlag(4)]"
    dep, cause = positional[0], positional[1]
    # Fail early with a clear message rather than relying on whatever the
    # stats layer raises for an unknown column.
    for name in (dep, cause):
        if name not in df.columns:
            return f"Column '{name}' not found."
    try:
        # Parsed inside the try so maxlag(abc) yields a "granger error: ..."
        # message instead of an uncaught ValueError.
        maxlag = int(opts.get("maxlag", 4))
        r = granger_causality(df, dep, cause, maxlag=maxlag)
        lines = [f"\nGranger Causality: {cause} → {dep}", "-" * 50]
        for lag, pval in r["lag_pvalues"].items():
            lines.append(f" Lag {lag:2d}: F-test p-value = {pval:.4f}")
        lines.append(f"\n Min p-value: {r['min_pvalue']:.4f} at lag {r['best_lag']}")
        lines.append(f" Granger-causes at 5%: {'YES' if r['reject_null_5pct'] else 'NO'}")
        return "\n".join(lines)
    except Exception as exc:
        return f"granger error: {exc}"
40
+
41
+
42
@command("johansen", usage="johansen var1 var2 [var3 ...] [lags(1)]")
def cmd_johansen(session: Session, args: str) -> str:
    """Johansen cointegration test.

    Takes two or more column names and an optional ``lags(n)`` option
    (default 1, passed on as ``k_ar_diff``). Returns the number of
    cointegrating vectors and the trace statistics with their 95% / 90%
    critical values, or an error message.
    """
    from openstat.stats.ts_advanced import johansen_test
    df = session.require_data()
    positional, opts = _stata_opts(args)
    if len(positional) < 2:
        return "johansen requires at least 2 variables."
    # Report unknown names explicitly: silently dropping them would run the
    # test on fewer series than the user asked for.
    missing = [c for c in positional if c not in df.columns]
    if missing:
        return f"Columns not found: {', '.join(missing)}"
    cols = list(positional)
    try:
        # Parsed inside the try so lags(abc) becomes a "johansen error: ..."
        # message instead of an uncaught ValueError.
        k_ar_diff = int(opts.get("lags", 1))
        r = johansen_test(df, cols, k_ar_diff=k_ar_diff)
        lines = ["\nJohansen Cointegration Test", "=" * 55]
        lines.append(f" Variables: {', '.join(cols)}")
        lines.append(f" Cointegrating vectors: {r['n_cointegrating_vectors']}")
        lines.append("\n Trace Statistics:")
        lines.append(f" {'r=0':>10} {'Statistic':>12} {'CV 95%':>10} {'CV 90%':>10}")
        for i, (ts, cv95, cv90) in enumerate(zip(r["trace_statistics"], r["trace_cv_95"], r["trace_cv_90"])):
            lines.append(f" r<={i:<8} {ts:>12.4f} {cv95:>10.4f} {cv90:>10.4f}")
        return "\n".join(lines)
    except Exception as exc:
        return f"johansen error: {exc}"
64
+
65
+
66
@command("vecm", usage="vecm var1 var2 [var3 ...] [lags(1) rank(1)]")
def cmd_vecm(session: Session, args: str) -> str:
    """Vector Error Correction Model.

    Takes two or more column names plus optional ``lags(n)`` (default 1,
    passed as ``k_ar_diff``) and ``rank(n)`` (default 1, passed as
    ``coint_rank``). On success the fit result is stored in
    ``session._last_model`` and the alpha (adjustment) rows are printed;
    otherwise an error message is returned.
    """
    from openstat.stats.ts_advanced import fit_vecm
    df = session.require_data()
    positional, opts = _stata_opts(args)
    if len(positional) < 2:
        return "vecm requires at least 2 variables."
    # Report unknown names explicitly: silently dropping them would fit the
    # model on fewer series than the user asked for.
    missing = [c for c in positional if c not in df.columns]
    if missing:
        return f"Columns not found: {', '.join(missing)}"
    cols = list(positional)
    try:
        # Parsed inside the try so a non-numeric lags()/rank() value becomes
        # a "vecm error: ..." message instead of an uncaught ValueError.
        k_ar_diff = int(opts.get("lags", 1))
        coint_rank = int(opts.get("rank", 1))
        r = fit_vecm(df, cols, k_ar_diff=k_ar_diff, coint_rank=coint_rank)
        session._last_model = r
        lines = ["\nVECM Results", "=" * 50]
        lines.append(f" Variables: {', '.join(cols)}")
        lines.append(f" Cointegration rank: {coint_rank}, AR lags: {k_ar_diff}")
        lines.append("\n Alpha (adjustment coefficients):")
        for row in r["alpha"]:
            lines.append(f" {row}")
        return "\n".join(lines)
    except Exception as exc:
        return f"vecm error: {exc}"
89
+
90
+
91
@command("stl", usage="stl varname [period(12)]")
def cmd_stl(session: Session, args: str) -> str:
    """STL decomposition (trend + seasonal + residual).

    Takes one column name and an optional ``period(n)`` option (default 12).
    On success the decomposition result is stored in ``session._last_model``
    and a short summary (trend / seasonal strength and ranges) is returned;
    otherwise a usage or error message is returned.
    """
    from openstat.stats.ts_advanced import stl_decompose
    df = session.require_data()
    positional, opts = _stata_opts(args)
    if not positional:
        return "Usage: stl varname [period(12)]"
    col = positional[0]
    if col not in df.columns:
        return f"Column '{col}' not found."
    try:
        # Parsed inside the try so period(abc) becomes an "stl error: ..."
        # message instead of an uncaught ValueError.
        period = int(opts.get("period", 12))
        r = stl_decompose(df, col, period=period)
        lines = [f"\nSTL Decomposition: {col} (period={period})", "-" * 45]
        lines.append(f" {'Strength of trend':<30} {r['strength_trend']:.4f}")
        lines.append(f" {'Strength of seasonal':<30} {r['strength_seasonal']:.4f}")
        lines.append(f" Trend range: [{min(r['trend']):.4f}, {max(r['trend']):.4f}]")
        lines.append(f" Seasonal range: [{min(r['seasonal']):.4f}, {max(r['seasonal']):.4f}]")
        session._last_model = r
        return "\n".join(lines)
    except Exception as exc:
        return f"stl error: {exc}"
114
+
115
+
116
@command("tssmooth", usage="tssmooth varname [method(ma|exp) window(3) alpha(0.3)]")
def cmd_tssmooth(session: Session, args: str) -> str:
    """Smooth a time series column.

    Takes one column name plus optional ``method(...)`` (default ``ma``),
    ``window(n)`` (default 3) and ``alpha(x)`` (default 0.3). On success the
    session's data frame is replaced with the smoothed result (after taking
    an undo snapshot) and a confirmation message is returned; otherwise a
    usage or error message is returned and the session is left untouched.
    """
    from openstat.stats.ts_advanced import tssmooth
    df = session.require_data()
    positional, opts = _stata_opts(args)
    if not positional:
        return "Usage: tssmooth varname [method(ma|exp) window(3) alpha(0.3)]"
    col = positional[0]
    if col not in df.columns:
        return f"Column '{col}' not found."
    method = opts.get("method", "ma")
    try:
        # Parsed inside the try so non-numeric window()/alpha() values become
        # a "tssmooth error: ..." message instead of an uncaught ValueError.
        window = int(opts.get("window", 3))
        alpha = float(opts.get("alpha", 0.3))
        smoothed = tssmooth(df, col, method=method, window=window, alpha=alpha)
        # Snapshot only once the smoothing has succeeded, so a failed call
        # does not leave a no-op entry behind.
        session.snapshot()
        session.df = smoothed
        new_col = f"{col}_smooth"
        return f"Smoothed '{col}' → '{new_col}' using {method} (window={window}, alpha={alpha})"
    except Exception as exc:
        return f"tssmooth error: {exc}"
@@ -0,0 +1,195 @@
1
+ """Time series commands: tsset, arima, var, dfuller, forecast, irf."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ import numpy as np
8
+ import polars as pl
9
+ from rich.console import Console
10
+ from rich.table import Table
11
+
12
+ from openstat.session import Session, ModelResult
13
+ from openstat.commands.base import command, CommandArgs, rich_to_str, friendly_error
14
+ from openstat.dsl.parser import parse_formula, ParseError
15
+
16
+
17
@command("tsset", usage="tsset <time_var> [freq=M|Q|Y|D]")
def cmd_tsset(session: Session, args: str) -> str:
    """Declare the time variable for time series analysis.

    Records the chosen column in ``session._time_var`` and the optional
    ``freq`` option in ``session._ts_freq``, then reports how many distinct
    values the time column holds (plus the frequency, when one was given).
    """
    df = session.require_data()
    parsed = CommandArgs(args)
    if not parsed.positional:
        return "Usage: tsset <time_var> [freq=M|Q|Y|D]"

    time_var = parsed.positional[0]
    if time_var not in df.columns:
        return f"Column not found: {time_var}"

    session._time_var = time_var
    session._ts_freq = parsed.get_option("freq")

    n_distinct = df[time_var].n_unique()
    message = f"Time variable: {time_var} ({n_distinct} unique values)"
    if session._ts_freq:
        message += f"\nFrequency: {session._ts_freq}"
    return message
35
+
36
+
37
@command("dfuller", usage="dfuller <variable>")
def cmd_dfuller(session: Session, args: str) -> str:
    """Augmented Dickey-Fuller unit root test.

    The whole (stripped) argument string is taken as the column name. Nulls
    are dropped from that column before it is handed to ``adf_test``, whose
    return value is passed straight back to the caller.
    """
    df = session.require_data()
    var_name = args.strip()
    if var_name == "":
        return "Usage: dfuller <variable>"
    if var_name not in df.columns:
        return f"Column not found: {var_name}"

    from openstat.stats.timeseries import adf_test

    values = df[var_name].drop_nulls().to_numpy()
    return adf_test(values, var_name)
50
+
51
+
52
@command("arima", usage="arima y [~ x1], order(p,d,q)")
def cmd_arima(session: Session, args: str) -> str:
    """Fit ARIMA(p,d,q) or ARIMAX model.

    The ``order(p,d,q)`` option is mandatory. What remains of the argument
    string is either a formula (``y ~ x1 ...``, parsed with ``parse_formula``)
    or a bare dependent-variable name. On success the raw model, the variable
    names, the fit result and the order are stored on the session, a
    ``ModelResult`` is appended to ``session.results``, and the summary table
    (followed by any warnings) is returned.
    """
    df = session.require_data()

    # Locate order(p,d,q); without it the command cannot run.
    order_match = re.search(r'order\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)', args)
    if order_match is None:
        return "Usage: arima y [~ x1], order(p,d,q)"
    order = tuple(int(part) for part in order_match.groups())

    # Everything except the order(...) clause is the formula / variable part.
    before, after = args[:order_match.start()], args[order_match.end():]
    formula_str = (before + after).strip().rstrip(",").strip()

    if "~" not in formula_str:
        tokens = formula_str.split()
        dep = tokens[0] if tokens else ""
        exog_vars = None
    else:
        try:
            dep, exog_vars = parse_formula(formula_str)
        except ParseError as e:
            return f"Formula error: {e}"

    if not (dep and dep in df.columns):
        return f"Dependent variable not found: {dep}"

    try:
        from openstat.stats.timeseries import fit_arima

        result, raw = fit_arima(df, dep, order, exog_vars, session._time_var)

        session._last_model = raw
        session._last_model_vars = (dep, exog_vars or [])
        session._last_fit_result = result
        session._last_fit_kwargs = {"order": order}

        if hasattr(result, "to_markdown"):
            md = result.to_markdown()
        else:
            md = ""
        details = {
            "n_obs": result.n_obs,
            "params": dict(result.params),
            "aic": result.aic,
            "bic": result.bic,
        }
        session.results.append(ModelResult(
            name=f"ARIMA{order}", formula=result.formula,
            table=md, details=details,
        ))

        output = result.summary_table()
        if result.warnings:
            output += "\n" + "\n".join(result.warnings)
        return output
    except Exception as e:
        return friendly_error(e, "arima")
108
+
109
+
110
@command("var", usage="var y1 y2 [y3], lags(n)")
def cmd_var(session: Session, args: str) -> str:
    """Fit a Vector Autoregression (VAR) model.

    Requires a ``lags(n)`` option; the text in front of it is read as a
    whitespace-separated list of at least two existing columns. On success
    the raw model, variable names and fit options are stored on the session,
    a ``ModelResult`` is appended to ``session.results``, and the summary
    text is returned.
    """
    df = session.require_data()

    # The lag count is mandatory.
    lag_match = re.search(r'lags\((\d+)\)', args)
    if lag_match is None:
        return "Usage: var y1 y2, lags(n)"
    lags = int(lag_match.group(1))

    # Variables are whatever precedes lags(...), minus a trailing comma.
    variables = args[:lag_match.start()].strip().rstrip(",").strip().split()
    if len(variables) < 2:
        return "VAR requires at least 2 variables."

    missing = [name for name in variables if name not in df.columns]
    if missing:
        return f"Columns not found: {', '.join(missing)}"

    try:
        from openstat.stats.timeseries import fit_var

        summary, raw = fit_var(df, variables, lags, session._time_var)

        session._last_model = raw
        session._last_model_vars = (variables[0], variables[1:])
        session._last_fit_kwargs = {"lags": lags, "variables": variables}

        record = ModelResult(
            name=f"VAR({lags})", formula=f"VAR({', '.join(variables)})",
            table=summary, details={"lags": lags, "variables": variables},
        )
        session.results.append(record)

        return summary
    except Exception as e:
        return friendly_error(e, "var")
148
+
149
+
150
@command("forecast", usage="forecast <steps>")
def cmd_forecast(session: Session, args: str) -> str:
    """Generate forecasts from the last fitted time series model.

    ``<steps>`` is the forecast horizon; it defaults to 12 when omitted and
    must otherwise be a positive integer. Returns a rendered table of the
    forecast values, or a usage / error message.
    """
    if session._last_model is None:
        return "No model fitted. Run arima or var first."

    raw_steps = args.strip()
    if raw_steps:
        # int() is the authority on what counts as a number: str.isdigit()
        # accepts characters (e.g. superscript digits) that int() rejects, and
        # silently falling back to 12 would hide a typo from the user.
        try:
            steps = int(raw_steps)
        except ValueError:
            return "Usage: forecast <steps>"
        if steps < 1:
            return "Usage: forecast <steps>"
    else:
        steps = 12

    try:
        from openstat.stats.timeseries import forecast_model
        fc = forecast_model(session._last_model, steps)

        def render(console: Console) -> None:
            table = Table(title=f"Forecast ({steps} steps)")
            table.add_column("Step", justify="right")
            if fc.ndim == 1:
                # Single series: one value per step.
                table.add_column("Forecast", justify="right")
                for i, val in enumerate(fc, 1):
                    table.add_row(str(i), f"{val:.4f}")
            else:
                # Multiple series: one column per variable recorded at fit time.
                vars_list = session._last_fit_kwargs.get("variables", [])
                for v in vars_list:
                    table.add_column(v, justify="right")
                for i, row in enumerate(fc, 1):
                    table.add_row(str(i), *[f"{v:.4f}" for v in row])
            console.print(table)

        return rich_to_str(render)
    except Exception as e:
        return friendly_error(e, "forecast")
180
+
181
+
182
@command("irf", usage="irf [steps=N]")
def cmd_irf(session: Session, args: str) -> str:
    """Impulse response functions for VAR model.

    Reads the optional ``steps`` option (default 10) and passes it, together
    with the session's last fitted model, to ``compute_irf``. Returns that
    function's output, or an error message.
    """
    if session._last_model is None:
        return "No model fitted. Run var first."

    ca = CommandArgs(args)

    try:
        # Parsed inside the try so steps=abc is reported via friendly_error
        # instead of raising an uncaught ValueError.
        steps = int(ca.get_option("steps", "10"))

        from openstat.stats.timeseries import compute_irf
        return compute_irf(session._last_model, steps)
    except Exception as e:
        return friendly_error(e, "irf")
@@ -0,0 +1,111 @@
1
+ """TUI dashboard command: dashboard (requires textual)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from openstat.commands.base import command
6
+ from openstat.session import Session
7
+
8
+
9
@command("dashboard", usage="dashboard")
def cmd_dashboard(session: Session, args: str) -> str:
    """Launch an interactive TUI dashboard (requires: pip install textual).

    Shows dataset overview, variable list, model results, and recent history
    in a full-screen terminal UI. Press Q or Ctrl+C to exit.

    Everything the UI displays is copied out of the session before the app
    starts. Returns an install hint when textual is missing, otherwise
    "Dashboard closed." once the app exits.
    """
    try:
        from textual.app import App, ComposeResult
        from textual.widgets import (
            DataTable, Footer, Header, Label, RichLog, TabbedContent, TabPane,
        )
        from textual.binding import Binding
    except ImportError:
        return (
            "textual is required for the dashboard.\n"
            "Install: pip install textual"
        )

    import polars as pl

    # Copy everything the UI needs up front so the app class below never
    # reads the live session object.
    dataset_name = session.dataset_name or "(no dataset)"
    shape_str = session.shape_str
    df = session.df
    results = list(session.results)
    n_commands = len(session.history)       # total count for the overview
    history = list(session.history[-50:])   # last 50 commands

    # Build column summary: (name, dtype, missing count, mean, sd).
    col_rows: list[tuple[str, ...]] = []
    if df is not None:
        NUMERIC = (pl.Float32, pl.Float64, pl.Int8, pl.Int16, pl.Int32, pl.Int64,
                   pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64)
        for c in df.columns:
            series = df[c]
            dtype = str(series.dtype)
            n_miss = str(series.null_count())
            # Mean/SD are only shown for numeric columns with data.
            mean_str = sd_str = "—"
            if series.dtype in NUMERIC:
                col_data = series.drop_nulls()
                if col_data.len() > 0:
                    mean_str = f"{col_data.mean():.3f}"
                    # A standard deviation needs at least two observations.
                    if col_data.len() > 1:
                        sd_str = f"{col_data.std():.3f}"
            col_rows.append((c, dtype, n_miss, mean_str, sd_str))

    class OpenStatDashboard(App):
        TITLE = f"OpenStat Dashboard — {dataset_name}"
        BINDINGS = [
            Binding("q", "quit", "Quit"),
            Binding("ctrl+c", "quit", "Quit"),
        ]
        CSS = """
        Screen { background: #1a1a2e; }
        Header { background: #16213e; color: #e94560; }
        Footer { background: #16213e; color: #a8b2d8; }
        TabbedContent { background: #1a1a2e; }
        TabPane { padding: 1 2; }
        DataTable { height: auto; }
        Label { color: #cdd6f4; margin: 0 0 1 0; }
        RichLog { height: 30; border: solid #313244; background: #181825; }
        """

        def compose(self) -> ComposeResult:
            yield Header()
            with TabbedContent():
                with TabPane("Overview", id="overview"):
                    yield Label(f"[bold]Dataset:[/bold] {dataset_name} | [bold]Shape:[/bold] {shape_str}")
                    yield Label(
                        f"[bold]Models fitted:[/bold] {len(results)} | "
                        f"[bold]Commands run:[/bold] {n_commands}"
                    )

                with TabPane("Variables", id="variables"):
                    tbl = DataTable(zebra_stripes=True)
                    tbl.add_columns("Variable", "Type", "Missing", "Mean", "SD")
                    for row in col_rows:
                        tbl.add_row(*row)
                    yield tbl

                with TabPane("Models", id="models"):
                    if results:
                        log = RichLog(markup=True, highlight=True)
                        for mr in results:
                            log.write(f"[bold cyan]{mr.name}: {mr.formula}[/bold cyan]")
                            log.write(mr.table)
                            log.write("")
                        yield log
                    else:
                        yield Label("No models fitted yet.")

                with TabPane("History", id="history"):
                    log = RichLog(markup=False, highlight=False)
                    for cmd_line in history:
                        log.write(f". {cmd_line}")
                    yield log

            yield Footer()

    app = OpenStatDashboard()
    app.run()
    return "Dashboard closed."