openstat-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstat/__init__.py +3 -0
- openstat/__main__.py +4 -0
- openstat/backends/__init__.py +16 -0
- openstat/backends/duckdb_backend.py +70 -0
- openstat/backends/polars_backend.py +52 -0
- openstat/cli.py +92 -0
- openstat/commands/__init__.py +82 -0
- openstat/commands/adv_stat_cmds.py +1255 -0
- openstat/commands/advanced_ml_cmds.py +576 -0
- openstat/commands/advreg_cmds.py +207 -0
- openstat/commands/alias_cmds.py +135 -0
- openstat/commands/arch_cmds.py +82 -0
- openstat/commands/arules_cmds.py +111 -0
- openstat/commands/automodel_cmds.py +212 -0
- openstat/commands/backend_cmds.py +82 -0
- openstat/commands/base.py +170 -0
- openstat/commands/bayes_cmds.py +71 -0
- openstat/commands/causal_cmds.py +269 -0
- openstat/commands/cluster_cmds.py +152 -0
- openstat/commands/data_cmds.py +996 -0
- openstat/commands/datamanip_cmds.py +672 -0
- openstat/commands/dataquality_cmds.py +174 -0
- openstat/commands/datetime_cmds.py +176 -0
- openstat/commands/dimreduce_cmds.py +184 -0
- openstat/commands/discrete_cmds.py +149 -0
- openstat/commands/dsl_cmds.py +143 -0
- openstat/commands/epi_cmds.py +93 -0
- openstat/commands/equiv_tobit_cmds.py +94 -0
- openstat/commands/esttab_cmds.py +196 -0
- openstat/commands/export_beamer_cmds.py +142 -0
- openstat/commands/export_cmds.py +201 -0
- openstat/commands/export_extra_cmds.py +240 -0
- openstat/commands/factor_cmds.py +180 -0
- openstat/commands/groupby_cmds.py +155 -0
- openstat/commands/help_cmds.py +237 -0
- openstat/commands/i18n_cmds.py +43 -0
- openstat/commands/import_extra_cmds.py +561 -0
- openstat/commands/influence_cmds.py +134 -0
- openstat/commands/iv_cmds.py +106 -0
- openstat/commands/manova_cmds.py +105 -0
- openstat/commands/mediate_cmds.py +233 -0
- openstat/commands/meta_cmds.py +284 -0
- openstat/commands/mi_cmds.py +228 -0
- openstat/commands/mixed_cmds.py +79 -0
- openstat/commands/mixture_changepoint_cmds.py +166 -0
- openstat/commands/ml_adv_cmds.py +147 -0
- openstat/commands/ml_cmds.py +178 -0
- openstat/commands/model_eval_cmds.py +142 -0
- openstat/commands/network_cmds.py +288 -0
- openstat/commands/nlquery_cmds.py +161 -0
- openstat/commands/nonparam_cmds.py +149 -0
- openstat/commands/outreg_cmds.py +247 -0
- openstat/commands/panel_cmds.py +141 -0
- openstat/commands/pdf_cmds.py +226 -0
- openstat/commands/pipeline_cmds.py +319 -0
- openstat/commands/plot_cmds.py +189 -0
- openstat/commands/plugin_cmds.py +79 -0
- openstat/commands/posthoc_cmds.py +153 -0
- openstat/commands/power_cmds.py +172 -0
- openstat/commands/profile_cmds.py +246 -0
- openstat/commands/rbridge_cmds.py +81 -0
- openstat/commands/regex_cmds.py +104 -0
- openstat/commands/report_cmds.py +48 -0
- openstat/commands/repro_cmds.py +129 -0
- openstat/commands/resampling_cmds.py +109 -0
- openstat/commands/reshape_cmds.py +223 -0
- openstat/commands/sem_cmds.py +177 -0
- openstat/commands/stat_cmds.py +1040 -0
- openstat/commands/stata_import_cmds.py +215 -0
- openstat/commands/string_cmds.py +124 -0
- openstat/commands/surv_cmds.py +145 -0
- openstat/commands/survey_cmds.py +153 -0
- openstat/commands/textanalysis_cmds.py +192 -0
- openstat/commands/ts_adv_cmds.py +136 -0
- openstat/commands/ts_cmds.py +195 -0
- openstat/commands/tui_cmds.py +111 -0
- openstat/commands/ux_cmds.py +191 -0
- openstat/commands/validate_cmds.py +270 -0
- openstat/commands/viz_adv_cmds.py +312 -0
- openstat/commands/viz_extra_cmds.py +251 -0
- openstat/commands/watch_cmds.py +69 -0
- openstat/config.py +106 -0
- openstat/dsl/__init__.py +0 -0
- openstat/dsl/parser.py +332 -0
- openstat/dsl/tokenizer.py +105 -0
- openstat/i18n.py +120 -0
- openstat/io/__init__.py +0 -0
- openstat/io/loader.py +187 -0
- openstat/jupyter/__init__.py +18 -0
- openstat/jupyter/display.py +18 -0
- openstat/jupyter/magic.py +60 -0
- openstat/logging_config.py +59 -0
- openstat/plots/__init__.py +0 -0
- openstat/plots/plotter.py +437 -0
- openstat/plots/surv_plots.py +32 -0
- openstat/plots/ts_plots.py +59 -0
- openstat/plugins/__init__.py +5 -0
- openstat/plugins/manager.py +69 -0
- openstat/repl.py +457 -0
- openstat/reporting/__init__.py +0 -0
- openstat/reporting/eda.py +208 -0
- openstat/reporting/report.py +67 -0
- openstat/script_runner.py +319 -0
- openstat/session.py +133 -0
- openstat/stats/__init__.py +0 -0
- openstat/stats/advanced_regression.py +269 -0
- openstat/stats/arch_garch.py +84 -0
- openstat/stats/bayesian.py +103 -0
- openstat/stats/causal.py +258 -0
- openstat/stats/clustering.py +206 -0
- openstat/stats/discrete.py +311 -0
- openstat/stats/epidemiology.py +119 -0
- openstat/stats/equiv_tobit.py +163 -0
- openstat/stats/factor.py +174 -0
- openstat/stats/imputation.py +282 -0
- openstat/stats/influence.py +78 -0
- openstat/stats/iv.py +131 -0
- openstat/stats/manova.py +124 -0
- openstat/stats/mixed.py +128 -0
- openstat/stats/ml.py +275 -0
- openstat/stats/ml_advanced.py +117 -0
- openstat/stats/model_eval.py +183 -0
- openstat/stats/models.py +1342 -0
- openstat/stats/nonparametric.py +130 -0
- openstat/stats/panel.py +179 -0
- openstat/stats/power.py +295 -0
- openstat/stats/resampling.py +203 -0
- openstat/stats/survey.py +213 -0
- openstat/stats/survival.py +196 -0
- openstat/stats/timeseries.py +142 -0
- openstat/stats/ts_advanced.py +114 -0
- openstat/types.py +11 -0
- openstat/web/__init__.py +1 -0
- openstat/web/app.py +117 -0
- openstat/web/session_manager.py +73 -0
- openstat/web/static/app.js +117 -0
- openstat/web/static/index.html +38 -0
- openstat/web/static/style.css +103 -0
- openstat_cli-1.0.0.dist-info/METADATA +748 -0
- openstat_cli-1.0.0.dist-info/RECORD +143 -0
- openstat_cli-1.0.0.dist-info/WHEEL +4 -0
- openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
- openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""Time series analysis: ARIMA, VAR, ADF test, ACF/PACF, forecasting."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import polars as pl
|
|
7
|
+
import statsmodels.api as sm
|
|
8
|
+
from scipy import stats as sp_stats
|
|
9
|
+
|
|
10
|
+
from openstat.stats.models import FitResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def adf_test(series: np.ndarray, variable_name: str = "y") -> str:
    """Run the Augmented Dickey-Fuller unit root test and format a report.

    The lag length is selected automatically by AIC.  Returns a multi-line,
    human-readable summary with the test statistic, p-value, critical values
    and a verdict at the 5% significance level.
    """
    from statsmodels.tsa.stattools import adfuller

    adf_stat, p_value, used_lag, nobs, crit_values, _icbest = adfuller(
        series, autolag="AIC"
    )

    report = [
        f"Augmented Dickey-Fuller Test: {variable_name}",
        f" ADF Statistic: {adf_stat:.4f}",
        f" p-value: {p_value:.4f}",
        f" Lags used: {used_lag}",
        f" Observations: {nobs}",
        " Critical Values:",
    ]
    report.extend(f" {level}: {val:.4f}" for level, val in crit_values.items())

    # Verdict: small p-value rejects the unit-root null (series is stationary).
    verdict = (
        " ✓ Reject H0: Series is stationary"
        if p_value < 0.05
        else " ⚠ Cannot reject H0: Series has a unit root (non-stationary)"
    )
    report.append(verdict)

    return "\n".join(report)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def fit_arima(
    df: pl.DataFrame,
    dep: str,
    order: tuple[int, int, int],
    exog_vars: list[str] | None = None,
    time_var: str | None = None,
) -> tuple[FitResult, object]:
    """Fit an ARIMA(p,d,q) model, optionally with exogenous variables (ARIMAX).

    Returns the packaged FitResult alongside the raw statsmodels result
    object (useful for later forecasting).
    """
    from statsmodels.tsa.arima.model import ARIMA

    frame = df.to_pandas()
    if time_var and time_var in frame.columns:
        frame = frame.sort_values(time_var)
        try:
            # Best effort: an odd-typed time column may refuse to be an index.
            frame = frame.set_index(time_var)
        except Exception:
            pass

    endog = frame[dep].dropna()
    # Align exogenous regressors with the rows kept after dropping NA in dep.
    exog = frame[exog_vars].loc[endog.index] if exog_vars else None

    result = ARIMA(endog, exog=exog, order=order).fit()

    # Flatten the statsmodels result into plain per-parameter dicts.
    params: dict[str, float] = {}
    std_errors: dict[str, float] = {}
    z_values: dict[str, float] = {}
    p_values: dict[str, float] = {}
    conf_low: dict[str, float] = {}
    conf_high: dict[str, float] = {}
    ci = result.conf_int()
    for name, val in result.params.items():
        params[name] = float(val)
        std_errors[name] = float(result.bse[name])
        z_values[name] = float(result.tvalues[name])
        p_values[name] = float(result.pvalues[name])
        conf_low[name] = float(ci.loc[name, 0])
        conf_high[name] = float(ci.loc[name, 1])

    # The warnings slot doubles as a place for model metadata in the report.
    notes = [
        f"Order: ARIMA{order}",
        f"AIC: {result.aic:.1f}",
        f"BIC: {result.bic:.1f}",
    ]

    fit = FitResult(
        model_type=f"ARIMA{order}",
        formula=f"{dep} ~ ARIMA{order}" + (f" + {' + '.join(exog_vars)}" if exog_vars else ""),
        dep_var=dep,
        indep_vars=list(params.keys()),
        n_obs=int(result.nobs),
        params=params,
        std_errors=std_errors,
        t_values=z_values,
        p_values=p_values,
        conf_int_low=conf_low,
        conf_int_high=conf_high,
        aic=float(result.aic),
        bic=float(result.bic),
        log_likelihood=float(result.llf),
        warnings=notes,
    )

    return fit, result
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def fit_var(
    df: pl.DataFrame,
    variables: list[str],
    lags: int,
    time_var: str | None = None,
) -> tuple[str, object]:
    """Fit a Vector Autoregression (VAR) model.

    Returns summary string and raw result.
    """
    from statsmodels.tsa.api import VAR

    frame = df.to_pandas()
    if time_var and time_var in frame.columns:
        # VAR assumes observations are in temporal order.
        frame = frame.sort_values(time_var)

    clean = frame[variables].dropna()
    fitted = VAR(clean).fit(maxlags=lags)

    return str(fitted.summary()), fitted
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def forecast_model(result, steps: int) -> np.ndarray:
    """Generate forecasts from a fitted time series model.

    Supports ARIMA-style results (``forecast(steps=...)``) and VAR-style
    results, whose ``forecast`` requires the last ``k_ar`` observations as
    the starting window.

    Bug fix: the original checked ``hasattr(result, 'forecast')`` first, so
    the VAR branch (guarded by the same attribute) was unreachable and VAR
    results were dispatched down the ARIMA call path.  Discriminate on the
    VAR-specific attributes first.
    """
    # VAR results expose the lag order and the endogenous data used for
    # fitting; ARIMA results do not carry both of these directly.
    # NOTE(review): discrimination is duck-typed — confirm against the
    # statsmodels result classes actually passed in by callers.
    if hasattr(result, "k_ar") and hasattr(result, "endog"):
        # VAR: seed the forecast with the last k_ar observations.
        fc = result.forecast(result.endog[-result.k_ar:], steps=steps)
        return np.asarray(fc)
    if hasattr(result, "forecast"):
        # ARIMA-style: forecast directly for the requested horizon.
        fc = result.forecast(steps=steps)
        return np.array(fc)
    raise ValueError("Model does not support forecasting")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def compute_irf(var_result, steps: int = 10) -> str:
    """Compute impulse response functions for VAR model."""
    # Delegate the actual IRF computation to the fitted VAR result object.
    irf = var_result.irf(steps)
    return "\n".join(["Impulse Response Functions:", str(irf.summary())])
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Advanced time-series: Granger causality, VECM, Johansen cointegration, STL, tssmooth."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import polars as pl
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def granger_causality(df: pl.DataFrame, dep: str, cause: str, maxlag: int = 4) -> dict:
    """Granger causality test: does 'cause' Granger-cause 'dep'?"""
    from statsmodels.tsa.stattools import grangercausalitytests

    data = df.select([dep, cause]).drop_nulls().to_numpy()
    all_results = grangercausalitytests(data, maxlag=maxlag, verbose=False)

    # Keep the SSR F-test p-value at each tested lag.
    lag_pvals = {lag: float(out[0]["ssr_ftest"][1]) for lag, out in all_results.items()}
    best_lag = min(lag_pvals, key=lag_pvals.get)
    min_pval = lag_pvals[best_lag]

    return {
        "test": "Granger Causality",
        "dep": dep,
        "cause": cause,
        "maxlag": maxlag,
        "lag_pvalues": lag_pvals,
        "min_pvalue": min_pval,
        "best_lag": best_lag,
        "reject_null_5pct": min_pval < 0.05,
    }
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def johansen_test(df: pl.DataFrame, cols: list[str], det_order: int = -1, k_ar_diff: int = 1) -> dict:
    """Johansen cointegration test."""
    from statsmodels.tsa.vector_ar.vecm import coint_johansen

    data = df.select(cols).drop_nulls().to_numpy()
    res = coint_johansen(data, det_order, k_ar_diff)

    trace_stat = res.lr1.tolist()
    # Columns 0/1 of the critical-value tables correspond to the 90%/95% levels.
    trace_cv_90 = res.cvt[:, 0].tolist()
    trace_cv_95 = res.cvt[:, 1].tolist()
    max_stat = res.lr2.tolist()
    max_cv_95 = res.cvm[:, 1].tolist()

    # Number of cointegrating vectors = how many trace statistics exceed
    # their 95% critical value.
    n_coint = int(np.sum(np.array(trace_stat) > np.array(trace_cv_95)))

    return {
        "test": "Johansen Cointegration",
        "cols": cols,
        "trace_statistics": trace_stat,
        "trace_cv_95": trace_cv_95,
        "trace_cv_90": trace_cv_90,
        "max_eigen_statistics": max_stat,
        "max_eigen_cv_95": max_cv_95,
        "n_cointegrating_vectors": n_coint,
    }
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def fit_vecm(df: pl.DataFrame, cols: list[str], k_ar_diff: int = 1, coint_rank: int = 1) -> dict:
    """Vector Error Correction Model."""
    from statsmodels.tsa.vector_ar.vecm import VECM

    data = df.select(cols).drop_nulls().to_numpy()
    fitted = VECM(data, k_ar_diff=k_ar_diff, coint_rank=coint_rank).fit()

    def _optional(attr: str):
        # Some result attributes are absent depending on model configuration.
        return getattr(fitted, attr).tolist() if hasattr(fitted, attr) else None

    return {
        "test": "VECM",
        "cols": cols,
        "k_ar_diff": k_ar_diff,
        "coint_rank": coint_rank,
        "alpha": fitted.alpha.tolist(),
        "beta": fitted.beta.tolist(),
        "gamma": _optional("gamma"),
        "det_coef": _optional("det_coef"),
        "llf": float(fitted.llf) if hasattr(fitted, "llf") else None,
        "_model": fitted,
    }
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def stl_decompose(df: pl.DataFrame, col: str, period: int = 12) -> dict:
    """STL decomposition: trend + seasonal + residual.

    Also reports strength-of-trend and strength-of-seasonality following
    Hyndman & Athanasopoulos (FPP): max(0, 1 - Var(R) / Var(C + R)) for
    component C and remainder R, so both measures lie in [0, 1].
    """
    from statsmodels.tsa.seasonal import STL

    sub = df.select([col]).drop_nulls()
    y = sub[col].to_numpy().astype(float)
    stl = STL(y, period=period).fit()

    # Clamp at 0: for very noisy series the raw ratio can exceed 1, which
    # previously produced meaningless negative strength values.
    strength_trend = max(0.0, 1 - np.var(stl.resid) / np.var(stl.trend + stl.resid))
    strength_seasonal = max(0.0, 1 - np.var(stl.resid) / np.var(stl.seasonal + stl.resid))

    return {
        "test": "STL Decomposition",
        "col": col,
        "period": period,
        "trend": stl.trend.tolist(),
        "seasonal": stl.seasonal.tolist(),
        "resid": stl.resid.tolist(),
        "strength_trend": float(strength_trend),
        "strength_seasonal": float(strength_seasonal),
        "_model": stl,
    }
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def tssmooth(df: pl.DataFrame, col: str, method: str = "ma", window: int = 3, alpha: float = 0.3) -> pl.DataFrame:
    """Smooth a time series: moving average (ma) or exponential smoothing (exp)."""
    values = df[col].to_numpy().astype(float)
    n = len(values)

    if method == "ma":
        kernel = np.ones(window) / window
        smoothed = np.convolve(values, kernel, mode="same")
        # mode="same" implicitly zero-pads the borders; recompute each edge
        # point as the mean of the observations that actually fall inside
        # the window.
        half = window // 2
        for i in range(half):
            smoothed[i] = np.mean(values[:i + half + 1])
            smoothed[n - 1 - i] = np.mean(values[n - i - half - 1:])
    elif method == "exp":
        # Simple exponential smoothing, seeded with the first observation.
        smoothed = np.zeros(n)
        smoothed[0] = values[0]
        for t in range(1, n):
            smoothed[t] = alpha * values[t] + (1 - alpha) * smoothed[t - 1]
    else:
        raise ValueError(f"Unknown smoothing method: {method}. Use 'ma' or 'exp'.")

    return df.with_columns(pl.Series(f"{col}_smooth", smoothed))
|
openstat/types.py
ADDED
openstat/web/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Web-based GUI for OpenStat using FastAPI and WebSocket."""
|
openstat/web/app.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""FastAPI application for OpenStat web GUI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import base64
|
|
6
|
+
import json
|
|
7
|
+
import tempfile
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, UploadFile, File
|
|
12
|
+
from fastapi.staticfiles import StaticFiles
|
|
13
|
+
from fastapi.responses import HTMLResponse
|
|
14
|
+
HAS_FASTAPI = True
|
|
15
|
+
except ImportError:
|
|
16
|
+
HAS_FASTAPI = False
|
|
17
|
+
|
|
18
|
+
from openstat.web.session_manager import SessionManager
|
|
19
|
+
from openstat.logging_config import get_logger
|
|
20
|
+
|
|
21
|
+
log = get_logger("web")
|
|
22
|
+
|
|
23
|
+
if HAS_FASTAPI:
|
|
24
|
+
app = FastAPI(title="OpenStat Web", version="0.3.0")
|
|
25
|
+
sessions = SessionManager()
|
|
26
|
+
|
|
27
|
+
# Serve static files
|
|
28
|
+
static_dir = Path(__file__).parent / "static"
|
|
29
|
+
if static_dir.exists():
|
|
30
|
+
app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
|
|
31
|
+
|
|
32
|
+
@app.get("/", response_class=HTMLResponse)
|
|
33
|
+
async def index():
|
|
34
|
+
"""Serve the main HTML page."""
|
|
35
|
+
html_path = static_dir / "index.html"
|
|
36
|
+
if html_path.exists():
|
|
37
|
+
return html_path.read_text()
|
|
38
|
+
return "<h1>OpenStat Web</h1><p>Static files not found.</p>"
|
|
39
|
+
|
|
40
|
+
@app.post("/api/session")
|
|
41
|
+
async def create_session():
|
|
42
|
+
"""Create a new analysis session."""
|
|
43
|
+
session_id = sessions.create()
|
|
44
|
+
return {"session_id": session_id}
|
|
45
|
+
|
|
46
|
+
@app.post("/api/upload/{session_id}")
|
|
47
|
+
async def upload_file(session_id: str, file: UploadFile = File(...)):
|
|
48
|
+
"""Upload a data file to a session."""
|
|
49
|
+
session = sessions.get(session_id)
|
|
50
|
+
if session is None:
|
|
51
|
+
return {"error": "Session not found"}
|
|
52
|
+
|
|
53
|
+
# Save to temp file
|
|
54
|
+
suffix = Path(file.filename).suffix if file.filename else ".csv"
|
|
55
|
+
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
|
|
56
|
+
content = await file.read()
|
|
57
|
+
tmp.write(content)
|
|
58
|
+
tmp_path = tmp.name
|
|
59
|
+
|
|
60
|
+
# Load into session
|
|
61
|
+
from openstat.repl import _dispatch
|
|
62
|
+
result = _dispatch(session, f"load {tmp_path}")
|
|
63
|
+
return {
|
|
64
|
+
"result": result,
|
|
65
|
+
"shape": session.shape_str,
|
|
66
|
+
"filename": file.filename,
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
@app.websocket("/ws/{session_id}")
|
|
70
|
+
async def websocket_endpoint(websocket: WebSocket, session_id: str):
|
|
71
|
+
"""WebSocket REPL for interactive commands."""
|
|
72
|
+
await websocket.accept()
|
|
73
|
+
session = sessions.get_or_create(session_id)
|
|
74
|
+
log.info("WebSocket connected: %s", session_id)
|
|
75
|
+
|
|
76
|
+
try:
|
|
77
|
+
while True:
|
|
78
|
+
command = await websocket.receive_text()
|
|
79
|
+
from openstat.repl import _dispatch
|
|
80
|
+
result = _dispatch(session, command)
|
|
81
|
+
|
|
82
|
+
if result == "__QUIT__":
|
|
83
|
+
await websocket.send_json({
|
|
84
|
+
"type": "quit",
|
|
85
|
+
"content": "Session ended.",
|
|
86
|
+
})
|
|
87
|
+
break
|
|
88
|
+
|
|
89
|
+
# Check for new plot files
|
|
90
|
+
plot_data = None
|
|
91
|
+
if session.plot_paths:
|
|
92
|
+
last_plot = session.plot_paths[-1]
|
|
93
|
+
plot_path = Path(last_plot)
|
|
94
|
+
if plot_path.exists():
|
|
95
|
+
with open(plot_path, "rb") as f:
|
|
96
|
+
plot_data = base64.b64encode(f.read()).decode()
|
|
97
|
+
|
|
98
|
+
await websocket.send_json({
|
|
99
|
+
"type": "result",
|
|
100
|
+
"content": result or "",
|
|
101
|
+
"shape": session.shape_str,
|
|
102
|
+
"plot": plot_data,
|
|
103
|
+
})
|
|
104
|
+
except WebSocketDisconnect:
|
|
105
|
+
log.info("WebSocket disconnected: %s", session_id)
|
|
106
|
+
except Exception as e:
|
|
107
|
+
log.error("WebSocket error: %s", e)
|
|
108
|
+
|
|
109
|
+
@app.get("/api/status")
|
|
110
|
+
async def status():
|
|
111
|
+
"""Server status."""
|
|
112
|
+
return {
|
|
113
|
+
"active_sessions": sessions.active_count,
|
|
114
|
+
"status": "running",
|
|
115
|
+
}
|
|
116
|
+
else:
|
|
117
|
+
app = None
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Multi-user session management with TTL cleanup."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import uuid
|
|
6
|
+
from datetime import datetime, timedelta
|
|
7
|
+
|
|
8
|
+
from openstat.session import Session
|
|
9
|
+
from openstat.logging_config import get_logger
|
|
10
|
+
|
|
11
|
+
log = get_logger("web.sessions")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class SessionManager:
    """Manages multiple user sessions with automatic TTL-based cleanup.

    Sessions expire after ``ttl_minutes`` of inactivity; when the session
    count reaches ``max_sessions`` the least-recently-used sessions are
    evicted before a new one is created.
    """

    def __init__(self, max_sessions: int = 100, ttl_minutes: int = 60) -> None:
        # session_id -> live Session; _last_access mirrors its key set.
        self._sessions: dict[str, Session] = {}
        self._last_access: dict[str, datetime] = {}
        self._max_sessions = max_sessions
        self._ttl = timedelta(minutes=ttl_minutes)

    def _register(self, session_id: str) -> Session:
        """Create and track a new Session under *session_id*.

        Runs cleanup first so both creation paths honour TTL expiry and the
        max-session cap.
        """
        self._cleanup()
        session = Session()
        self._sessions[session_id] = session
        self._last_access[session_id] = datetime.now()
        log.info("Created session: %s", session_id)
        return session

    def create(self) -> str:
        """Create a new session, return its ID."""
        session_id = str(uuid.uuid4())[:8]
        self._register(session_id)
        return session_id

    def get(self, session_id: str) -> Session | None:
        """Get an existing session by ID (refreshes its last-access time)."""
        session = self._sessions.get(session_id)
        if session is not None:
            self._last_access[session_id] = datetime.now()
        return session

    def get_or_create(self, session_id: str) -> Session:
        """Get existing session or create new one under the given ID.

        Bug fix: this path previously bypassed _cleanup(), so sessions
        created here (e.g. via the WebSocket endpoint) were never bounded
        by max_sessions and expired sessions were never purged.
        """
        session = self.get(session_id)
        if session is None:
            session = self._register(session_id)
        return session

    def remove(self, session_id: str) -> None:
        """Remove a session (no-op if absent)."""
        self._sessions.pop(session_id, None)
        self._last_access.pop(session_id, None)

    def _cleanup(self) -> None:
        """Remove expired sessions and enforce max count."""
        now = datetime.now()
        expired = [
            sid for sid, last in self._last_access.items()
            if now - last > self._ttl
        ]
        for sid in expired:
            self.remove(sid)
            log.info("Expired session: %s", sid)

        # If still at/over the limit, evict least-recently-used sessions so
        # the upcoming registration stays within the cap.
        while len(self._sessions) >= self._max_sessions:
            oldest = min(self._last_access, key=self._last_access.get)
            self.remove(oldest)
            log.info("Evicted session: %s", oldest)

    @property
    def active_count(self) -> int:
        # Number of currently tracked sessions.
        return len(self._sessions)
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
// OpenStat Web Client
// Thin browser REPL: obtains a server-side session over HTTP, then streams
// commands and results over a WebSocket. Relies on the element IDs defined
// in static/index.html.
(function() {
    let ws = null;
    let sessionId = null;
    const output = document.getElementById('output');
    const input = document.getElementById('command-input');
    const status = document.getElementById('status');
    const shapeInfo = document.getElementById('shape-info');
    const plotContainer = document.getElementById('plot-container');
    const plotImg = document.getElementById('plot-img');
    const fileUpload = document.getElementById('file-upload');
    const history = [];      // commands entered during this page load
    let historyIdx = -1;     // cursor for ArrowUp/ArrowDown navigation

    // Ask the server for a session id, then open the WebSocket REPL.
    async function init() {
        const resp = await fetch('/api/session', { method: 'POST' });
        const data = await resp.json();
        sessionId = data.session_id;
        connectWS();
    }

    function connectWS() {
        // Match ws/wss to the page's http/https scheme.
        const proto = location.protocol === 'https:' ? 'wss:' : 'ws:';
        ws = new WebSocket(`${proto}//${location.host}/ws/${sessionId}`);

        ws.onopen = () => {
            status.textContent = 'Connected';
            status.style.color = '#a6e3a1';
            appendOutput('OpenStat v0.3.0 — Web Interface\nType help for commands.\n', 'result');
        };

        // Server frames carry optional content/shape/plot fields; each is
        // applied independently when present.
        ws.onmessage = (event) => {
            const msg = JSON.parse(event.data);
            if (msg.content) {
                appendOutput(msg.content + '\n', 'result');
            }
            if (msg.shape) {
                shapeInfo.textContent = msg.shape;
            }
            if (msg.plot) {
                // Plot arrives as base64-encoded PNG bytes.
                plotImg.src = 'data:image/png;base64,' + msg.plot;
                plotContainer.style.display = 'block';
            }
            if (msg.type === 'quit') {
                status.textContent = 'Disconnected';
                status.style.color = '#f38ba8';
            }
        };

        ws.onclose = () => {
            status.textContent = 'Disconnected';
            status.style.color = '#f38ba8';
        };
    }

    // Append a styled span to the output pane and keep it pinned to bottom.
    function appendOutput(text, cls) {
        const span = document.createElement('span');
        span.className = cls;
        span.textContent = text;
        output.appendChild(span);
        output.scrollTop = output.scrollHeight;
    }

    // Enter submits a command; ArrowUp/ArrowDown walk the command history.
    input.addEventListener('keydown', (e) => {
        if (e.key === 'Enter') {
            const cmd = input.value.trim();
            if (!cmd) return;
            history.push(cmd);
            historyIdx = history.length;
            appendOutput('openstat> ' + cmd + '\n', 'cmd');
            if (ws && ws.readyState === WebSocket.OPEN) {
                ws.send(cmd);
            }
            input.value = '';
        } else if (e.key === 'ArrowUp') {
            e.preventDefault();
            if (historyIdx > 0) {
                historyIdx--;
                input.value = history[historyIdx];
            }
        } else if (e.key === 'ArrowDown') {
            e.preventDefault();
            if (historyIdx < history.length - 1) {
                historyIdx++;
                input.value = history[historyIdx];
            } else {
                // Stepping past the newest entry resets to an empty prompt.
                historyIdx = history.length;
                input.value = '';
            }
        }
    });

    // Upload a data file as multipart form data; the server loads it into
    // this session and replies with the result text and new data shape.
    fileUpload.addEventListener('change', async (e) => {
        const file = e.target.files[0];
        if (!file) return;
        const formData = new FormData();
        formData.append('file', file);
        try {
            const resp = await fetch(`/api/upload/${sessionId}`, {
                method: 'POST',
                body: formData,
            });
            const data = await resp.json();
            if (data.result) {
                appendOutput(data.result + '\n', 'result');
            }
            if (data.shape) {
                shapeInfo.textContent = data.shape;
            }
        } catch (err) {
            appendOutput('Upload failed: ' + err + '\n', 'error');
        }
        // Clear the input so selecting the same file again re-fires 'change'.
        fileUpload.value = '';
    });

    init();
})();
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
<!DOCTYPE html>
<!-- OpenStat web GUI shell. Element IDs (output, command-input, status,
     shape-info, plot-container, plot-img, file-upload) are referenced by
     /static/app.js — keep them stable. -->
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>OpenStat Web</title>
    <link rel="stylesheet" href="/static/style.css">
</head>
<body>
    <header>
        <h1>OpenStat <span class="version">v0.3.0</span></h1>
        <!-- Connection indicator, updated by the WebSocket handlers. -->
        <span class="status" id="status">Connecting...</span>
    </header>

    <main>
        <!-- REPL transcript pane. -->
        <div id="output" class="output"></div>
        <!-- Hidden until the first plot arrives over the WebSocket. -->
        <div id="plot-container" class="plot-container" style="display:none;">
            <img id="plot-img" alt="Plot" />
        </div>
    </main>

    <footer>
        <div class="input-row">
            <span class="prompt">openstat&gt;</span>
            <input type="text" id="command-input" placeholder="Type a command..." autofocus />
        </div>
        <div class="toolbar">
            <label class="upload-label">
                Upload Data
                <input type="file" id="file-upload" accept=".csv,.parquet,.dta,.xlsx,.sas7bdat,.sav" />
            </label>
            <!-- Current dataset dimensions, updated after load/upload. -->
            <span id="shape-info" class="shape-info">No data</span>
        </div>
    </footer>

    <script src="/static/app.js"></script>
</body>
</html>
|