openstat-cli 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openstat_cli-1.0.0/.github/workflows/ci.yml +47 -0
- openstat_cli-1.0.0/.gitignore +40 -0
- openstat_cli-1.0.0/CHANGELOG.md +133 -0
- openstat_cli-1.0.0/CONTRIBUTING.md +116 -0
- openstat_cli-1.0.0/LICENSE +21 -0
- openstat_cli-1.0.0/PKG-INFO +748 -0
- openstat_cli-1.0.0/README.md +623 -0
- openstat_cli-1.0.0/examples/data.csv +51 -0
- openstat_cli-1.0.0/examples/demo.ost +109 -0
- openstat_cli-1.0.0/pyproject.toml +154 -0
- openstat_cli-1.0.0/src/openstat/__init__.py +3 -0
- openstat_cli-1.0.0/src/openstat/__main__.py +4 -0
- openstat_cli-1.0.0/src/openstat/backends/__init__.py +16 -0
- openstat_cli-1.0.0/src/openstat/backends/duckdb_backend.py +70 -0
- openstat_cli-1.0.0/src/openstat/backends/polars_backend.py +52 -0
- openstat_cli-1.0.0/src/openstat/cli.py +92 -0
- openstat_cli-1.0.0/src/openstat/commands/__init__.py +82 -0
- openstat_cli-1.0.0/src/openstat/commands/adv_stat_cmds.py +1255 -0
- openstat_cli-1.0.0/src/openstat/commands/advanced_ml_cmds.py +576 -0
- openstat_cli-1.0.0/src/openstat/commands/advreg_cmds.py +207 -0
- openstat_cli-1.0.0/src/openstat/commands/alias_cmds.py +135 -0
- openstat_cli-1.0.0/src/openstat/commands/arch_cmds.py +82 -0
- openstat_cli-1.0.0/src/openstat/commands/arules_cmds.py +111 -0
- openstat_cli-1.0.0/src/openstat/commands/automodel_cmds.py +212 -0
- openstat_cli-1.0.0/src/openstat/commands/backend_cmds.py +82 -0
- openstat_cli-1.0.0/src/openstat/commands/base.py +170 -0
- openstat_cli-1.0.0/src/openstat/commands/bayes_cmds.py +71 -0
- openstat_cli-1.0.0/src/openstat/commands/causal_cmds.py +269 -0
- openstat_cli-1.0.0/src/openstat/commands/cluster_cmds.py +152 -0
- openstat_cli-1.0.0/src/openstat/commands/data_cmds.py +996 -0
- openstat_cli-1.0.0/src/openstat/commands/datamanip_cmds.py +672 -0
- openstat_cli-1.0.0/src/openstat/commands/dataquality_cmds.py +174 -0
- openstat_cli-1.0.0/src/openstat/commands/datetime_cmds.py +176 -0
- openstat_cli-1.0.0/src/openstat/commands/dimreduce_cmds.py +184 -0
- openstat_cli-1.0.0/src/openstat/commands/discrete_cmds.py +149 -0
- openstat_cli-1.0.0/src/openstat/commands/dsl_cmds.py +143 -0
- openstat_cli-1.0.0/src/openstat/commands/epi_cmds.py +93 -0
- openstat_cli-1.0.0/src/openstat/commands/equiv_tobit_cmds.py +94 -0
- openstat_cli-1.0.0/src/openstat/commands/esttab_cmds.py +196 -0
- openstat_cli-1.0.0/src/openstat/commands/export_beamer_cmds.py +142 -0
- openstat_cli-1.0.0/src/openstat/commands/export_cmds.py +201 -0
- openstat_cli-1.0.0/src/openstat/commands/export_extra_cmds.py +240 -0
- openstat_cli-1.0.0/src/openstat/commands/factor_cmds.py +180 -0
- openstat_cli-1.0.0/src/openstat/commands/groupby_cmds.py +155 -0
- openstat_cli-1.0.0/src/openstat/commands/help_cmds.py +237 -0
- openstat_cli-1.0.0/src/openstat/commands/i18n_cmds.py +43 -0
- openstat_cli-1.0.0/src/openstat/commands/import_extra_cmds.py +561 -0
- openstat_cli-1.0.0/src/openstat/commands/influence_cmds.py +134 -0
- openstat_cli-1.0.0/src/openstat/commands/iv_cmds.py +106 -0
- openstat_cli-1.0.0/src/openstat/commands/manova_cmds.py +105 -0
- openstat_cli-1.0.0/src/openstat/commands/mediate_cmds.py +233 -0
- openstat_cli-1.0.0/src/openstat/commands/meta_cmds.py +284 -0
- openstat_cli-1.0.0/src/openstat/commands/mi_cmds.py +228 -0
- openstat_cli-1.0.0/src/openstat/commands/mixed_cmds.py +79 -0
- openstat_cli-1.0.0/src/openstat/commands/mixture_changepoint_cmds.py +166 -0
- openstat_cli-1.0.0/src/openstat/commands/ml_adv_cmds.py +147 -0
- openstat_cli-1.0.0/src/openstat/commands/ml_cmds.py +178 -0
- openstat_cli-1.0.0/src/openstat/commands/model_eval_cmds.py +142 -0
- openstat_cli-1.0.0/src/openstat/commands/network_cmds.py +288 -0
- openstat_cli-1.0.0/src/openstat/commands/nlquery_cmds.py +161 -0
- openstat_cli-1.0.0/src/openstat/commands/nonparam_cmds.py +149 -0
- openstat_cli-1.0.0/src/openstat/commands/outreg_cmds.py +247 -0
- openstat_cli-1.0.0/src/openstat/commands/panel_cmds.py +141 -0
- openstat_cli-1.0.0/src/openstat/commands/pdf_cmds.py +226 -0
- openstat_cli-1.0.0/src/openstat/commands/pipeline_cmds.py +319 -0
- openstat_cli-1.0.0/src/openstat/commands/plot_cmds.py +189 -0
- openstat_cli-1.0.0/src/openstat/commands/plugin_cmds.py +79 -0
- openstat_cli-1.0.0/src/openstat/commands/posthoc_cmds.py +153 -0
- openstat_cli-1.0.0/src/openstat/commands/power_cmds.py +172 -0
- openstat_cli-1.0.0/src/openstat/commands/profile_cmds.py +246 -0
- openstat_cli-1.0.0/src/openstat/commands/rbridge_cmds.py +81 -0
- openstat_cli-1.0.0/src/openstat/commands/regex_cmds.py +104 -0
- openstat_cli-1.0.0/src/openstat/commands/report_cmds.py +48 -0
- openstat_cli-1.0.0/src/openstat/commands/repro_cmds.py +129 -0
- openstat_cli-1.0.0/src/openstat/commands/resampling_cmds.py +109 -0
- openstat_cli-1.0.0/src/openstat/commands/reshape_cmds.py +223 -0
- openstat_cli-1.0.0/src/openstat/commands/sem_cmds.py +177 -0
- openstat_cli-1.0.0/src/openstat/commands/stat_cmds.py +1040 -0
- openstat_cli-1.0.0/src/openstat/commands/stata_import_cmds.py +215 -0
- openstat_cli-1.0.0/src/openstat/commands/string_cmds.py +124 -0
- openstat_cli-1.0.0/src/openstat/commands/surv_cmds.py +145 -0
- openstat_cli-1.0.0/src/openstat/commands/survey_cmds.py +153 -0
- openstat_cli-1.0.0/src/openstat/commands/textanalysis_cmds.py +192 -0
- openstat_cli-1.0.0/src/openstat/commands/ts_adv_cmds.py +136 -0
- openstat_cli-1.0.0/src/openstat/commands/ts_cmds.py +195 -0
- openstat_cli-1.0.0/src/openstat/commands/tui_cmds.py +111 -0
- openstat_cli-1.0.0/src/openstat/commands/ux_cmds.py +191 -0
- openstat_cli-1.0.0/src/openstat/commands/validate_cmds.py +270 -0
- openstat_cli-1.0.0/src/openstat/commands/viz_adv_cmds.py +312 -0
- openstat_cli-1.0.0/src/openstat/commands/viz_extra_cmds.py +251 -0
- openstat_cli-1.0.0/src/openstat/commands/watch_cmds.py +69 -0
- openstat_cli-1.0.0/src/openstat/config.py +106 -0
- openstat_cli-1.0.0/src/openstat/dsl/__init__.py +0 -0
- openstat_cli-1.0.0/src/openstat/dsl/parser.py +332 -0
- openstat_cli-1.0.0/src/openstat/dsl/tokenizer.py +105 -0
- openstat_cli-1.0.0/src/openstat/i18n.py +120 -0
- openstat_cli-1.0.0/src/openstat/io/__init__.py +0 -0
- openstat_cli-1.0.0/src/openstat/io/loader.py +187 -0
- openstat_cli-1.0.0/src/openstat/jupyter/__init__.py +18 -0
- openstat_cli-1.0.0/src/openstat/jupyter/display.py +18 -0
- openstat_cli-1.0.0/src/openstat/jupyter/magic.py +60 -0
- openstat_cli-1.0.0/src/openstat/logging_config.py +59 -0
- openstat_cli-1.0.0/src/openstat/plots/__init__.py +0 -0
- openstat_cli-1.0.0/src/openstat/plots/plotter.py +437 -0
- openstat_cli-1.0.0/src/openstat/plots/surv_plots.py +32 -0
- openstat_cli-1.0.0/src/openstat/plots/ts_plots.py +59 -0
- openstat_cli-1.0.0/src/openstat/plugins/__init__.py +5 -0
- openstat_cli-1.0.0/src/openstat/plugins/manager.py +69 -0
- openstat_cli-1.0.0/src/openstat/repl.py +457 -0
- openstat_cli-1.0.0/src/openstat/reporting/__init__.py +0 -0
- openstat_cli-1.0.0/src/openstat/reporting/eda.py +208 -0
- openstat_cli-1.0.0/src/openstat/reporting/report.py +67 -0
- openstat_cli-1.0.0/src/openstat/script_runner.py +319 -0
- openstat_cli-1.0.0/src/openstat/session.py +133 -0
- openstat_cli-1.0.0/src/openstat/stats/__init__.py +0 -0
- openstat_cli-1.0.0/src/openstat/stats/advanced_regression.py +269 -0
- openstat_cli-1.0.0/src/openstat/stats/arch_garch.py +84 -0
- openstat_cli-1.0.0/src/openstat/stats/bayesian.py +103 -0
- openstat_cli-1.0.0/src/openstat/stats/causal.py +258 -0
- openstat_cli-1.0.0/src/openstat/stats/clustering.py +206 -0
- openstat_cli-1.0.0/src/openstat/stats/discrete.py +311 -0
- openstat_cli-1.0.0/src/openstat/stats/epidemiology.py +119 -0
- openstat_cli-1.0.0/src/openstat/stats/equiv_tobit.py +163 -0
- openstat_cli-1.0.0/src/openstat/stats/factor.py +174 -0
- openstat_cli-1.0.0/src/openstat/stats/imputation.py +282 -0
- openstat_cli-1.0.0/src/openstat/stats/influence.py +78 -0
- openstat_cli-1.0.0/src/openstat/stats/iv.py +131 -0
- openstat_cli-1.0.0/src/openstat/stats/manova.py +124 -0
- openstat_cli-1.0.0/src/openstat/stats/mixed.py +128 -0
- openstat_cli-1.0.0/src/openstat/stats/ml.py +275 -0
- openstat_cli-1.0.0/src/openstat/stats/ml_advanced.py +117 -0
- openstat_cli-1.0.0/src/openstat/stats/model_eval.py +183 -0
- openstat_cli-1.0.0/src/openstat/stats/models.py +1342 -0
- openstat_cli-1.0.0/src/openstat/stats/nonparametric.py +130 -0
- openstat_cli-1.0.0/src/openstat/stats/panel.py +179 -0
- openstat_cli-1.0.0/src/openstat/stats/power.py +295 -0
- openstat_cli-1.0.0/src/openstat/stats/resampling.py +203 -0
- openstat_cli-1.0.0/src/openstat/stats/survey.py +213 -0
- openstat_cli-1.0.0/src/openstat/stats/survival.py +196 -0
- openstat_cli-1.0.0/src/openstat/stats/timeseries.py +142 -0
- openstat_cli-1.0.0/src/openstat/stats/ts_advanced.py +114 -0
- openstat_cli-1.0.0/src/openstat/types.py +11 -0
- openstat_cli-1.0.0/src/openstat/web/__init__.py +1 -0
- openstat_cli-1.0.0/src/openstat/web/app.py +117 -0
- openstat_cli-1.0.0/src/openstat/web/session_manager.py +73 -0
- openstat_cli-1.0.0/src/openstat/web/static/app.js +117 -0
- openstat_cli-1.0.0/src/openstat/web/static/index.html +38 -0
- openstat_cli-1.0.0/src/openstat/web/static/style.css +103 -0
- openstat_cli-1.0.0/tests/__init__.py +0 -0
- openstat_cli-1.0.0/tests/test_advreg.py +210 -0
- openstat_cli-1.0.0/tests/test_backends.py +66 -0
- openstat_cli-1.0.0/tests/test_bayes.py +101 -0
- openstat_cli-1.0.0/tests/test_causal.py +118 -0
- openstat_cli-1.0.0/tests/test_clustering.py +181 -0
- openstat_cli-1.0.0/tests/test_commands.py +268 -0
- openstat_cli-1.0.0/tests/test_data_v040.py +160 -0
- openstat_cli-1.0.0/tests/test_dataquality.py +143 -0
- openstat_cli-1.0.0/tests/test_discrete.py +136 -0
- openstat_cli-1.0.0/tests/test_epidemiology.py +77 -0
- openstat_cli-1.0.0/tests/test_equiv_tobit.py +90 -0
- openstat_cli-1.0.0/tests/test_esttab.py +66 -0
- openstat_cli-1.0.0/tests/test_export.py +184 -0
- openstat_cli-1.0.0/tests/test_factor.py +212 -0
- openstat_cli-1.0.0/tests/test_file_formats.py +83 -0
- openstat_cli-1.0.0/tests/test_imputation.py +85 -0
- openstat_cli-1.0.0/tests/test_influence.py +81 -0
- openstat_cli-1.0.0/tests/test_integration.py +166 -0
- openstat_cli-1.0.0/tests/test_iv.py +73 -0
- openstat_cli-1.0.0/tests/test_jupyter.py +44 -0
- openstat_cli-1.0.0/tests/test_manova.py +106 -0
- openstat_cli-1.0.0/tests/test_mixed.py +71 -0
- openstat_cli-1.0.0/tests/test_ml.py +174 -0
- openstat_cli-1.0.0/tests/test_ml_advanced.py +101 -0
- openstat_cli-1.0.0/tests/test_model_eval.py +122 -0
- openstat_cli-1.0.0/tests/test_models.py +221 -0
- openstat_cli-1.0.0/tests/test_nonparam.py +187 -0
- openstat_cli-1.0.0/tests/test_outreg.py +67 -0
- openstat_cli-1.0.0/tests/test_panel.py +93 -0
- openstat_cli-1.0.0/tests/test_parser.py +202 -0
- openstat_cli-1.0.0/tests/test_plugins.py +81 -0
- openstat_cli-1.0.0/tests/test_power.py +177 -0
- openstat_cli-1.0.0/tests/test_resampling.py +106 -0
- openstat_cli-1.0.0/tests/test_reshape.py +195 -0
- openstat_cli-1.0.0/tests/test_round4.py +355 -0
- openstat_cli-1.0.0/tests/test_round5.py +225 -0
- openstat_cli-1.0.0/tests/test_round6.py +294 -0
- openstat_cli-1.0.0/tests/test_round7.py +274 -0
- openstat_cli-1.0.0/tests/test_round8.py +293 -0
- openstat_cli-1.0.0/tests/test_session.py +126 -0
- openstat_cli-1.0.0/tests/test_string_cmds.py +105 -0
- openstat_cli-1.0.0/tests/test_survey.py +95 -0
- openstat_cli-1.0.0/tests/test_survival.py +96 -0
- openstat_cli-1.0.0/tests/test_timeseries.py +107 -0
- openstat_cli-1.0.0/tests/test_ts_advanced.py +118 -0
- openstat_cli-1.0.0/tests/test_v020.py +648 -0
- openstat_cli-1.0.0/tests/test_web.py +79 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ${{ matrix.os }}
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
os: [ubuntu-latest, macos-latest]
|
|
15
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: pip install -e ".[dev]"
|
|
26
|
+
|
|
27
|
+
- name: Run tests with coverage
|
|
28
|
+
run: pytest tests/ -v --tb=short --cov=openstat --cov-report=term-missing --cov-fail-under=75
|
|
29
|
+
|
|
30
|
+
- name: Run demo script
|
|
31
|
+
run: python -m openstat run examples/demo.ost
|
|
32
|
+
|
|
33
|
+
lint:
|
|
34
|
+
runs-on: ubuntu-latest
|
|
35
|
+
steps:
|
|
36
|
+
- uses: actions/checkout@v4
|
|
37
|
+
|
|
38
|
+
- name: Set up Python
|
|
39
|
+
uses: actions/setup-python@v5
|
|
40
|
+
with:
|
|
41
|
+
python-version: "3.12"
|
|
42
|
+
|
|
43
|
+
- name: Install dependencies
|
|
44
|
+
run: pip install -e ".[dev]" ruff
|
|
45
|
+
|
|
46
|
+
- name: Ruff check
|
|
47
|
+
run: ruff check src/ tests/
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.egg-info/
|
|
6
|
+
*.egg
|
|
7
|
+
dist/
|
|
8
|
+
build/
|
|
9
|
+
*.whl
|
|
10
|
+
|
|
11
|
+
# Virtual environments
|
|
12
|
+
.venv/
|
|
13
|
+
venv/
|
|
14
|
+
env/
|
|
15
|
+
|
|
16
|
+
# Testing
|
|
17
|
+
.pytest_cache/
|
|
18
|
+
.coverage
|
|
19
|
+
htmlcov/
|
|
20
|
+
.tox/
|
|
21
|
+
|
|
22
|
+
# IDE
|
|
23
|
+
.vscode/
|
|
24
|
+
.idea/
|
|
25
|
+
*.swp
|
|
26
|
+
*.swo
|
|
27
|
+
*~
|
|
28
|
+
|
|
29
|
+
# Ruff
|
|
30
|
+
.ruff_cache/
|
|
31
|
+
|
|
32
|
+
# OS
|
|
33
|
+
.DS_Store
|
|
34
|
+
Thumbs.db
|
|
35
|
+
|
|
36
|
+
# Outputs (generated by running OpenStat)
|
|
37
|
+
outputs/
|
|
38
|
+
|
|
39
|
+
# Claude Code
|
|
40
|
+
.claude/
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to OpenStat are documented here.
|
|
4
|
+
|
|
5
|
+
Format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|
6
|
+
Versioning follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## [0.9.0] – 2025-05-01
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- **Post-hoc tests** (`posthoc`): Tukey HSD, Bonferroni, Scheffé pairwise comparisons after ANOVA
|
|
14
|
+
- **Coefficient plot** (`plot coef`): horizontal error-bar plot of regression coefficients with 95% CI
|
|
15
|
+
- **Marginal effects plot** (`plot margins`): visualise marginal effects after `margins`
|
|
16
|
+
- **Interaction plot** (`plot interaction <y> <x> <moderator>`): shows regression lines for groups defined at ±1 SD of the moderator
|
|
17
|
+
- **Real-time session log** (`log using <path>`): streams every command and its output to a file
|
|
18
|
+
- **Script runner enhancements**: `foreach`, `forvalues`, `if/else` blocks with variable substitution (`{var}`)
|
|
19
|
+
- **Database connectivity** (`sqlload`): load data directly from SQL databases via connection URL (requires `connectorx`)
|
|
20
|
+
- **SEM / CFA** (`sem`, `cfa`): structural equation modelling and confirmatory factor analysis via semopy
|
|
21
|
+
- **Meta-analysis** (`meta`): fixed-effects (inverse-variance) and random-effects (DerSimonian-Laird), forest and funnel plots
|
|
22
|
+
- **Network analysis** (`network`): build, describe, centrality, community detection, and plotting via networkx
|
|
23
|
+
- **Auto model selection** (`automodel`): exhaustive subset search (≤8 predictors) or forward stepwise, ranked by AIC/BIC
|
|
24
|
+
- **IPTW** (`iptw`): inverse probability of treatment weighting for causal inference; ATE/ATT; covariate balance table
|
|
25
|
+
- **Reproducibility** (`session info/save/replay`, `set seed`, `version`): full session management and script replay
|
|
26
|
+
- **TUI Dashboard** (`dashboard`): full-screen terminal UI with dataset overview, variable table, model results, and history (requires `textual`)
|
|
27
|
+
- **PyPI packaging**: `pyproject.toml` polished for release; new extras `database`, `sem`, `network`, `tui`
|
|
28
|
+
|
|
29
|
+
### Changed
|
|
30
|
+
- `export docx` now includes a Summary Statistics table (N, Mean, SD, Min–Max per numeric column)
|
|
31
|
+
- `set` command extended with `set seed <N>` sub-command
|
|
32
|
+
- `log` command moved from `report_cmds` to `outreg_cmds` and upgraded to real-time streaming
|
|
33
|
+
|
|
34
|
+
### Fixed
|
|
35
|
+
- `plot coef` crash when `params` is a numpy array (not a pandas Series)
|
|
36
|
+
- `} else {` block parsing in script runner
|
|
37
|
+
- `automodel` formula normalisation (space-separated predictors now work)
|
|
38
|
+
- `semopy.Model` API usage: the `obj=` parameter is now passed to `fit()` instead of `__init__()`
|
|
39
|
+
- Duplicate command registration for `set` and `log`
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## [0.8.0] – 2025-03-01
|
|
44
|
+
|
|
45
|
+
### Added
|
|
46
|
+
- **Bayesian inference** (`bayes`): MCMC sampling, posterior summaries, trace/posterior plots
|
|
47
|
+
- **ARCH/GARCH** (`arch`, `garch`): volatility modelling for financial time series
|
|
48
|
+
- **MANOVA** (`manova`): multivariate analysis of variance
|
|
49
|
+
- **Clustering** (`cluster kmeans`, `cluster hclust`, `discriminant`): k-means, hierarchical, LDA/QDA
|
|
50
|
+
- **Advanced ML** (`randomforest`, `gradientboost`, `neuralnet`, `svm`, `knn`): ensemble, neural-network, support-vector, and nearest-neighbour models
|
|
51
|
+
- **Influence diagnostics** (`influence`): Cook's distance, DFFITS, leverage plots
|
|
52
|
+
- **Advanced regression** (`quantreg`, `truncreg`, `intreg`, `heckman`): quantile, truncated, interval, and selection models
|
|
53
|
+
- **Advanced time series** (`vecm`, `var`, `granger`, `threshold`): VAR, VECM, Granger causality, threshold models
|
|
54
|
+
- **Epidemiology** (`epi`): risk ratios, odds ratios, attributable risk, Mantel-Haenszel pooling
|
|
55
|
+
- **Equivalence testing and Tobit** (`equiv`, `tobit`): TOST equivalence tests and Tobit censored regression
|
|
56
|
+
- **String commands** (`strreplace`, `strsplit`, `strextract`, `strpad`, `strcat`): column string manipulation
|
|
57
|
+
- **DSL / macro system** (`define`, `macro`, `eval`): variable macros and expressions
|
|
58
|
+
- **Resampling** (`bootstrap`, `jackknife`, `permtest`): resampling-based inference
|
|
59
|
+
- **Model evaluation** (`roc`, `calibration`, `confusion`): classification diagnostics
|
|
60
|
+
- **Data quality** (`missing`, `duplicates`, `outlier`, `assert`): profiling and validation
|
|
61
|
+
- **Reshape** (`reshape wide`, `reshape long`, `pivot`, `unpivot`): data reshaping
|
|
62
|
+
- **esttab** (`esttab`): publication-quality coefficient tables (LaTeX, HTML, Markdown)
|
|
63
|
+
- **outreg2** (`outreg2`): Word/RTF-compatible regression tables
|
|
64
|
+
- **Visualisation extras** (`plot violin`, `plot pairplot`, `plot parallel`, `plot density`): additional plot types
|
|
65
|
+
|
|
66
|
+
### Changed
|
|
67
|
+
- Session now tracks `_last_fit_result` and `_last_fit_kwargs` for bootstrap/esttab integration
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## [0.7.0] – 2025-01-01
|
|
72
|
+
|
|
73
|
+
### Added
|
|
74
|
+
- **Survey analysis** (`svyset`, `svy: mean`, `svy: total`, `svy: proportion`, `svy: reg`): complex survey design
|
|
75
|
+
- **Multiple imputation** (`mi impute`, `mi estimate`): MICE-based imputation with pooled estimates
|
|
76
|
+
- **DuckDB backend** (`set backend duckdb`, `sql`): fast in-memory SQL on datasets
|
|
77
|
+
- **Web API** (`openstat web`): FastAPI + WebSocket server for browser-based access
|
|
78
|
+
- **Jupyter magic** (`%openstat`): run OpenStat commands in Jupyter notebooks
|
|
79
|
+
- **Plugin system** (`plugin load/list/unload`): third-party command packages
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## [0.6.0] – 2024-11-01
|
|
84
|
+
|
|
85
|
+
### Added
|
|
86
|
+
- **Power analysis** (`power`): power and sample-size calculations for t-tests, ANOVA, chi-square tests, and tests of proportions
|
|
87
|
+
- **Factor analysis** (`factor`, `pca`, `rotate`): EFA, PCA, varimax/oblimin rotation
|
|
88
|
+
- **IV regression** (`ivregress`): two-stage least squares
|
|
89
|
+
- **Mixed models** (`mixed`): linear mixed-effects models via statsmodels
|
|
90
|
+
- **Panel data** (`xtset`, `xtreg`, `xttest`, `hausman`): fixed/random effects, Hausman test
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## [0.5.0] – 2024-09-01
|
|
95
|
+
|
|
96
|
+
### Added
|
|
97
|
+
- **Survival analysis** (`stset`, `sts graph`, `stcox`, `streg`, `stsum`): Kaplan-Meier, Cox PH, AFT
|
|
98
|
+
- **Time series** (`tsset`, `arima`, `ardl`, `adf`, `kpss`, `forecast`): ARIMA, ARDL, unit-root tests
|
|
99
|
+
- **Causal inference** (`pscore`, `teffects`, `did`, `rddesign`, `synth`): propensity score, DiD, RD
|
|
100
|
+
- **Discrete choice** (`logit`, `probit`, `ologit`, `oprobit`, `mlogit`, `clogit`): limited dependent variable models
|
|
101
|
+
- **Undo/redo** (`undo`, `redo`): step-back for data transformations
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## [0.4.0] – 2024-07-01
|
|
106
|
+
|
|
107
|
+
### Added
|
|
108
|
+
- **Non-parametric tests** (`kruskal`, `mannwhitney`, `wilcoxon`, `friedman`, `spearman`): rank-based inference
|
|
109
|
+
- **Report generation** (`report html`, `report latex`): automated analysis reports
|
|
110
|
+
- **Plot diagnostics** (`plot diagnostics`): residuals vs fitted, Q-Q, scale-location
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## [0.3.0] – 2024-05-01
|
|
115
|
+
|
|
116
|
+
### Added
|
|
117
|
+
- **Data management** (`load`, `save`, `drop`, `keep`, `rename`, `encode`, `decode`, `generate`, `replace`, `sort`, `merge`, `append`, `sample`, `undo`)
|
|
118
|
+
- **Descriptive statistics** (`describe`, `summarize`, `tabulate`, `correlate`, `crosstab`, `anova`)
|
|
119
|
+
- **Regression** (`ols`, `logit`, `poisson`, `margins`, `predict`, `test`)
|
|
120
|
+
- **Plots** (`plot hist`, `plot scatter`, `plot line`, `plot box`, `plot bar`, `plot heatmap`, `plot acf`, `plot pacf`)
|
|
121
|
+
- **Output** (`export docx`, `export pptx`)
|
|
122
|
+
- **Configuration** (`config show`, `config set`)
|
|
123
|
+
- **Script runner** (`run <script.ost>`): execute .ost script files
|
|
124
|
+
- Interactive REPL with tab completion, syntax highlighting, and command history
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## [0.1.0] – 2024-01-01
|
|
129
|
+
|
|
130
|
+
### Added
|
|
131
|
+
- Initial project scaffold
|
|
132
|
+
- Basic REPL infrastructure
|
|
133
|
+
- Polars DataFrame backend
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# Contributing to OpenStat
|
|
2
|
+
|
|
3
|
+
Thanks for your interest in contributing! OpenStat is an open-source statistical analysis tool and we welcome contributions of all kinds.
|
|
4
|
+
|
|
5
|
+
## Getting Started
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
# Clone your fork of the repo (replace YOUR_USERNAME with your GitHub username)
|
|
9
|
+
git clone https://github.com/YOUR_USERNAME/openstat.git
|
|
10
|
+
cd openstat
|
|
11
|
+
|
|
12
|
+
# Create a virtual environment
|
|
13
|
+
python -m venv .venv
|
|
14
|
+
source .venv/bin/activate # or .venv\Scripts\activate on Windows
|
|
15
|
+
|
|
16
|
+
# Install in editable mode with dev deps
|
|
17
|
+
pip install -e ".[dev]"
|
|
18
|
+
|
|
19
|
+
# Run tests
|
|
20
|
+
pytest
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Project Structure
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
src/openstat/
|
|
27
|
+
├── cli.py # Typer CLI entry point
|
|
28
|
+
├── repl.py # Interactive REPL with tab completion
|
|
29
|
+
├── session.py # Session state, undo system
|
|
30
|
+
├── commands/
|
|
31
|
+
│ ├── base.py # @command decorator, registry
|
|
32
|
+
│ ├── data_cmds.py # load, filter, select, derive, sort, ...
|
|
33
|
+
│ ├── stat_cmds.py # summarize, tabulate, corr, ols, logit, ...
|
|
34
|
+
│ ├── plot_cmds.py # plot hist/scatter/line/box
|
|
35
|
+
│ └── report_cmds.py # report, help
|
|
36
|
+
├── dsl/
|
|
37
|
+
│ ├── tokenizer.py # Safe expression tokenizer
|
|
38
|
+
│ └── parser.py # Recursive descent parser (no eval!)
|
|
39
|
+
├── stats/
|
|
40
|
+
│ └── models.py # OLS, Logit via statsmodels
|
|
41
|
+
├── plots/
|
|
42
|
+
│ └── plotter.py # matplotlib chart generation
|
|
43
|
+
├── io/
|
|
44
|
+
│ └── loader.py # CSV, Parquet, DTA, Excel loaders
|
|
45
|
+
└── reporting/
|
|
46
|
+
└── report.py # Markdown report generator
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Adding a New Command
|
|
50
|
+
|
|
51
|
+
1. Pick the right module in `src/openstat/commands/` (or create a new one).
|
|
52
|
+
2. Use the `@command` decorator:
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from openstat.commands.base import command
|
|
56
|
+
|
|
57
|
+
@command("mycommand", usage="mycommand <arg>")
|
|
58
|
+
def cmd_mycommand(session, args):
|
|
59
|
+
    """One-line description shown in help."""
|
|
60
|
+
    df = session.require_data()
|
|
61
|
+
    # ... your logic ...
|
|
62
|
+
    return "Result text shown to user"
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
3. If you created a new module, import it in `src/openstat/commands/__init__.py`.
|
|
66
|
+
4. Add tests in `tests/`.
|
|
67
|
+
|
|
68
|
+
## Adding a DSL Function
|
|
69
|
+
|
|
70
|
+
To add a new function to the expression language (used by `filter` and `derive`):
|
|
71
|
+
|
|
72
|
+
1. Edit `src/openstat/dsl/parser.py`
|
|
73
|
+
2. Add a case in `_apply_function()`
|
|
74
|
+
3. Add a test in `tests/test_parser.py`
|
|
75
|
+
|
|
76
|
+
## Guidelines
|
|
77
|
+
|
|
78
|
+
- **No `eval()`** — all user expressions go through the safe parser
|
|
79
|
+
- **Snapshot before mutation** — call `session.snapshot()` before modifying `session.df`
|
|
80
|
+
- **Return strings** — command handlers return plain text (use `rich_to_str()` for Rich tables)
|
|
81
|
+
- **Friendly errors** — use `friendly_error()` to wrap exceptions
|
|
82
|
+
- **Test real values** — assert on actual numbers, not just "contains some string"
|
|
83
|
+
|
|
84
|
+
## Running Tests
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
# Full suite
|
|
88
|
+
pytest
|
|
89
|
+
|
|
90
|
+
# Verbose with specific file
|
|
91
|
+
pytest tests/test_commands.py -v
|
|
92
|
+
|
|
93
|
+
# With coverage
|
|
94
|
+
pytest --cov=openstat --cov-report=term-missing
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Code Style
|
|
98
|
+
|
|
99
|
+
We use [ruff](https://docs.astral.sh/ruff/) for linting. Before submitting:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
pip install ruff
|
|
103
|
+
ruff check src/ tests/
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Pull Request Process
|
|
107
|
+
|
|
108
|
+
1. Fork the repo and create a feature branch
|
|
109
|
+
2. Write tests for your changes
|
|
110
|
+
3. Ensure all tests pass (`pytest`)
|
|
111
|
+
4. Run `ruff check` with no errors
|
|
112
|
+
5. Submit a PR with a clear description of what you changed and why
|
|
113
|
+
|
|
114
|
+
## License
|
|
115
|
+
|
|
116
|
+
By contributing, you agree that your contributions will be licensed under the MIT License.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 OpenStat Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|