openstat-cli 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. openstat_cli-1.0.0/.github/workflows/ci.yml +47 -0
  2. openstat_cli-1.0.0/.gitignore +40 -0
  3. openstat_cli-1.0.0/CHANGELOG.md +133 -0
  4. openstat_cli-1.0.0/CONTRIBUTING.md +116 -0
  5. openstat_cli-1.0.0/LICENSE +21 -0
  6. openstat_cli-1.0.0/PKG-INFO +748 -0
  7. openstat_cli-1.0.0/README.md +623 -0
  8. openstat_cli-1.0.0/examples/data.csv +51 -0
  9. openstat_cli-1.0.0/examples/demo.ost +109 -0
  10. openstat_cli-1.0.0/pyproject.toml +154 -0
  11. openstat_cli-1.0.0/src/openstat/__init__.py +3 -0
  12. openstat_cli-1.0.0/src/openstat/__main__.py +4 -0
  13. openstat_cli-1.0.0/src/openstat/backends/__init__.py +16 -0
  14. openstat_cli-1.0.0/src/openstat/backends/duckdb_backend.py +70 -0
  15. openstat_cli-1.0.0/src/openstat/backends/polars_backend.py +52 -0
  16. openstat_cli-1.0.0/src/openstat/cli.py +92 -0
  17. openstat_cli-1.0.0/src/openstat/commands/__init__.py +82 -0
  18. openstat_cli-1.0.0/src/openstat/commands/adv_stat_cmds.py +1255 -0
  19. openstat_cli-1.0.0/src/openstat/commands/advanced_ml_cmds.py +576 -0
  20. openstat_cli-1.0.0/src/openstat/commands/advreg_cmds.py +207 -0
  21. openstat_cli-1.0.0/src/openstat/commands/alias_cmds.py +135 -0
  22. openstat_cli-1.0.0/src/openstat/commands/arch_cmds.py +82 -0
  23. openstat_cli-1.0.0/src/openstat/commands/arules_cmds.py +111 -0
  24. openstat_cli-1.0.0/src/openstat/commands/automodel_cmds.py +212 -0
  25. openstat_cli-1.0.0/src/openstat/commands/backend_cmds.py +82 -0
  26. openstat_cli-1.0.0/src/openstat/commands/base.py +170 -0
  27. openstat_cli-1.0.0/src/openstat/commands/bayes_cmds.py +71 -0
  28. openstat_cli-1.0.0/src/openstat/commands/causal_cmds.py +269 -0
  29. openstat_cli-1.0.0/src/openstat/commands/cluster_cmds.py +152 -0
  30. openstat_cli-1.0.0/src/openstat/commands/data_cmds.py +996 -0
  31. openstat_cli-1.0.0/src/openstat/commands/datamanip_cmds.py +672 -0
  32. openstat_cli-1.0.0/src/openstat/commands/dataquality_cmds.py +174 -0
  33. openstat_cli-1.0.0/src/openstat/commands/datetime_cmds.py +176 -0
  34. openstat_cli-1.0.0/src/openstat/commands/dimreduce_cmds.py +184 -0
  35. openstat_cli-1.0.0/src/openstat/commands/discrete_cmds.py +149 -0
  36. openstat_cli-1.0.0/src/openstat/commands/dsl_cmds.py +143 -0
  37. openstat_cli-1.0.0/src/openstat/commands/epi_cmds.py +93 -0
  38. openstat_cli-1.0.0/src/openstat/commands/equiv_tobit_cmds.py +94 -0
  39. openstat_cli-1.0.0/src/openstat/commands/esttab_cmds.py +196 -0
  40. openstat_cli-1.0.0/src/openstat/commands/export_beamer_cmds.py +142 -0
  41. openstat_cli-1.0.0/src/openstat/commands/export_cmds.py +201 -0
  42. openstat_cli-1.0.0/src/openstat/commands/export_extra_cmds.py +240 -0
  43. openstat_cli-1.0.0/src/openstat/commands/factor_cmds.py +180 -0
  44. openstat_cli-1.0.0/src/openstat/commands/groupby_cmds.py +155 -0
  45. openstat_cli-1.0.0/src/openstat/commands/help_cmds.py +237 -0
  46. openstat_cli-1.0.0/src/openstat/commands/i18n_cmds.py +43 -0
  47. openstat_cli-1.0.0/src/openstat/commands/import_extra_cmds.py +561 -0
  48. openstat_cli-1.0.0/src/openstat/commands/influence_cmds.py +134 -0
  49. openstat_cli-1.0.0/src/openstat/commands/iv_cmds.py +106 -0
  50. openstat_cli-1.0.0/src/openstat/commands/manova_cmds.py +105 -0
  51. openstat_cli-1.0.0/src/openstat/commands/mediate_cmds.py +233 -0
  52. openstat_cli-1.0.0/src/openstat/commands/meta_cmds.py +284 -0
  53. openstat_cli-1.0.0/src/openstat/commands/mi_cmds.py +228 -0
  54. openstat_cli-1.0.0/src/openstat/commands/mixed_cmds.py +79 -0
  55. openstat_cli-1.0.0/src/openstat/commands/mixture_changepoint_cmds.py +166 -0
  56. openstat_cli-1.0.0/src/openstat/commands/ml_adv_cmds.py +147 -0
  57. openstat_cli-1.0.0/src/openstat/commands/ml_cmds.py +178 -0
  58. openstat_cli-1.0.0/src/openstat/commands/model_eval_cmds.py +142 -0
  59. openstat_cli-1.0.0/src/openstat/commands/network_cmds.py +288 -0
  60. openstat_cli-1.0.0/src/openstat/commands/nlquery_cmds.py +161 -0
  61. openstat_cli-1.0.0/src/openstat/commands/nonparam_cmds.py +149 -0
  62. openstat_cli-1.0.0/src/openstat/commands/outreg_cmds.py +247 -0
  63. openstat_cli-1.0.0/src/openstat/commands/panel_cmds.py +141 -0
  64. openstat_cli-1.0.0/src/openstat/commands/pdf_cmds.py +226 -0
  65. openstat_cli-1.0.0/src/openstat/commands/pipeline_cmds.py +319 -0
  66. openstat_cli-1.0.0/src/openstat/commands/plot_cmds.py +189 -0
  67. openstat_cli-1.0.0/src/openstat/commands/plugin_cmds.py +79 -0
  68. openstat_cli-1.0.0/src/openstat/commands/posthoc_cmds.py +153 -0
  69. openstat_cli-1.0.0/src/openstat/commands/power_cmds.py +172 -0
  70. openstat_cli-1.0.0/src/openstat/commands/profile_cmds.py +246 -0
  71. openstat_cli-1.0.0/src/openstat/commands/rbridge_cmds.py +81 -0
  72. openstat_cli-1.0.0/src/openstat/commands/regex_cmds.py +104 -0
  73. openstat_cli-1.0.0/src/openstat/commands/report_cmds.py +48 -0
  74. openstat_cli-1.0.0/src/openstat/commands/repro_cmds.py +129 -0
  75. openstat_cli-1.0.0/src/openstat/commands/resampling_cmds.py +109 -0
  76. openstat_cli-1.0.0/src/openstat/commands/reshape_cmds.py +223 -0
  77. openstat_cli-1.0.0/src/openstat/commands/sem_cmds.py +177 -0
  78. openstat_cli-1.0.0/src/openstat/commands/stat_cmds.py +1040 -0
  79. openstat_cli-1.0.0/src/openstat/commands/stata_import_cmds.py +215 -0
  80. openstat_cli-1.0.0/src/openstat/commands/string_cmds.py +124 -0
  81. openstat_cli-1.0.0/src/openstat/commands/surv_cmds.py +145 -0
  82. openstat_cli-1.0.0/src/openstat/commands/survey_cmds.py +153 -0
  83. openstat_cli-1.0.0/src/openstat/commands/textanalysis_cmds.py +192 -0
  84. openstat_cli-1.0.0/src/openstat/commands/ts_adv_cmds.py +136 -0
  85. openstat_cli-1.0.0/src/openstat/commands/ts_cmds.py +195 -0
  86. openstat_cli-1.0.0/src/openstat/commands/tui_cmds.py +111 -0
  87. openstat_cli-1.0.0/src/openstat/commands/ux_cmds.py +191 -0
  88. openstat_cli-1.0.0/src/openstat/commands/validate_cmds.py +270 -0
  89. openstat_cli-1.0.0/src/openstat/commands/viz_adv_cmds.py +312 -0
  90. openstat_cli-1.0.0/src/openstat/commands/viz_extra_cmds.py +251 -0
  91. openstat_cli-1.0.0/src/openstat/commands/watch_cmds.py +69 -0
  92. openstat_cli-1.0.0/src/openstat/config.py +106 -0
  93. openstat_cli-1.0.0/src/openstat/dsl/__init__.py +0 -0
  94. openstat_cli-1.0.0/src/openstat/dsl/parser.py +332 -0
  95. openstat_cli-1.0.0/src/openstat/dsl/tokenizer.py +105 -0
  96. openstat_cli-1.0.0/src/openstat/i18n.py +120 -0
  97. openstat_cli-1.0.0/src/openstat/io/__init__.py +0 -0
  98. openstat_cli-1.0.0/src/openstat/io/loader.py +187 -0
  99. openstat_cli-1.0.0/src/openstat/jupyter/__init__.py +18 -0
  100. openstat_cli-1.0.0/src/openstat/jupyter/display.py +18 -0
  101. openstat_cli-1.0.0/src/openstat/jupyter/magic.py +60 -0
  102. openstat_cli-1.0.0/src/openstat/logging_config.py +59 -0
  103. openstat_cli-1.0.0/src/openstat/plots/__init__.py +0 -0
  104. openstat_cli-1.0.0/src/openstat/plots/plotter.py +437 -0
  105. openstat_cli-1.0.0/src/openstat/plots/surv_plots.py +32 -0
  106. openstat_cli-1.0.0/src/openstat/plots/ts_plots.py +59 -0
  107. openstat_cli-1.0.0/src/openstat/plugins/__init__.py +5 -0
  108. openstat_cli-1.0.0/src/openstat/plugins/manager.py +69 -0
  109. openstat_cli-1.0.0/src/openstat/repl.py +457 -0
  110. openstat_cli-1.0.0/src/openstat/reporting/__init__.py +0 -0
  111. openstat_cli-1.0.0/src/openstat/reporting/eda.py +208 -0
  112. openstat_cli-1.0.0/src/openstat/reporting/report.py +67 -0
  113. openstat_cli-1.0.0/src/openstat/script_runner.py +319 -0
  114. openstat_cli-1.0.0/src/openstat/session.py +133 -0
  115. openstat_cli-1.0.0/src/openstat/stats/__init__.py +0 -0
  116. openstat_cli-1.0.0/src/openstat/stats/advanced_regression.py +269 -0
  117. openstat_cli-1.0.0/src/openstat/stats/arch_garch.py +84 -0
  118. openstat_cli-1.0.0/src/openstat/stats/bayesian.py +103 -0
  119. openstat_cli-1.0.0/src/openstat/stats/causal.py +258 -0
  120. openstat_cli-1.0.0/src/openstat/stats/clustering.py +206 -0
  121. openstat_cli-1.0.0/src/openstat/stats/discrete.py +311 -0
  122. openstat_cli-1.0.0/src/openstat/stats/epidemiology.py +119 -0
  123. openstat_cli-1.0.0/src/openstat/stats/equiv_tobit.py +163 -0
  124. openstat_cli-1.0.0/src/openstat/stats/factor.py +174 -0
  125. openstat_cli-1.0.0/src/openstat/stats/imputation.py +282 -0
  126. openstat_cli-1.0.0/src/openstat/stats/influence.py +78 -0
  127. openstat_cli-1.0.0/src/openstat/stats/iv.py +131 -0
  128. openstat_cli-1.0.0/src/openstat/stats/manova.py +124 -0
  129. openstat_cli-1.0.0/src/openstat/stats/mixed.py +128 -0
  130. openstat_cli-1.0.0/src/openstat/stats/ml.py +275 -0
  131. openstat_cli-1.0.0/src/openstat/stats/ml_advanced.py +117 -0
  132. openstat_cli-1.0.0/src/openstat/stats/model_eval.py +183 -0
  133. openstat_cli-1.0.0/src/openstat/stats/models.py +1342 -0
  134. openstat_cli-1.0.0/src/openstat/stats/nonparametric.py +130 -0
  135. openstat_cli-1.0.0/src/openstat/stats/panel.py +179 -0
  136. openstat_cli-1.0.0/src/openstat/stats/power.py +295 -0
  137. openstat_cli-1.0.0/src/openstat/stats/resampling.py +203 -0
  138. openstat_cli-1.0.0/src/openstat/stats/survey.py +213 -0
  139. openstat_cli-1.0.0/src/openstat/stats/survival.py +196 -0
  140. openstat_cli-1.0.0/src/openstat/stats/timeseries.py +142 -0
  141. openstat_cli-1.0.0/src/openstat/stats/ts_advanced.py +114 -0
  142. openstat_cli-1.0.0/src/openstat/types.py +11 -0
  143. openstat_cli-1.0.0/src/openstat/web/__init__.py +1 -0
  144. openstat_cli-1.0.0/src/openstat/web/app.py +117 -0
  145. openstat_cli-1.0.0/src/openstat/web/session_manager.py +73 -0
  146. openstat_cli-1.0.0/src/openstat/web/static/app.js +117 -0
  147. openstat_cli-1.0.0/src/openstat/web/static/index.html +38 -0
  148. openstat_cli-1.0.0/src/openstat/web/static/style.css +103 -0
  149. openstat_cli-1.0.0/tests/__init__.py +0 -0
  150. openstat_cli-1.0.0/tests/test_advreg.py +210 -0
  151. openstat_cli-1.0.0/tests/test_backends.py +66 -0
  152. openstat_cli-1.0.0/tests/test_bayes.py +101 -0
  153. openstat_cli-1.0.0/tests/test_causal.py +118 -0
  154. openstat_cli-1.0.0/tests/test_clustering.py +181 -0
  155. openstat_cli-1.0.0/tests/test_commands.py +268 -0
  156. openstat_cli-1.0.0/tests/test_data_v040.py +160 -0
  157. openstat_cli-1.0.0/tests/test_dataquality.py +143 -0
  158. openstat_cli-1.0.0/tests/test_discrete.py +136 -0
  159. openstat_cli-1.0.0/tests/test_epidemiology.py +77 -0
  160. openstat_cli-1.0.0/tests/test_equiv_tobit.py +90 -0
  161. openstat_cli-1.0.0/tests/test_esttab.py +66 -0
  162. openstat_cli-1.0.0/tests/test_export.py +184 -0
  163. openstat_cli-1.0.0/tests/test_factor.py +212 -0
  164. openstat_cli-1.0.0/tests/test_file_formats.py +83 -0
  165. openstat_cli-1.0.0/tests/test_imputation.py +85 -0
  166. openstat_cli-1.0.0/tests/test_influence.py +81 -0
  167. openstat_cli-1.0.0/tests/test_integration.py +166 -0
  168. openstat_cli-1.0.0/tests/test_iv.py +73 -0
  169. openstat_cli-1.0.0/tests/test_jupyter.py +44 -0
  170. openstat_cli-1.0.0/tests/test_manova.py +106 -0
  171. openstat_cli-1.0.0/tests/test_mixed.py +71 -0
  172. openstat_cli-1.0.0/tests/test_ml.py +174 -0
  173. openstat_cli-1.0.0/tests/test_ml_advanced.py +101 -0
  174. openstat_cli-1.0.0/tests/test_model_eval.py +122 -0
  175. openstat_cli-1.0.0/tests/test_models.py +221 -0
  176. openstat_cli-1.0.0/tests/test_nonparam.py +187 -0
  177. openstat_cli-1.0.0/tests/test_outreg.py +67 -0
  178. openstat_cli-1.0.0/tests/test_panel.py +93 -0
  179. openstat_cli-1.0.0/tests/test_parser.py +202 -0
  180. openstat_cli-1.0.0/tests/test_plugins.py +81 -0
  181. openstat_cli-1.0.0/tests/test_power.py +177 -0
  182. openstat_cli-1.0.0/tests/test_resampling.py +106 -0
  183. openstat_cli-1.0.0/tests/test_reshape.py +195 -0
  184. openstat_cli-1.0.0/tests/test_round4.py +355 -0
  185. openstat_cli-1.0.0/tests/test_round5.py +225 -0
  186. openstat_cli-1.0.0/tests/test_round6.py +294 -0
  187. openstat_cli-1.0.0/tests/test_round7.py +274 -0
  188. openstat_cli-1.0.0/tests/test_round8.py +293 -0
  189. openstat_cli-1.0.0/tests/test_session.py +126 -0
  190. openstat_cli-1.0.0/tests/test_string_cmds.py +105 -0
  191. openstat_cli-1.0.0/tests/test_survey.py +95 -0
  192. openstat_cli-1.0.0/tests/test_survival.py +96 -0
  193. openstat_cli-1.0.0/tests/test_timeseries.py +107 -0
  194. openstat_cli-1.0.0/tests/test_ts_advanced.py +118 -0
  195. openstat_cli-1.0.0/tests/test_v020.py +648 -0
  196. openstat_cli-1.0.0/tests/test_web.py +79 -0
@@ -0,0 +1,47 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ${{ matrix.os }}
12
+ strategy:
13
+ matrix:
14
+ os: [ubuntu-latest, macos-latest]
15
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python ${{ matrix.python-version }}
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install dependencies
25
+ run: pip install -e ".[dev]"
26
+
27
+ - name: Run tests with coverage
28
+ run: pytest tests/ -v --tb=short --cov=openstat --cov-report=term-missing --cov-fail-under=75
29
+
30
+ - name: Run demo script
31
+ run: python -m openstat run examples/demo.ost
32
+
33
+ lint:
34
+ runs-on: ubuntu-latest
35
+ steps:
36
+ - uses: actions/checkout@v4
37
+
38
+ - name: Set up Python
39
+ uses: actions/setup-python@v5
40
+ with:
41
+ python-version: "3.12"
42
+
43
+ - name: Install dependencies
44
+ run: pip install -e ".[dev]" ruff
45
+
46
+ - name: Ruff check
47
+ run: ruff check src/ tests/
@@ -0,0 +1,40 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.egg-info/
6
+ *.egg
7
+ dist/
8
+ build/
9
+ *.whl
10
+
11
+ # Virtual environments
12
+ .venv/
13
+ venv/
14
+ env/
15
+
16
+ # Testing
17
+ .pytest_cache/
18
+ .coverage
19
+ htmlcov/
20
+ .tox/
21
+
22
+ # IDE
23
+ .vscode/
24
+ .idea/
25
+ *.swp
26
+ *.swo
27
+ *~
28
+
29
+ # Ruff
30
+ .ruff_cache/
31
+
32
+ # OS
33
+ .DS_Store
34
+ Thumbs.db
35
+
36
+ # Outputs (generated by running OpenStat)
37
+ outputs/
38
+
39
+ # Claude Code
40
+ .claude/
@@ -0,0 +1,133 @@
1
+ # Changelog
2
+
3
+ All notable changes to OpenStat are documented here.
4
+
5
+ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
6
+ Versioning follows [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ---
9
+
10
+ ## [0.9.0] – 2025-05-01
11
+
12
+ ### Added
13
+ - **Post-hoc tests** (`posthoc`): Tukey HSD, Bonferroni, Scheffé pairwise comparisons after ANOVA
14
+ - **Coefficient plot** (`plot coef`): horizontal error-bar plot of regression coefficients with 95% CI
15
+ - **Marginal effects plot** (`plot margins`): visualise marginal effects after `margins`
16
+ - **Interaction plot** (`plot interaction <y> <x> <moderator>`): shows regression lines for groups defined by ±1 SD of the moderator
17
+ - **Real-time session log** (`log using <path>`): streams every command and its output to a file
18
+ - **Script runner enhancements**: `foreach`, `forvalues`, `if/else` blocks with variable substitution (`{var}`)
19
+ - **Database connectivity** (`sqlload`): load data directly from SQL databases via connection URL (requires `connectorx`)
20
+ - **SEM / CFA** (`sem`, `cfa`): structural equation modelling and confirmatory factor analysis via semopy
21
+ - **Meta-analysis** (`meta`): fixed-effects (inverse-variance) and random-effects (DerSimonian-Laird), forest and funnel plots
22
+ - **Network analysis** (`network`): build, describe, centrality, community detection, and plotting via networkx
23
+ - **Auto model selection** (`automodel`): exhaustive subset search (≤8 predictors) or forward stepwise, ranked by AIC/BIC
24
+ - **IPTW** (`iptw`): inverse probability of treatment weighting for causal inference; ATE/ATT; covariate balance table
25
+ - **Reproducibility** (`session info/save/replay`, `set seed`, `version`): full session management and script replay
26
+ - **TUI Dashboard** (`dashboard`): full-screen terminal UI with dataset overview, variable table, model results, and history (requires `textual`)
27
+ - **PyPI packaging**: `pyproject.toml` polished for release; new extras `database`, `sem`, `network`, `tui`
28
+
29
+ ### Changed
30
+ - `export docx` now includes a Summary Statistics table (N, Mean, SD, Min–Max per numeric column)
31
+ - `set` command extended with `set seed <N>` sub-command
32
+ - `log` command moved from `report_cmds` to `outreg_cmds` and upgraded to real-time streaming
33
+
34
+ ### Fixed
35
+ - `plot coef` crash when `params` is a numpy array (not a pandas Series)
36
+ - `} else {` block parsing in script runner
37
+ - `automodel` formula normalisation (space-separated predictors now work)
38
+ - `semopy.Model` usage: the `obj=` parameter is now passed to `fit()` instead of `__init__()`
39
+ - Duplicate command registration for `set` and `log`
40
+
41
+ ---
42
+
43
+ ## [0.8.0] – 2025-03-01
44
+
45
+ ### Added
46
+ - **Bayesian inference** (`bayes`): MCMC sampling, posterior summaries, trace/posterior plots
47
+ - **ARCH/GARCH** (`arch`, `garch`): volatility modelling for financial time series
48
+ - **MANOVA** (`manova`): multivariate analysis of variance
49
+ - **Clustering** (`cluster kmeans`, `cluster hclust`, `discriminant`): k-means, hierarchical, LDA/QDA
50
+ - **Advanced ML** (`randomforest`, `gradientboost`, `neuralnet`, `svm`, `knn`): ensemble, neural-network, support-vector, and k-nearest-neighbour models
51
+ - **Influence diagnostics** (`influence`): Cook's distance, DFFITS, leverage plots
52
+ - **Advanced regression** (`quantreg`, `truncreg`, `intreg`, `heckman`): quantile, truncated, interval, and selection models
53
+ - **Advanced time series** (`vecm`, `var`, `granger`, `threshold`): VAR, VECM, Granger causality, threshold models
54
+ - **Epidemiology** (`epi`): risk ratios, odds ratios, attributable risk, Mantel-Haenszel pooling
55
+ - **Equivalence testing and Tobit** (`equiv`, `tobit`): TOST equivalence tests and Tobit censored regression
56
+ - **String commands** (`strreplace`, `strsplit`, `strextract`, `strpad`, `strcat`): column string manipulation
57
+ - **DSL / macro system** (`define`, `macro`, `eval`): variable macros and expressions
58
+ - **Resampling** (`bootstrap`, `jackknife`, `permtest`): resampling-based inference
59
+ - **Model evaluation** (`roc`, `calibration`, `confusion`): classification diagnostics
60
+ - **Data quality** (`missing`, `duplicates`, `outlier`, `assert`): profiling and validation
61
+ - **Reshape** (`reshape wide`, `reshape long`, `pivot`, `unpivot`): data reshaping
62
+ - **esttab** (`esttab`): publication-quality coefficient tables (LaTeX, HTML, Markdown)
63
+ - **outreg2** (`outreg2`): Word/RTF-compatible regression tables
64
+ - **Visualisation extras** (`plot violin`, `plot pairplot`, `plot parallel`, `plot density`): additional plot types
65
+
66
+ ### Changed
67
+ - Session now tracks `_last_fit_result` and `_last_fit_kwargs` for bootstrap/esttab integration
68
+
69
+ ---
70
+
71
+ ## [0.7.0] – 2025-01-01
72
+
73
+ ### Added
74
+ - **Survey analysis** (`svyset`, `svy: mean`, `svy: total`, `svy: proportion`, `svy: reg`): complex survey design
75
+ - **Multiple imputation** (`mi impute`, `mi estimate`): MICE-based imputation with pooled estimates
76
+ - **DuckDB backend** (`set backend duckdb`, `sql`): fast in-memory SQL on datasets
77
+ - **Web API** (`openstat web`): FastAPI + WebSocket server for browser-based access
78
+ - **Jupyter magic** (`%openstat`): run OpenStat commands in Jupyter notebooks
79
+ - **Plugin system** (`plugin load/list/unload`): third-party command packages
80
+
81
+ ---
82
+
83
+ ## [0.6.0] – 2024-11-01
84
+
85
+ ### Added
86
+ - **Power analysis** (`power`): power and sample-size calculations for t-tests, ANOVA, chi-square tests, and tests of proportions
87
+ - **Factor analysis** (`factor`, `pca`, `rotate`): EFA, PCA, varimax/oblimin rotation
88
+ - **IV regression** (`ivregress`): two-stage least squares
89
+ - **Mixed models** (`mixed`): linear mixed-effects models via statsmodels
90
+ - **Panel data** (`xtset`, `xtreg`, `xttest`, `hausman`): fixed/random effects, Hausman test
91
+
92
+ ---
93
+
94
+ ## [0.5.0] – 2024-09-01
95
+
96
+ ### Added
97
+ - **Survival analysis** (`stset`, `sts graph`, `stcox`, `streg`, `stsum`): Kaplan-Meier, Cox PH, AFT
98
+ - **Time series** (`tsset`, `arima`, `ardl`, `adf`, `kpss`, `forecast`): ARIMA, ARDL, unit-root tests
99
+ - **Causal inference** (`pscore`, `teffects`, `did`, `rddesign`, `synth`): propensity score, DiD, RD
100
+ - **Discrete choice** (`logit`, `probit`, `ologit`, `oprobit`, `mlogit`, `clogit`): limited dependent variable models
101
+ - **Undo/redo** (`undo`, `redo`): step-back for data transformations
102
+
103
+ ---
104
+
105
+ ## [0.4.0] – 2024-07-01
106
+
107
+ ### Added
108
+ - **Non-parametric tests** (`kruskal`, `mannwhitney`, `wilcoxon`, `friedman`, `spearman`): rank-based inference
109
+ - **Report generation** (`report html`, `report latex`): automated analysis reports
110
+ - **Plot diagnostics** (`plot diagnostics`): residuals vs fitted, Q-Q, scale-location
111
+
112
+ ---
113
+
114
+ ## [0.3.0] – 2024-05-01
115
+
116
+ ### Added
117
+ - **Data management** (`load`, `save`, `drop`, `keep`, `rename`, `encode`, `decode`, `generate`, `replace`, `sort`, `merge`, `append`, `sample`, `undo`)
118
+ - **Descriptive statistics** (`describe`, `summarize`, `tabulate`, `correlate`, `crosstab`, `anova`)
119
+ - **Regression** (`ols`, `logit`, `poisson`, `margins`, `predict`, `test`)
120
+ - **Plots** (`plot hist`, `plot scatter`, `plot line`, `plot box`, `plot bar`, `plot heatmap`, `plot acf`, `plot pacf`)
121
+ - **Output** (`export docx`, `export pptx`)
122
+ - **Configuration** (`config show`, `config set`)
123
+ - **Script runner** (`run <script.ost>`): execute .ost script files
124
+ - Interactive REPL with tab completion, syntax highlighting, and command history
125
+
126
+ ---
127
+
128
+ ## [0.1.0] – 2024-01-01
129
+
130
+ ### Added
131
+ - Initial project scaffold
132
+ - Basic REPL infrastructure
133
+ - Polars DataFrame backend
@@ -0,0 +1,116 @@
1
+ # Contributing to OpenStat
2
+
3
+ Thanks for your interest in contributing! OpenStat is an open-source statistical analysis tool and we welcome contributions of all kinds.
4
+
5
+ ## Getting Started
6
+
7
+ ```bash
8
+ # Clone your fork of the repo (replace YOUR_USERNAME with your GitHub username)
9
+ git clone https://github.com/YOUR_USERNAME/openstat.git
10
+ cd openstat
11
+
12
+ # Create a virtual environment
13
+ python -m venv .venv
14
+ source .venv/bin/activate # or .venv\Scripts\activate on Windows
15
+
16
+ # Install in editable mode with dev deps
17
+ pip install -e ".[dev]"
18
+
19
+ # Run tests
20
+ pytest
21
+ ```
22
+
23
+ ## Project Structure
24
+
25
+ ```
26
+ src/openstat/
27
+ ├── cli.py # Typer CLI entry point
28
+ ├── repl.py # Interactive REPL with tab completion
29
+ ├── session.py # Session state, undo system
30
+ ├── commands/
31
+ │ ├── base.py # @command decorator, registry
32
+ │ ├── data_cmds.py # load, filter, select, derive, sort, ...
33
+ │ ├── stat_cmds.py # summarize, tabulate, corr, ols, logit, ...
34
+ │ ├── plot_cmds.py # plot hist/scatter/line/box
35
+ │ └── report_cmds.py # report, help
36
+ ├── dsl/
37
+ │ ├── tokenizer.py # Safe expression tokenizer
38
+ │ └── parser.py # Recursive descent parser (no eval!)
39
+ ├── stats/
40
+ │ └── models.py # OLS, Logit via statsmodels
41
+ ├── plots/
42
+ │ └── plotter.py # matplotlib chart generation
43
+ ├── io/
44
+ │ └── loader.py # CSV, Parquet, DTA, Excel loaders
45
+ └── reporting/
46
+ └── report.py # Markdown report generator
47
+ ```
48
+
49
+ ## Adding a New Command
50
+
51
+ 1. Pick the right module in `src/openstat/commands/` (or create a new one).
52
+ 2. Use the `@command` decorator:
53
+
54
+ ```python
55
+ from openstat.commands.base import command
56
+
57
+ @command("mycommand", usage="mycommand <arg>")
58
+ def cmd_mycommand(session, args):
59
+ """One-line description shown in help."""
60
+ df = session.require_data()
61
+ # ... your logic ...
62
+ return "Result text shown to user"
63
+ ```
64
+
65
+ 3. If you created a new module, import it in `src/openstat/commands/__init__.py`.
66
+ 4. Add tests in `tests/`.
67
+
68
+ ## Adding a DSL Function
69
+
70
+ To add a new function to the expression language (used by `filter` and `derive`):
71
+
72
+ 1. Edit `src/openstat/dsl/parser.py`
73
+ 2. Add a case in `_apply_function()`
74
+ 3. Add a test in `tests/test_parser.py`
75
+
76
+ ## Guidelines
77
+
78
+ - **No `eval()`** — all user expressions go through the safe parser
79
+ - **Snapshot before mutation** — call `session.snapshot()` before modifying `session.df`
80
+ - **Return strings** — command handlers return plain text (use `rich_to_str()` for Rich tables)
81
+ - **Friendly errors** — use `friendly_error()` to wrap exceptions
82
+ - **Test real values** — assert on actual numbers, not just "contains some string"
83
+
84
+ ## Running Tests
85
+
86
+ ```bash
87
+ # Full suite
88
+ pytest
89
+
90
+ # Verbose with specific file
91
+ pytest tests/test_commands.py -v
92
+
93
+ # With coverage
94
+ pytest --cov=openstat --cov-report=term-missing
95
+ ```
96
+
97
+ ## Code Style
98
+
99
+ We use [ruff](https://docs.astral.sh/ruff/) for linting. Before submitting:
100
+
101
+ ```bash
102
+ pip install ruff
103
+ ruff check src/ tests/
104
+ ```
105
+
106
+ ## Pull Request Process
107
+
108
+ 1. Fork the repo and create a feature branch
109
+ 2. Write tests for your changes
110
+ 3. Ensure all tests pass (`pytest`)
111
+ 4. Run `ruff check src/ tests/` and fix any reported errors
112
+ 5. Submit a PR with a clear description of what and why
113
+
114
+ ## License
115
+
116
+ By contributing, you agree that your contributions will be licensed under the MIT License.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 OpenStat Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.