openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143):
  1. openstat/__init__.py +3 -0
  2. openstat/__main__.py +4 -0
  3. openstat/backends/__init__.py +16 -0
  4. openstat/backends/duckdb_backend.py +70 -0
  5. openstat/backends/polars_backend.py +52 -0
  6. openstat/cli.py +92 -0
  7. openstat/commands/__init__.py +82 -0
  8. openstat/commands/adv_stat_cmds.py +1255 -0
  9. openstat/commands/advanced_ml_cmds.py +576 -0
  10. openstat/commands/advreg_cmds.py +207 -0
  11. openstat/commands/alias_cmds.py +135 -0
  12. openstat/commands/arch_cmds.py +82 -0
  13. openstat/commands/arules_cmds.py +111 -0
  14. openstat/commands/automodel_cmds.py +212 -0
  15. openstat/commands/backend_cmds.py +82 -0
  16. openstat/commands/base.py +170 -0
  17. openstat/commands/bayes_cmds.py +71 -0
  18. openstat/commands/causal_cmds.py +269 -0
  19. openstat/commands/cluster_cmds.py +152 -0
  20. openstat/commands/data_cmds.py +996 -0
  21. openstat/commands/datamanip_cmds.py +672 -0
  22. openstat/commands/dataquality_cmds.py +174 -0
  23. openstat/commands/datetime_cmds.py +176 -0
  24. openstat/commands/dimreduce_cmds.py +184 -0
  25. openstat/commands/discrete_cmds.py +149 -0
  26. openstat/commands/dsl_cmds.py +143 -0
  27. openstat/commands/epi_cmds.py +93 -0
  28. openstat/commands/equiv_tobit_cmds.py +94 -0
  29. openstat/commands/esttab_cmds.py +196 -0
  30. openstat/commands/export_beamer_cmds.py +142 -0
  31. openstat/commands/export_cmds.py +201 -0
  32. openstat/commands/export_extra_cmds.py +240 -0
  33. openstat/commands/factor_cmds.py +180 -0
  34. openstat/commands/groupby_cmds.py +155 -0
  35. openstat/commands/help_cmds.py +237 -0
  36. openstat/commands/i18n_cmds.py +43 -0
  37. openstat/commands/import_extra_cmds.py +561 -0
  38. openstat/commands/influence_cmds.py +134 -0
  39. openstat/commands/iv_cmds.py +106 -0
  40. openstat/commands/manova_cmds.py +105 -0
  41. openstat/commands/mediate_cmds.py +233 -0
  42. openstat/commands/meta_cmds.py +284 -0
  43. openstat/commands/mi_cmds.py +228 -0
  44. openstat/commands/mixed_cmds.py +79 -0
  45. openstat/commands/mixture_changepoint_cmds.py +166 -0
  46. openstat/commands/ml_adv_cmds.py +147 -0
  47. openstat/commands/ml_cmds.py +178 -0
  48. openstat/commands/model_eval_cmds.py +142 -0
  49. openstat/commands/network_cmds.py +288 -0
  50. openstat/commands/nlquery_cmds.py +161 -0
  51. openstat/commands/nonparam_cmds.py +149 -0
  52. openstat/commands/outreg_cmds.py +247 -0
  53. openstat/commands/panel_cmds.py +141 -0
  54. openstat/commands/pdf_cmds.py +226 -0
  55. openstat/commands/pipeline_cmds.py +319 -0
  56. openstat/commands/plot_cmds.py +189 -0
  57. openstat/commands/plugin_cmds.py +79 -0
  58. openstat/commands/posthoc_cmds.py +153 -0
  59. openstat/commands/power_cmds.py +172 -0
  60. openstat/commands/profile_cmds.py +246 -0
  61. openstat/commands/rbridge_cmds.py +81 -0
  62. openstat/commands/regex_cmds.py +104 -0
  63. openstat/commands/report_cmds.py +48 -0
  64. openstat/commands/repro_cmds.py +129 -0
  65. openstat/commands/resampling_cmds.py +109 -0
  66. openstat/commands/reshape_cmds.py +223 -0
  67. openstat/commands/sem_cmds.py +177 -0
  68. openstat/commands/stat_cmds.py +1040 -0
  69. openstat/commands/stata_import_cmds.py +215 -0
  70. openstat/commands/string_cmds.py +124 -0
  71. openstat/commands/surv_cmds.py +145 -0
  72. openstat/commands/survey_cmds.py +153 -0
  73. openstat/commands/textanalysis_cmds.py +192 -0
  74. openstat/commands/ts_adv_cmds.py +136 -0
  75. openstat/commands/ts_cmds.py +195 -0
  76. openstat/commands/tui_cmds.py +111 -0
  77. openstat/commands/ux_cmds.py +191 -0
  78. openstat/commands/validate_cmds.py +270 -0
  79. openstat/commands/viz_adv_cmds.py +312 -0
  80. openstat/commands/viz_extra_cmds.py +251 -0
  81. openstat/commands/watch_cmds.py +69 -0
  82. openstat/config.py +106 -0
  83. openstat/dsl/__init__.py +0 -0
  84. openstat/dsl/parser.py +332 -0
  85. openstat/dsl/tokenizer.py +105 -0
  86. openstat/i18n.py +120 -0
  87. openstat/io/__init__.py +0 -0
  88. openstat/io/loader.py +187 -0
  89. openstat/jupyter/__init__.py +18 -0
  90. openstat/jupyter/display.py +18 -0
  91. openstat/jupyter/magic.py +60 -0
  92. openstat/logging_config.py +59 -0
  93. openstat/plots/__init__.py +0 -0
  94. openstat/plots/plotter.py +437 -0
  95. openstat/plots/surv_plots.py +32 -0
  96. openstat/plots/ts_plots.py +59 -0
  97. openstat/plugins/__init__.py +5 -0
  98. openstat/plugins/manager.py +69 -0
  99. openstat/repl.py +457 -0
  100. openstat/reporting/__init__.py +0 -0
  101. openstat/reporting/eda.py +208 -0
  102. openstat/reporting/report.py +67 -0
  103. openstat/script_runner.py +319 -0
  104. openstat/session.py +133 -0
  105. openstat/stats/__init__.py +0 -0
  106. openstat/stats/advanced_regression.py +269 -0
  107. openstat/stats/arch_garch.py +84 -0
  108. openstat/stats/bayesian.py +103 -0
  109. openstat/stats/causal.py +258 -0
  110. openstat/stats/clustering.py +206 -0
  111. openstat/stats/discrete.py +311 -0
  112. openstat/stats/epidemiology.py +119 -0
  113. openstat/stats/equiv_tobit.py +163 -0
  114. openstat/stats/factor.py +174 -0
  115. openstat/stats/imputation.py +282 -0
  116. openstat/stats/influence.py +78 -0
  117. openstat/stats/iv.py +131 -0
  118. openstat/stats/manova.py +124 -0
  119. openstat/stats/mixed.py +128 -0
  120. openstat/stats/ml.py +275 -0
  121. openstat/stats/ml_advanced.py +117 -0
  122. openstat/stats/model_eval.py +183 -0
  123. openstat/stats/models.py +1342 -0
  124. openstat/stats/nonparametric.py +130 -0
  125. openstat/stats/panel.py +179 -0
  126. openstat/stats/power.py +295 -0
  127. openstat/stats/resampling.py +203 -0
  128. openstat/stats/survey.py +213 -0
  129. openstat/stats/survival.py +196 -0
  130. openstat/stats/timeseries.py +142 -0
  131. openstat/stats/ts_advanced.py +114 -0
  132. openstat/types.py +11 -0
  133. openstat/web/__init__.py +1 -0
  134. openstat/web/app.py +117 -0
  135. openstat/web/session_manager.py +73 -0
  136. openstat/web/static/app.js +117 -0
  137. openstat/web/static/index.html +38 -0
  138. openstat/web/static/style.css +103 -0
  139. openstat_cli-1.0.0.dist-info/METADATA +748 -0
  140. openstat_cli-1.0.0.dist-info/RECORD +143 -0
  141. openstat_cli-1.0.0.dist-info/WHEEL +4 -0
  142. openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
  143. openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
openstat/io/loader.py ADDED
@@ -0,0 +1,187 @@
1
+ """Data loading and saving: CSV, Parquet, Stata (.dta), Excel (.xlsx)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import polars as pl
8
+
9
+ from openstat.config import get_config
10
+ from openstat.logging_config import get_logger
11
+
12
+ log = get_logger("io")
13
+
14
+
15
def _load_dta(path: Path, session=None) -> pl.DataFrame:
    """Load a Stata .dta file using pyreadstat (optional dependency).

    Args:
        path: Location of the .dta file.
        session: Optional session object. When given and the file carries
            value labels, they are stored on ``session._variable_labels``.

    Returns:
        The file contents, converted from pandas to a Polars DataFrame.

    Raises:
        ImportError: If pyreadstat cannot be imported.
    """
    try:
        import pyreadstat
    except ImportError as exc:
        # Chain the underlying failure so a broken (not merely missing)
        # pyreadstat installation can still be diagnosed from the traceback.
        raise ImportError(
            "Reading .dta files requires pyreadstat. "
            "Install it with: pip install openstat[stata]"
        ) from exc
    pandas_df, meta = pyreadstat.read_dta(str(path))
    if session is not None and meta.variable_value_labels:
        session._variable_labels = meta.variable_value_labels
    return pl.from_pandas(pandas_df)
28
+
29
+
30
def _load_sas7bdat(path: Path, session=None) -> pl.DataFrame:
    """Load a SAS .sas7bdat file using pyreadstat (optional dependency).

    Args:
        path: Location of the .sas7bdat file.
        session: Optional session object. When given and the reader reports
            value labels, they are stored on ``session._variable_labels``.

    Returns:
        The file contents, converted from pandas to a Polars DataFrame.

    Raises:
        ImportError: If pyreadstat cannot be imported.
    """
    try:
        import pyreadstat
    except ImportError as exc:
        # Chain the underlying failure so a broken (not merely missing)
        # pyreadstat installation can still be diagnosed from the traceback.
        raise ImportError(
            "Reading .sas7bdat files requires pyreadstat. "
            "Install it with: pip install openstat[sas]"
        ) from exc
    pandas_df, meta = pyreadstat.read_sas7bdat(str(path))
    if session is not None and meta.variable_value_labels:
        session._variable_labels = meta.variable_value_labels
    return pl.from_pandas(pandas_df)
43
+
44
+
45
def _load_sav(path: Path, session=None) -> pl.DataFrame:
    """Load an SPSS .sav file using pyreadstat (optional dependency).

    Args:
        path: Location of the .sav file.
        session: Optional session object. When given and the file carries
            value labels, they are stored on ``session._variable_labels``.

    Returns:
        The file contents, converted from pandas to a Polars DataFrame.

    Raises:
        ImportError: If pyreadstat cannot be imported.
    """
    try:
        import pyreadstat
    except ImportError as exc:
        # Chain the underlying failure so a broken (not merely missing)
        # pyreadstat installation can still be diagnosed from the traceback.
        raise ImportError(
            "Reading .sav files requires pyreadstat. "
            "Install it with: pip install openstat[spss]"
        ) from exc
    pandas_df, meta = pyreadstat.read_sav(str(path))
    if session is not None and meta.variable_value_labels:
        session._variable_labels = meta.variable_value_labels
    return pl.from_pandas(pandas_df)
58
+
59
+
60
def _load_excel(path: Path, sheet: str | int | None = None) -> pl.DataFrame:
    """Load an Excel file (optional dependency).

    Args:
        path: Location of the workbook.
        sheet: Sheet name or 0-based index. If None, loads the default
            (first) sheet. Pass 'list' to list available sheet names.

    Returns:
        The selected sheet as a Polars DataFrame.

    Raises:
        ImportError: If openpyxl cannot be imported.
        ValueError: When ``sheet == "list"``; the message carries the
            workbook's sheet names.
    """
    try:
        import openpyxl
    except ImportError as exc:
        raise ImportError(
            "Reading .xlsx files requires openpyxl. "
            "Install it with: pip install openstat[excel]"
        ) from exc

    if sheet == "list":
        wb = openpyxl.load_workbook(str(path), read_only=True, data_only=True)
        try:
            names = wb.sheetnames
        finally:
            # Release the file handle even if reading the names fails.
            wb.close()
        raise ValueError(f"Sheets in {path.name}: {', '.join(names)}")

    if sheet is None:
        return pl.read_excel(path)

    if isinstance(sheet, int):
        # polars selects sheets by position through ``sheet_id``, which
        # counts from 1 (0 means "all sheets"); ``sheet_name`` is for
        # string names only. Translate the documented 0-based index.
        kwargs: dict = {"sheet_id": sheet + 1}
    else:
        kwargs = {"sheet_name": sheet}
    try:
        return pl.read_excel(path, **kwargs)
    except TypeError:
        # Older polars may not support sheet selection. Keep the
        # best-effort fallback, but do not hide that a different sheet
        # than the requested one is being returned.
        log.warning(
            "Installed polars rejected sheet selection (%r); "
            "loading the default sheet of %s instead",
            sheet,
            path.name,
        )
        return pl.read_excel(path)
89
+
90
+
91
def _load_csv(path: Path, session=None) -> pl.DataFrame:
    """Read a CSV file using the separator and schema-inference length from config.

    ``session`` is accepted so all loaders share one call shape; it is not
    used here.
    """
    settings = get_config()
    read_options = {
        "infer_schema_length": settings.infer_schema_length,
        "separator": settings.csv_separator,
    }
    return pl.read_csv(path, **read_options)
98
+
99
+
100
def _load_parquet(p, session=None):
    """Read a Parquet file; ``session`` is accepted but not used."""
    return pl.read_parquet(p)


def _load_excel_default(p, session=None):
    """Read an Excel workbook with default sheet selection; ``session`` is not used."""
    return _load_excel(p)


# Maps a lowercase file suffix to the callable that reads that format.
# Each entry accepts ``(path, session=None)``.
_LOADERS = {
    ".csv": _load_csv,
    ".parquet": _load_parquet,
    ".dta": _load_dta,
    ".xlsx": _load_excel_default,
    ".xls": _load_excel_default,
    ".sas7bdat": _load_sas7bdat,
    ".sav": _load_sav,
}

# Comma-separated list of the registered suffixes, for error messages.
_SUPPORTED = ", ".join(_LOADERS)
111
+
112
+
113
def load_file(path: str | Path, session=None) -> pl.DataFrame:
    """Read a data file into a Polars DataFrame, choosing the loader by suffix.

    Args:
        path: File to read; the suffix is matched case-insensitively.
        session: Forwarded unchanged to the selected loader.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If no loader is registered for the file's suffix.
    """
    source = Path(path)
    if not source.exists():
        raise FileNotFoundError(f"File not found: {source}")

    ext = source.suffix.lower()
    if ext not in _LOADERS:
        raise ValueError(f"Unsupported file format: {ext} (supported: {_SUPPORTED})")
    read = _LOADERS[ext]

    size_kb = source.stat().st_size / 1024
    log.info("Loading %s (%s, %.1f KB)", source.name, ext, size_kb)
    frame = read(source, session=session)
    log.info("Loaded %d rows x %d cols", frame.height, frame.width)
    return frame
127
+
128
+
129
def _save_excel(df: pl.DataFrame, path: Path) -> None:
    """Save to Excel (optional dependency).

    Args:
        df: Frame to write; its ``write_excel`` method does the work.
        path: Destination workbook path.

    Raises:
        ImportError: If ``write_excel`` fails because a required module
            (xlsxwriter) cannot be imported.
    """
    try:
        df.write_excel(path)
    except ImportError as exc:
        # ModuleNotFoundError is a subclass of ImportError, so this single
        # clause covers both. Chain the cause to keep the real failure
        # visible in the traceback.
        raise ImportError(
            "Writing .xlsx files requires xlsxwriter. "
            "Install it with: pip install openstat[excel]"
        ) from exc
138
+
139
+
140
def _save_dta(df: pl.DataFrame, path: Path) -> None:
    """Save to Stata .dta (optional dependency).

    Args:
        df: Frame to write; converted to pandas before being handed to
            pyreadstat.
        path: Destination file path.

    Raises:
        ImportError: If pyreadstat cannot be imported.
    """
    try:
        import pyreadstat
    except ImportError as exc:
        # Chain the underlying failure so a broken (not merely missing)
        # pyreadstat installation can still be diagnosed from the traceback.
        raise ImportError(
            "Writing .dta files requires pyreadstat. "
            "Install it with: pip install openstat[stata]"
        ) from exc
    pandas_df = df.to_pandas()
    pyreadstat.write_dta(pandas_df, str(path))
151
+
152
+
153
def _save_sav(df: pl.DataFrame, path: Path) -> None:
    """Save to SPSS .sav (optional dependency).

    Args:
        df: Frame to write; converted to pandas before being handed to
            pyreadstat.
        path: Destination file path.

    Raises:
        ImportError: If pyreadstat cannot be imported.
    """
    try:
        import pyreadstat
    except ImportError as exc:
        # Chain the underlying failure so a broken (not merely missing)
        # pyreadstat installation can still be diagnosed from the traceback.
        raise ImportError(
            "Writing .sav files requires pyreadstat. "
            "Install it with: pip install openstat[spss]"
        ) from exc
    pandas_df = df.to_pandas()
    pyreadstat.write_sav(pandas_df, str(path))
164
+
165
+
166
def save_file(df: pl.DataFrame, path: str | Path) -> Path:
    """Save a DataFrame to CSV, Parquet, Excel, Stata, or SPSS.

    The format is chosen from the (case-insensitive) file suffix. Missing
    parent directories are created, but only once the suffix is known to be
    supported.

    Args:
        df: Frame to write.
        path: Destination path.

    Returns:
        The destination as a ``Path``.

    Raises:
        ValueError: If the suffix is not one of the supported formats.
    """
    p = Path(path)
    suffix = p.suffix.lower()

    # Validate before touching the filesystem so a rejected request does not
    # leave freshly created directories behind.
    if suffix not in (".csv", ".parquet", ".xlsx", ".dta", ".sav"):
        raise ValueError(
            f"Unsupported save format: {suffix} "
            "(use .csv, .parquet, .xlsx, .dta, or .sav)"
        )

    p.parent.mkdir(parents=True, exist_ok=True)

    if suffix == ".csv":
        df.write_csv(p, separator=get_config().csv_separator)
    elif suffix == ".parquet":
        df.write_parquet(p)
    elif suffix == ".xlsx":
        _save_excel(df, p)
    elif suffix == ".dta":
        _save_dta(df, p)
    else:  # ".sav" — the only suffix left after validation
        _save_sav(df, p)
    return p
@@ -0,0 +1,18 @@
1
+ """Jupyter notebook integration for OpenStat.
2
+
3
+ Usage in Jupyter:
4
+ %load_ext openstat
5
+ %ost load data.csv
6
+ %ost summarize
7
+
8
+ %%openstat
9
+ load data.csv
10
+ summarize
11
+ ols y ~ x1 + x2
12
+ """
13
+
14
+
15
def load_ipython_extension(ipython):
    """IPython extension hook: register the OpenStat magics on *ipython*."""
    # Function-scope import: the magics module is only loaded when the
    # extension is actually activated.
    import openstat.jupyter.magic as magic_module

    ipython.register_magics(magic_module.OpenStatMagics)
@@ -0,0 +1,18 @@
1
+ """Jupyter display helpers for OpenStat."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
def fit_result_to_html(result) -> str:
    """Convert a FitResult to HTML table for Jupyter rendering.

    Args:
        result: Object exposing ``summary_table()``; whatever it returns is
            printed through Rich and exported.

    Returns:
        The recorded Rich output as an HTML document with inline styles.
    """
    import io

    from rich.console import Console

    # The console is used purely as a recorder. Send its live output to an
    # in-memory sink and disable notebook auto-display so the content is not
    # shown once by ``print`` and again by the caller rendering the HTML.
    console = Console(
        record=True,
        width=120,
        file=io.StringIO(),
        force_jupyter=False,
    )
    console.print(result.summary_table())
    return console.export_html(inline_styles=True)
13
+
14
+
15
+ def dataframe_to_html(df, max_rows: int = 50) -> str:
16
+ """Convert a Polars DataFrame to styled HTML for Jupyter."""
17
+ pdf = df.head(max_rows).to_pandas()
18
+ return pdf.to_html(classes="openstat-table", border=0, index=False)
@@ -0,0 +1,60 @@
1
+ """IPython magic commands for OpenStat in Jupyter notebooks."""
2
+
3
+ from __future__ import annotations
4
+
5
+ try:
6
+ from IPython.core.magic import Magics, magics_class, cell_magic, line_magic
7
+ from IPython.display import display, HTML
8
+ HAS_IPYTHON = True
9
+ except ImportError:
10
+ HAS_IPYTHON = False
11
+
12
+ from openstat.session import Session
13
+
14
+
15
def _dispatch_line(session: Session, line: str) -> str | None:
    """Run one command line through the REPL dispatcher and return its result."""
    # Function-scope import, resolved on each call rather than at module load.
    import openstat.repl as repl_module

    return repl_module._dispatch(session, line)
19
+
20
+
21
def _rich_to_html(text: str) -> str:
    """Convert Rich-styled text to HTML.

    Args:
        text: Text (optionally with Rich markup) to render.

    Returns:
        The recorded Rich output as an HTML document with inline styles.
    """
    import io

    from rich.console import Console

    # The console is used purely as a recorder. Send its live output to an
    # in-memory sink and disable notebook auto-display so the text is not
    # emitted once by ``print`` and again when the caller displays the HTML.
    console = Console(
        record=True,
        width=120,
        file=io.StringIO(),
        force_jupyter=False,
    )
    console.print(text)
    return console.export_html(inline_styles=True)
27
+
28
+
29
if HAS_IPYTHON:
    @magics_class
    class OpenStatMagics(Magics):
        """IPython magics for OpenStat: %ost and %%openstat."""

        def __init__(self, shell):
            super().__init__(shell)
            # One session per magics instance, reused by every command.
            self.session = Session()

        @staticmethod
        def _show(output):
            """Display a command's output as HTML inside a <pre> element."""
            display(HTML(f"<pre>{_rich_to_html(output)}</pre>"))

        @line_magic
        def ost(self, line):
            """Run a single OpenStat command: %ost load data.csv"""
            output = _dispatch_line(self.session, line)
            # Nothing to show for empty results or the quit sentinel.
            if not output or output == "__QUIT__":
                return
            self._show(output)

        @cell_magic
        def openstat(self, line, cell):
            """Run multiple OpenStat commands in a cell."""
            commands = (raw.strip() for raw in cell.strip().split('\n'))
            for command in commands:
                # Skip blank lines and '#' comments.
                if command == "" or command.startswith('#'):
                    continue
                output = _dispatch_line(self.session, command)
                if output == "__QUIT__":
                    # Quit sentinel: stop running the rest of the cell.
                    break
                if output:
                    self._show(output)
else:
    class OpenStatMagics:
        """Stub when IPython is not available."""
@@ -0,0 +1,59 @@
1
+ """Logging configuration for OpenStat."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import sys
7
+ from pathlib import Path
8
+
9
# Directory for log files, beneath the current user's home directory.
_LOG_DIR = Path.home().joinpath(".openstat", "logs")
# Layout of each log record and of its timestamp.
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
_LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# Flipped to True by the first setup_logging() call; later calls are no-ops.
_configured = False
14
+
15
+
16
def setup_logging(*, verbose: bool = False, debug: bool = False) -> None:
    """Configure the ``openstat`` logger; only the first call has any effect.

    - The log file (``openstat.log`` under ``_LOG_DIR``) always receives
      DEBUG and above, regardless of the flags.
    - ``verbose``: additionally echo INFO and above to stderr.
    - ``debug``: additionally echo DEBUG and above to stderr (wins over
      ``verbose`` when both are set).
    """
    global _configured
    if _configured:
        return
    _configured = True

    package_logger = logging.getLogger("openstat")
    # Let every record through the logger; each handler applies its own level.
    package_logger.setLevel(logging.DEBUG)

    formatter = logging.Formatter(_LOG_FORMAT, datefmt=_LOG_DATE_FORMAT)

    # File handler: always attempted, always at DEBUG.
    try:
        _LOG_DIR.mkdir(parents=True, exist_ok=True)
        file_handler = logging.FileHandler(
            _LOG_DIR / "openstat.log", encoding="utf-8"
        )
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        package_logger.addHandler(file_handler)
    except OSError:
        # Best effort: an unwritable log location must not crash the program.
        pass

    # Console handler: only when the user asked for verbose/debug output.
    if debug or verbose:
        console_level = logging.DEBUG if debug else logging.INFO
        stream_handler = logging.StreamHandler(sys.stderr)
        stream_handler.setLevel(console_level)
        stream_handler.setFormatter(formatter)
        package_logger.addHandler(stream_handler)
55
+
56
+
57
def get_logger(name: str) -> logging.Logger:
    """Return the logger named ``openstat.<name>``."""
    qualified_name = f"openstat.{name}"
    return logging.getLogger(qualified_name)
File without changes