openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. openstat/__init__.py +3 -0
  2. openstat/__main__.py +4 -0
  3. openstat/backends/__init__.py +16 -0
  4. openstat/backends/duckdb_backend.py +70 -0
  5. openstat/backends/polars_backend.py +52 -0
  6. openstat/cli.py +92 -0
  7. openstat/commands/__init__.py +82 -0
  8. openstat/commands/adv_stat_cmds.py +1255 -0
  9. openstat/commands/advanced_ml_cmds.py +576 -0
  10. openstat/commands/advreg_cmds.py +207 -0
  11. openstat/commands/alias_cmds.py +135 -0
  12. openstat/commands/arch_cmds.py +82 -0
  13. openstat/commands/arules_cmds.py +111 -0
  14. openstat/commands/automodel_cmds.py +212 -0
  15. openstat/commands/backend_cmds.py +82 -0
  16. openstat/commands/base.py +170 -0
  17. openstat/commands/bayes_cmds.py +71 -0
  18. openstat/commands/causal_cmds.py +269 -0
  19. openstat/commands/cluster_cmds.py +152 -0
  20. openstat/commands/data_cmds.py +996 -0
  21. openstat/commands/datamanip_cmds.py +672 -0
  22. openstat/commands/dataquality_cmds.py +174 -0
  23. openstat/commands/datetime_cmds.py +176 -0
  24. openstat/commands/dimreduce_cmds.py +184 -0
  25. openstat/commands/discrete_cmds.py +149 -0
  26. openstat/commands/dsl_cmds.py +143 -0
  27. openstat/commands/epi_cmds.py +93 -0
  28. openstat/commands/equiv_tobit_cmds.py +94 -0
  29. openstat/commands/esttab_cmds.py +196 -0
  30. openstat/commands/export_beamer_cmds.py +142 -0
  31. openstat/commands/export_cmds.py +201 -0
  32. openstat/commands/export_extra_cmds.py +240 -0
  33. openstat/commands/factor_cmds.py +180 -0
  34. openstat/commands/groupby_cmds.py +155 -0
  35. openstat/commands/help_cmds.py +237 -0
  36. openstat/commands/i18n_cmds.py +43 -0
  37. openstat/commands/import_extra_cmds.py +561 -0
  38. openstat/commands/influence_cmds.py +134 -0
  39. openstat/commands/iv_cmds.py +106 -0
  40. openstat/commands/manova_cmds.py +105 -0
  41. openstat/commands/mediate_cmds.py +233 -0
  42. openstat/commands/meta_cmds.py +284 -0
  43. openstat/commands/mi_cmds.py +228 -0
  44. openstat/commands/mixed_cmds.py +79 -0
  45. openstat/commands/mixture_changepoint_cmds.py +166 -0
  46. openstat/commands/ml_adv_cmds.py +147 -0
  47. openstat/commands/ml_cmds.py +178 -0
  48. openstat/commands/model_eval_cmds.py +142 -0
  49. openstat/commands/network_cmds.py +288 -0
  50. openstat/commands/nlquery_cmds.py +161 -0
  51. openstat/commands/nonparam_cmds.py +149 -0
  52. openstat/commands/outreg_cmds.py +247 -0
  53. openstat/commands/panel_cmds.py +141 -0
  54. openstat/commands/pdf_cmds.py +226 -0
  55. openstat/commands/pipeline_cmds.py +319 -0
  56. openstat/commands/plot_cmds.py +189 -0
  57. openstat/commands/plugin_cmds.py +79 -0
  58. openstat/commands/posthoc_cmds.py +153 -0
  59. openstat/commands/power_cmds.py +172 -0
  60. openstat/commands/profile_cmds.py +246 -0
  61. openstat/commands/rbridge_cmds.py +81 -0
  62. openstat/commands/regex_cmds.py +104 -0
  63. openstat/commands/report_cmds.py +48 -0
  64. openstat/commands/repro_cmds.py +129 -0
  65. openstat/commands/resampling_cmds.py +109 -0
  66. openstat/commands/reshape_cmds.py +223 -0
  67. openstat/commands/sem_cmds.py +177 -0
  68. openstat/commands/stat_cmds.py +1040 -0
  69. openstat/commands/stata_import_cmds.py +215 -0
  70. openstat/commands/string_cmds.py +124 -0
  71. openstat/commands/surv_cmds.py +145 -0
  72. openstat/commands/survey_cmds.py +153 -0
  73. openstat/commands/textanalysis_cmds.py +192 -0
  74. openstat/commands/ts_adv_cmds.py +136 -0
  75. openstat/commands/ts_cmds.py +195 -0
  76. openstat/commands/tui_cmds.py +111 -0
  77. openstat/commands/ux_cmds.py +191 -0
  78. openstat/commands/validate_cmds.py +270 -0
  79. openstat/commands/viz_adv_cmds.py +312 -0
  80. openstat/commands/viz_extra_cmds.py +251 -0
  81. openstat/commands/watch_cmds.py +69 -0
  82. openstat/config.py +106 -0
  83. openstat/dsl/__init__.py +0 -0
  84. openstat/dsl/parser.py +332 -0
  85. openstat/dsl/tokenizer.py +105 -0
  86. openstat/i18n.py +120 -0
  87. openstat/io/__init__.py +0 -0
  88. openstat/io/loader.py +187 -0
  89. openstat/jupyter/__init__.py +18 -0
  90. openstat/jupyter/display.py +18 -0
  91. openstat/jupyter/magic.py +60 -0
  92. openstat/logging_config.py +59 -0
  93. openstat/plots/__init__.py +0 -0
  94. openstat/plots/plotter.py +437 -0
  95. openstat/plots/surv_plots.py +32 -0
  96. openstat/plots/ts_plots.py +59 -0
  97. openstat/plugins/__init__.py +5 -0
  98. openstat/plugins/manager.py +69 -0
  99. openstat/repl.py +457 -0
  100. openstat/reporting/__init__.py +0 -0
  101. openstat/reporting/eda.py +208 -0
  102. openstat/reporting/report.py +67 -0
  103. openstat/script_runner.py +319 -0
  104. openstat/session.py +133 -0
  105. openstat/stats/__init__.py +0 -0
  106. openstat/stats/advanced_regression.py +269 -0
  107. openstat/stats/arch_garch.py +84 -0
  108. openstat/stats/bayesian.py +103 -0
  109. openstat/stats/causal.py +258 -0
  110. openstat/stats/clustering.py +206 -0
  111. openstat/stats/discrete.py +311 -0
  112. openstat/stats/epidemiology.py +119 -0
  113. openstat/stats/equiv_tobit.py +163 -0
  114. openstat/stats/factor.py +174 -0
  115. openstat/stats/imputation.py +282 -0
  116. openstat/stats/influence.py +78 -0
  117. openstat/stats/iv.py +131 -0
  118. openstat/stats/manova.py +124 -0
  119. openstat/stats/mixed.py +128 -0
  120. openstat/stats/ml.py +275 -0
  121. openstat/stats/ml_advanced.py +117 -0
  122. openstat/stats/model_eval.py +183 -0
  123. openstat/stats/models.py +1342 -0
  124. openstat/stats/nonparametric.py +130 -0
  125. openstat/stats/panel.py +179 -0
  126. openstat/stats/power.py +295 -0
  127. openstat/stats/resampling.py +203 -0
  128. openstat/stats/survey.py +213 -0
  129. openstat/stats/survival.py +196 -0
  130. openstat/stats/timeseries.py +142 -0
  131. openstat/stats/ts_advanced.py +114 -0
  132. openstat/types.py +11 -0
  133. openstat/web/__init__.py +1 -0
  134. openstat/web/app.py +117 -0
  135. openstat/web/session_manager.py +73 -0
  136. openstat/web/static/app.js +117 -0
  137. openstat/web/static/index.html +38 -0
  138. openstat/web/static/style.css +103 -0
  139. openstat_cli-1.0.0.dist-info/METADATA +748 -0
  140. openstat_cli-1.0.0.dist-info/RECORD +143 -0
  141. openstat_cli-1.0.0.dist-info/WHEEL +4 -0
  142. openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
  143. openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
openstat/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """OpenStat — Open-source statistical analysis tool."""
2
+
3
# The wheel is published as version 1.0.0 under the distribution name
# "openstat-cli"; a hard-coded "0.9.0" here made `openstat --version` disagree
# with the installed package. Read the version from the distribution metadata
# so the two cannot drift apart again.
from importlib import metadata as _metadata

try:
    __version__ = _metadata.version("openstat-cli")
except _metadata.PackageNotFoundError:
    # No installed distribution metadata is available (for example when the
    # package is imported straight from a source tree).
    __version__ = "1.0.0"
openstat/__main__.py ADDED
@@ -0,0 +1,4 @@
1
+ """Allow running as `python -m openstat`."""
2
from openstat.cli import app

# `python -m openstat` executes this module with __name__ == "__main__".
# The guard keeps the CLI from starting when the module is merely imported
# (documentation tools, or multiprocessing workers that re-import the main
# module under a different name).
if __name__ == "__main__":
    app()
openstat/backends/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ """Data backend abstraction: Polars (default) and DuckDB."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Protocol
6
+
7
+ import polars as pl
8
+
9
+
10
class DataBackend(Protocol):
    """Structural interface shared by the data backends.

    Any object that provides these four methods satisfies the protocol;
    inheriting from this class is not required.
    """

    def load(self, path: str) -> None:
        """Load the dataset stored at ``path`` into the backend."""

    def to_polars(self) -> pl.DataFrame:
        """Return the loaded dataset as a Polars DataFrame."""

    def sql(self, query: str) -> pl.DataFrame:
        """Run ``query`` and return its result as a Polars DataFrame."""

    def shape(self) -> tuple[int, int]:
        """Return the dimensions of the loaded dataset as a 2-tuple."""
openstat/backends/duckdb_backend.py ADDED
@@ -0,0 +1,70 @@
1
+ """DuckDB backend for large dataset processing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import polars as pl
8
+
9
+
10
+ def _try_import_duckdb():
11
+ try:
12
+ import duckdb
13
+ return duckdb
14
+ except ImportError:
15
+ raise ImportError(
16
+ "DuckDB backend requires duckdb. "
17
+ "Install it with: pip install openstat[duckdb]"
18
+ )
19
+
20
+
21
class DuckDBBackend:
    """DuckDB-based backend for large datasets.

    Every ``load`` materialises the dataset as a DuckDB table named ``data``
    on the connection owned by this instance. Using a table for all formats
    (instead of a table for CSV/Parquet and a registered view for everything
    else) lets ``CREATE OR REPLACE`` swap datasets cleanly on a reload,
    whatever format was loaded before.
    """

    # Temporary name under which an in-memory frame is exposed to DuckDB
    # while it is being copied into the ``data`` table.
    _STAGING_VIEW = "_openstat_staging"

    def __init__(self) -> None:
        """Open a DuckDB connection; raises ImportError if duckdb is missing."""
        duckdb = _try_import_duckdb()
        self._conn = duckdb.connect()
        self._table_loaded = False

    def load(self, path: str) -> None:
        """Load the file at ``path`` into the ``data`` table.

        CSV and Parquet files are read by DuckDB itself. Excel files are read
        with Polars and any other suffix goes through
        ``openstat.io.loader.load_file``; the resulting frame is then copied
        into DuckDB.

        Args:
            path: Location of the file to load; the suffix selects the reader.
        """
        p = Path(path)
        suffix = p.suffix.lower()

        if suffix == ".csv":
            self._create_table_from_reader("read_csv", p)
        elif suffix == ".parquet":
            self._create_table_from_reader("read_parquet", p)
        elif suffix in (".xlsx", ".xls"):
            # DuckDB doesn't natively support Excel; load via Polars.
            self._create_table_from_frame(pl.read_excel(p))
        else:
            from openstat.io.loader import load_file
            self._create_table_from_frame(load_file(path))

        # Only flagged once the table has actually been created.
        self._table_loaded = True

    def _create_table_from_reader(self, reader: str, path: Path) -> None:
        """Create ``data`` from a DuckDB table function such as ``read_csv``."""
        # Double single quotes so the path is a valid SQL string literal.
        quoted = str(path).replace("'", "''")
        self._conn.execute(
            f"CREATE OR REPLACE TABLE data AS SELECT * FROM {reader}('{quoted}')"
        )

    def _create_table_from_frame(self, frame: pl.DataFrame) -> None:
        """Copy a Polars frame into ``data`` through a temporary staging view."""
        self._conn.register(self._STAGING_VIEW, frame.to_pandas())
        try:
            self._conn.execute(
                f"CREATE OR REPLACE TABLE data AS SELECT * FROM {self._STAGING_VIEW}"
            )
        finally:
            # The table now holds its own copy, so the staging view is dropped
            # even if the copy failed.
            self._conn.unregister(self._STAGING_VIEW)

    def to_polars(self) -> pl.DataFrame:
        """Return the whole ``data`` table as a Polars DataFrame.

        Raises:
            RuntimeError: If nothing has been loaded yet.
        """
        if not self._table_loaded:
            raise RuntimeError("No data loaded")
        return self._conn.execute("SELECT * FROM data").pl()

    def sql(self, query: str) -> pl.DataFrame:
        """Run ``query`` on the connection and return the result as Polars.

        Raises:
            RuntimeError: If nothing has been loaded yet.
        """
        if not self._table_loaded:
            raise RuntimeError("No data loaded. Use 'load' first.")
        return self._conn.execute(query).pl()

    def shape(self) -> tuple[int, int]:
        """Return ``(n_rows, n_cols)`` of ``data``, or ``(0, 0)`` if unloaded."""
        if not self._table_loaded:
            return (0, 0)
        result = self._conn.execute("SELECT COUNT(*) as n FROM data").fetchone()
        n_rows = result[0] if result else 0
        # A zero-row select still exposes the column descriptions.
        cols = self._conn.execute("SELECT * FROM data LIMIT 0").description
        n_cols = len(cols) if cols else 0
        return (n_rows, n_cols)

    def execute(self, query: str):
        """Execute raw SQL and return DuckDB result."""
        return self._conn.execute(query)
openstat/backends/polars_backend.py ADDED
@@ -0,0 +1,52 @@
1
+ """Polars backend — wraps the default Polars DataFrame operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import polars as pl
8
+
9
+ from openstat.config import get_config
10
+
11
+
12
class PolarsBackend:
    """Default backend using Polars DataFrames.

    CSV and Parquet files are opened lazily and only collected on first
    access; the collected frame is then cached. Other formats are loaded
    eagerly through ``openstat.io.loader.load_file``.
    """

    def __init__(self) -> None:
        """Create an empty backend with no data loaded."""
        # Materialised frame; stays None until the first collect or an eager load.
        self._df: pl.DataFrame | None = None
        # Lazy handle on the most recently loaded dataset.
        self._lazy: pl.LazyFrame | None = None

    def load(self, path: str) -> None:
        """Load the file at ``path``, replacing any previously loaded data.

        Args:
            path: Location of the file to load; the suffix selects the reader.
        """
        p = Path(path)
        suffix = p.suffix.lower()
        cfg = get_config()

        eager: pl.DataFrame | None = None
        # Use LazyFrame for scan-capable formats
        if suffix == ".csv":
            lazy = pl.scan_csv(
                p,
                infer_schema_length=cfg.infer_schema_length,
            )
        elif suffix == ".parquet":
            lazy = pl.scan_parquet(p)
        else:
            # For other formats, load eagerly
            from openstat.io.loader import load_file
            eager = load_file(path)
            lazy = eager.lazy()

        # Both attributes are replaced together: for lazy formats the cached
        # frame is reset to None, so to_polars() cannot hand back the frame
        # collected from a previously loaded file.
        self._df = eager
        self._lazy = lazy

    def to_polars(self) -> pl.DataFrame:
        """Return the loaded data, collecting the lazy frame on first use.

        Raises:
            RuntimeError: If nothing has been loaded yet.
        """
        if self._df is not None:
            return self._df
        if self._lazy is not None:
            self._df = self._lazy.collect()
            return self._df
        raise RuntimeError("No data loaded")

    def sql(self, query: str) -> pl.DataFrame:
        """Run ``query`` against the loaded data, exposed as table ``data``."""
        df = self.to_polars()
        return pl.SQLContext({"data": df}).execute(query).collect()

    def shape(self) -> tuple[int, int]:
        """Return the shape of the loaded data (collects it if still lazy)."""
        df = self.to_polars()
        return df.shape
openstat/cli.py ADDED
@@ -0,0 +1,92 @@
1
+ """CLI entry point for OpenStat (Typer-based)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import typer
8
+ from rich.console import Console
9
+
10
+ from openstat import __version__
11
+
12
# Shared Rich console used for all CLI output in this module.
console = Console()

# Root Typer application; the commands below attach to it via decorators.
app = typer.Typer(
    no_args_is_help=True,
    add_completion=False,
    help="OpenStat — Open-source statistical analysis tool.",
    name="openstat",
)
19
+
20
+
21
+ def _version_callback(value: bool) -> None:
22
+ if value:
23
+ console.print(f"openstat {__version__}")
24
+ raise typer.Exit()
25
+
26
+
27
@app.callback()
def main(
    version: bool = typer.Option(
        False,
        "--version",
        "-V",
        callback=_version_callback,
        is_eager=True,
        help="Show version and exit.",
    ),
    verbose: bool = typer.Option(
        False,
        "--verbose",
        "-v",
        help="Enable verbose logging (INFO level).",
    ),
    debug: bool = typer.Option(
        False,
        "--debug",
        help="Enable debug logging (DEBUG level).",
    ),
) -> None:
    """OpenStat — Open-source statistical analysis tool."""
    # The docstring above is left untouched because Typer can display it as
    # help text; implementation notes therefore live in comments.
    #
    # `version` is consumed by its callback and is not read here. Logging is
    # configured from the two flags before any sub-command runs, and the
    # logging module is imported only at call time.
    from openstat import logging_config

    logging_config.setup_logging(verbose=verbose, debug=debug)
47
+
48
+
49
@app.command()
def repl() -> None:
    """Start the interactive REPL."""
    # Imported at call time so other sub-commands do not load the REPL module.
    import openstat.repl as repl_module

    repl_module.run_repl()
54
+
55
+
56
@app.command()
def run(
    script: Path = typer.Argument(..., help="Path to an .ost script file."),
    strict: bool = typer.Option(
        False, "--strict",
        help="Stop on first error and exit with code 1.",
    ),
) -> None:
    """Execute an .ost script file."""
    # The docstring above is left untouched because Typer can display it as
    # help text; implementation notes therefore live in comments.
    from rich.markup import escape

    # Escape the path before embedding it in Rich markup; otherwise a path
    # segment such as "[draft]" would be parsed as a style tag and vanish
    # from the error message.
    shown = escape(str(script))

    if not script.exists():
        console.print(f"[red]File not found: {shown}[/red]")
        raise typer.Exit(1)
    # A directory passes exists() but cannot be executed as a script.
    if script.is_dir():
        console.print(f"[red]Not a file: {shown}[/red]")
        raise typer.Exit(1)

    # Imported at call time so other sub-commands do not load the REPL module.
    from openstat.repl import run_script
    run_script(str(script), strict=strict)
70
+
71
+
72
@app.command()
def serve(
    port: int = typer.Option(8080, help="Port to listen on."),
    host: str = typer.Option("127.0.0.1", help="Host to bind to."),
) -> None:
    """Start the web-based GUI."""
    # The docstring above is left untouched because Typer can display it as
    # help text; implementation notes therefore live in comments.
    #
    # The backslash stops Rich from parsing "[web]" as a style tag, which
    # would silently drop the extras name from the printed install hint.
    missing_extra = "[red]Web GUI requires: pip install openstat\\[web][/red]"

    try:
        import uvicorn
    except ImportError:
        console.print(missing_extra)
        raise typer.Exit(1)

    from openstat.web.app import app as web_app

    # NOTE(review): presumably openstat.web.app sets `app` to None when its
    # own optional dependencies are missing — confirm against that module.
    if web_app is None:
        console.print(missing_extra)
        raise typer.Exit(1)

    console.print(f"[bold cyan]OpenStat Web[/bold cyan] starting on http://{host}:{port}")
    uvicorn.run(web_app, host=host, port=port, log_level="info")
89
+
90
+
91
# Start the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()
openstat/commands/__init__.py ADDED
@@ -0,0 +1,82 @@
1
+ """Command package — import all command modules to register them."""
2
+
3
+ # Importing these modules triggers the @command decorators,
4
+ # which populate the global registry in base.py.
5
+ from openstat.commands import data_cmds # noqa: F401
6
+ from openstat.commands import stat_cmds # noqa: F401
7
+ from openstat.commands import plot_cmds # noqa: F401
8
+ from openstat.commands import report_cmds # noqa: F401
9
+ from openstat.commands import plugin_cmds # noqa: F401
10
+ from openstat.commands import panel_cmds # noqa: F401
11
+ from openstat.commands import iv_cmds # noqa: F401
12
+ from openstat.commands import mixed_cmds # noqa: F401
13
+ from openstat.commands import ts_cmds # noqa: F401
14
+ from openstat.commands import surv_cmds # noqa: F401
15
+ from openstat.commands import mi_cmds # noqa: F401
16
+ from openstat.commands import survey_cmds # noqa: F401
17
+ from openstat.commands import backend_cmds # noqa: F401
18
+ from openstat.commands import discrete_cmds # noqa: F401
19
+ from openstat.commands import causal_cmds # noqa: F401
20
+ from openstat.commands import power_cmds # noqa: F401
21
+ from openstat.commands import factor_cmds # noqa: F401
22
+ from openstat.commands import export_cmds # noqa: F401
23
+ from openstat.commands import nonparam_cmds # noqa: F401
24
+ from openstat.commands import ml_cmds # noqa: F401
25
+ from openstat.commands import cluster_cmds # noqa: F401
26
+ from openstat.commands import manova_cmds # noqa: F401
27
+ from openstat.commands import arch_cmds # noqa: F401
28
+ from openstat.commands import bayes_cmds # noqa: F401
29
+ from openstat.commands import reshape_cmds # noqa: F401
30
+ from openstat.commands import advreg_cmds # noqa: F401
31
+ from openstat.commands import ts_adv_cmds # noqa: F401
32
+ from openstat.commands import influence_cmds # noqa: F401
33
+ from openstat.commands import ml_adv_cmds # noqa: F401
34
+ from openstat.commands import esttab_cmds # noqa: F401
35
+ from openstat.commands import string_cmds # noqa: F401
36
+ from openstat.commands import epi_cmds # noqa: F401
37
+ from openstat.commands import dsl_cmds # noqa: F401
38
+ from openstat.commands import resampling_cmds # noqa: F401
39
+ from openstat.commands import model_eval_cmds # noqa: F401
40
+ from openstat.commands import dataquality_cmds # noqa: F401
41
+ from openstat.commands import outreg_cmds # noqa: F401
42
+ from openstat.commands import equiv_tobit_cmds # noqa: F401
43
+ from openstat.commands import viz_extra_cmds # noqa: F401
44
+ from openstat.commands import posthoc_cmds # noqa: F401
45
+ from openstat.commands import sem_cmds # noqa: F401
46
+ from openstat.commands import meta_cmds # noqa: F401
47
+ from openstat.commands import network_cmds # noqa: F401
48
+ from openstat.commands import automodel_cmds # noqa: F401
49
+ from openstat.commands import repro_cmds # noqa: F401
50
+ from openstat.commands import tui_cmds # noqa: F401
51
+ from openstat.commands import i18n_cmds # noqa: F401
52
+ from openstat.commands import mediate_cmds # noqa: F401
53
+ from openstat.commands import validate_cmds # noqa: F401
54
+ from openstat.commands import regex_cmds # noqa: F401
55
+ from openstat.commands import alias_cmds # noqa: F401
56
+ from openstat.commands import pdf_cmds # noqa: F401
57
+ from openstat.commands import watch_cmds # noqa: F401
58
+ from openstat.commands import rbridge_cmds # noqa: F401
59
+ from openstat.commands import nlquery_cmds # noqa: F401
60
+ from openstat.commands import stata_import_cmds # noqa: F401
61
+ from openstat.commands import help_cmds # noqa: F401
62
+ from openstat.commands import datetime_cmds # noqa: F401
63
+ from openstat.commands import groupby_cmds # noqa: F401
64
+ from openstat.commands import profile_cmds # noqa: F401
65
+ from openstat.commands import advanced_ml_cmds # noqa: F401
66
+ from openstat.commands import viz_adv_cmds # noqa: F401
67
+ from openstat.commands import pipeline_cmds # noqa: F401
68
+ from openstat.commands import export_extra_cmds # noqa: F401
69
+ from openstat.commands import datamanip_cmds # noqa: F401
70
+ from openstat.commands import import_extra_cmds # noqa: F401
71
+ from openstat.commands import adv_stat_cmds # noqa: F401
72
+ from openstat.commands import mixture_changepoint_cmds # noqa: F401
73
+ from openstat.commands import dimreduce_cmds # noqa: F401
74
+ from openstat.commands import arules_cmds # noqa: F401
75
+ from openstat.commands import textanalysis_cmds # noqa: F401
76
+ from openstat.commands import ux_cmds # noqa: F401
77
+ from openstat.commands import export_beamer_cmds # noqa: F401
78
+
79
from openstat.commands.base import get_registry

# Public API: the command registry consumed by the REPL. It is read only
# after every command module above has been imported, so all decorated
# commands have already been registered.
COMMANDS = get_registry()