PyPI - openstat-cli - Versions diffs - 1.0.0__py3-none-any.whl - Mend

openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (143)

openstat/__init__.py +3 -0
openstat/__main__.py +4 -0
openstat/backends/__init__.py +16 -0
openstat/backends/duckdb_backend.py +70 -0
openstat/backends/polars_backend.py +52 -0
openstat/cli.py +92 -0
openstat/commands/__init__.py +82 -0
openstat/commands/adv_stat_cmds.py +1255 -0
openstat/commands/advanced_ml_cmds.py +576 -0
openstat/commands/advreg_cmds.py +207 -0
openstat/commands/alias_cmds.py +135 -0
openstat/commands/arch_cmds.py +82 -0
openstat/commands/arules_cmds.py +111 -0
openstat/commands/automodel_cmds.py +212 -0
openstat/commands/backend_cmds.py +82 -0
openstat/commands/base.py +170 -0
openstat/commands/bayes_cmds.py +71 -0
openstat/commands/causal_cmds.py +269 -0
openstat/commands/cluster_cmds.py +152 -0
openstat/commands/data_cmds.py +996 -0
openstat/commands/datamanip_cmds.py +672 -0
openstat/commands/dataquality_cmds.py +174 -0
openstat/commands/datetime_cmds.py +176 -0
openstat/commands/dimreduce_cmds.py +184 -0
openstat/commands/discrete_cmds.py +149 -0
openstat/commands/dsl_cmds.py +143 -0
openstat/commands/epi_cmds.py +93 -0
openstat/commands/equiv_tobit_cmds.py +94 -0
openstat/commands/esttab_cmds.py +196 -0
openstat/commands/export_beamer_cmds.py +142 -0
openstat/commands/export_cmds.py +201 -0
openstat/commands/export_extra_cmds.py +240 -0
openstat/commands/factor_cmds.py +180 -0
openstat/commands/groupby_cmds.py +155 -0
openstat/commands/help_cmds.py +237 -0
openstat/commands/i18n_cmds.py +43 -0
openstat/commands/import_extra_cmds.py +561 -0
openstat/commands/influence_cmds.py +134 -0
openstat/commands/iv_cmds.py +106 -0
openstat/commands/manova_cmds.py +105 -0
openstat/commands/mediate_cmds.py +233 -0
openstat/commands/meta_cmds.py +284 -0
openstat/commands/mi_cmds.py +228 -0
openstat/commands/mixed_cmds.py +79 -0
openstat/commands/mixture_changepoint_cmds.py +166 -0
openstat/commands/ml_adv_cmds.py +147 -0
openstat/commands/ml_cmds.py +178 -0
openstat/commands/model_eval_cmds.py +142 -0
openstat/commands/network_cmds.py +288 -0
openstat/commands/nlquery_cmds.py +161 -0
openstat/commands/nonparam_cmds.py +149 -0
openstat/commands/outreg_cmds.py +247 -0
openstat/commands/panel_cmds.py +141 -0
openstat/commands/pdf_cmds.py +226 -0
openstat/commands/pipeline_cmds.py +319 -0
openstat/commands/plot_cmds.py +189 -0
openstat/commands/plugin_cmds.py +79 -0
openstat/commands/posthoc_cmds.py +153 -0
openstat/commands/power_cmds.py +172 -0
openstat/commands/profile_cmds.py +246 -0
openstat/commands/rbridge_cmds.py +81 -0
openstat/commands/regex_cmds.py +104 -0
openstat/commands/report_cmds.py +48 -0
openstat/commands/repro_cmds.py +129 -0
openstat/commands/resampling_cmds.py +109 -0
openstat/commands/reshape_cmds.py +223 -0
openstat/commands/sem_cmds.py +177 -0
openstat/commands/stat_cmds.py +1040 -0
openstat/commands/stata_import_cmds.py +215 -0
openstat/commands/string_cmds.py +124 -0
openstat/commands/surv_cmds.py +145 -0
openstat/commands/survey_cmds.py +153 -0
openstat/commands/textanalysis_cmds.py +192 -0
openstat/commands/ts_adv_cmds.py +136 -0
openstat/commands/ts_cmds.py +195 -0
openstat/commands/tui_cmds.py +111 -0
openstat/commands/ux_cmds.py +191 -0
openstat/commands/validate_cmds.py +270 -0
openstat/commands/viz_adv_cmds.py +312 -0
openstat/commands/viz_extra_cmds.py +251 -0
openstat/commands/watch_cmds.py +69 -0
openstat/config.py +106 -0
openstat/dsl/__init__.py +0 -0
openstat/dsl/parser.py +332 -0
openstat/dsl/tokenizer.py +105 -0
openstat/i18n.py +120 -0
openstat/io/__init__.py +0 -0
openstat/io/loader.py +187 -0
openstat/jupyter/__init__.py +18 -0
openstat/jupyter/display.py +18 -0
openstat/jupyter/magic.py +60 -0
openstat/logging_config.py +59 -0
openstat/plots/__init__.py +0 -0
openstat/plots/plotter.py +437 -0
openstat/plots/surv_plots.py +32 -0
openstat/plots/ts_plots.py +59 -0
openstat/plugins/__init__.py +5 -0
openstat/plugins/manager.py +69 -0
openstat/repl.py +457 -0
openstat/reporting/__init__.py +0 -0
openstat/reporting/eda.py +208 -0
openstat/reporting/report.py +67 -0
openstat/script_runner.py +319 -0
openstat/session.py +133 -0
openstat/stats/__init__.py +0 -0
openstat/stats/advanced_regression.py +269 -0
openstat/stats/arch_garch.py +84 -0
openstat/stats/bayesian.py +103 -0
openstat/stats/causal.py +258 -0
openstat/stats/clustering.py +206 -0
openstat/stats/discrete.py +311 -0
openstat/stats/epidemiology.py +119 -0
openstat/stats/equiv_tobit.py +163 -0
openstat/stats/factor.py +174 -0
openstat/stats/imputation.py +282 -0
openstat/stats/influence.py +78 -0
openstat/stats/iv.py +131 -0
openstat/stats/manova.py +124 -0
openstat/stats/mixed.py +128 -0
openstat/stats/ml.py +275 -0
openstat/stats/ml_advanced.py +117 -0
openstat/stats/model_eval.py +183 -0
openstat/stats/models.py +1342 -0
openstat/stats/nonparametric.py +130 -0
openstat/stats/panel.py +179 -0
openstat/stats/power.py +295 -0
openstat/stats/resampling.py +203 -0
openstat/stats/survey.py +213 -0
openstat/stats/survival.py +196 -0
openstat/stats/timeseries.py +142 -0
openstat/stats/ts_advanced.py +114 -0
openstat/types.py +11 -0
openstat/web/__init__.py +1 -0
openstat/web/app.py +117 -0
openstat/web/session_manager.py +73 -0
openstat/web/static/app.js +117 -0
openstat/web/static/index.html +38 -0
openstat/web/static/style.css +103 -0
openstat_cli-1.0.0.dist-info/METADATA +748 -0
openstat_cli-1.0.0.dist-info/RECORD +143 -0
openstat_cli-1.0.0.dist-info/WHEEL +4 -0
openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0

openstat/reporting/report.py ADDED Viewed

@@ -0,0 +1,67 @@
+"""Markdown report generation."""
+from __future__ import annotations
+from datetime import datetime
+from pathlib import Path
+from openstat.session import Session
def generate_report(session: Session, output_path: str | Path) -> Path:
    """Generate a Markdown report from the session state.

    The report contains, in order: a title with the generation timestamp, a
    "Dataset" section (source path, shape and column names when available),
    the command history inside a fenced code block, the stored model result
    tables, and one Markdown image link per recorded plot.

    Args:
        session: Object whose ``dataset_path``, ``df``, ``history``,
            ``results`` and ``plot_paths`` attributes are read.
        output_path: Destination file. Missing parent directories are created.

    Returns:
        The path the report was written to.
    """
    import os  # function-scope: only needed to build report-relative links

    p = Path(output_path)
    p.parent.mkdir(parents=True, exist_ok=True)
    report_dir = p.parent.resolve()

    lines: list[str] = [
        "# OpenStat Analysis Report",
        "",
        f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "",
        # Dataset info
        "## Dataset",
        "",
    ]
    if session.dataset_path:
        lines.append(f"- **Source**: `{session.dataset_path}`")
    if session.df is not None:
        r, c = session.df.shape
        lines.append(f"- **Shape**: {r:,} rows x {c} columns")
        lines.append(f"- **Columns**: {', '.join(session.df.columns)}")
    lines.append("")

    # Command history
    lines.extend(["## Commands Executed", "", "```"])
    lines.extend(session.history)
    lines.extend(["```", ""])

    # Model results
    if session.results:
        lines.extend(["## Model Results", ""])
        for result in session.results:
            lines.append(result.table)
            lines.append("")

    # Plots
    if session.plot_paths:
        lines.extend(["## Plots", ""])
        for plot_path in session.plot_paths:
            plot_p = Path(plot_path)
            # Link relative to the report's own directory so the report and
            # its plots can be moved together.  relpath (unlike
            # Path.relative_to) also handles plots that live in a sibling
            # directory by emitting '..' segments.
            try:
                rel = Path(os.path.relpath(plot_p.resolve(), report_dir))
            except ValueError:
                # No relative path exists (e.g. a different drive on Windows).
                rel = plot_p
            # Markdown link targets use forward slashes on every platform.
            lines.append(f"![{plot_p.name}]({rel.as_posix()})")
            lines.append("")

    p.write_text("\n".join(lines), encoding="utf-8")
    return p

openstat/script_runner.py ADDED Viewed

@@ -0,0 +1,319 @@
+"""Advanced .ost script runner with foreach, forvalues, and if/else support."""
+from __future__ import annotations
+import re
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from openstat.session import Session
+# ---------------------------------------------------------------------------
+# Block parser
+# ---------------------------------------------------------------------------
def _collect_block(lines: list[str], start: int) -> tuple[list[str], int]:
    """Collect the body of a brace-delimited block.

    Scanning starts at ``lines[start]``, normally the line right after the
    header that carried the opening '{'.  If the header had no brace and the
    first non-blank line is a standalone '{', that opener is skipped so it
    does not end up in the body.

    The block ends at the first line that *starts with* '}' while no nested
    block is open.  Lines belonging to nested blocks (including their own
    closing '}' and '} else {' lines) are kept in the body so they can be
    re-parsed when the body is executed.

    Args:
        lines: All script lines.
        start: Index of the first candidate body line.

    Returns:
        ``(body_lines, index)`` where ``index`` points AT the closing line
        (not past it), so the caller can inspect it for a trailing ``else``.
        For an unterminated block ``index == len(lines)``.
    """
    i = start

    # Tolerate the opening brace on its own line after a brace-less header.
    probe = start
    while probe < len(lines) and not lines[probe].strip():
        probe += 1
    if probe < len(lines) and lines[probe].strip() == "{":
        i = probe + 1

    body: list[str] = []
    depth = 1  # we are already inside the block being collected
    while i < len(lines):
        stripped = lines[i].strip()
        # Only a '}' seen while no nested block is open closes *this* block.
        # ('<=' keeps us robust if a stray mid-line '}' pushed depth below 1.)
        if depth <= 1 and stripped.startswith("}"):
            return body, i
        # Track nesting; balanced '{var}' placeholders net out to zero.
        depth += stripped.count("{") - stripped.count("}")
        body.append(lines[i])
        i += 1
    return body, i  # unterminated block — caller handles
def _parse_statements(lines: list[str]) -> list:
    """Turn raw script lines into a flat list of statement tuples.

    Blank lines and lines starting with '#' are dropped.  Every other line
    becomes one of:
      ("line",      text)
      ("foreach",   varname, values_list, body_lines)
      ("forvalues", varname, num_sequence, body_lines)
      ("if",        condition, if_body, else_body_or_None)

    Block bodies are returned as raw lines; they are not parsed here.
    """
    total = len(lines)

    def _skip_close(idx: int) -> int:
        """Step over the line at *idx* when it begins with a closing brace."""
        if idx < total and lines[idx].strip().startswith("}"):
            return idx + 1
        return idx

    parsed: list = []
    pos = 0
    while pos < total:
        text = lines[pos].strip()

        # Nothing to record for blanks and comments.
        if not text or text.startswith("#"):
            pos += 1
            continue

        # foreach <var> in <v1> <v2> ... {
        header = re.match(r'^foreach\s+(\w+)\s+in\s+(.+?)(?:\s*\{)?\s*$', text)
        if header:
            items = header.group(2).rstrip("{").strip().split()
            block, pos = _collect_block(lines, pos + 1)
            pos = _skip_close(pos)
            parsed.append(("foreach", header.group(1), items, block))
            continue

        # forvalues <var> = start/end   or   start(step)end
        header = re.match(r'^forvalues\s+(\w+)\s*=\s*(.+?)(?:\s*\{)?\s*$', text)
        if header:
            sequence = _parse_numseq(header.group(2).rstrip("{").strip())
            block, pos = _collect_block(lines, pos + 1)
            pos = _skip_close(pos)
            parsed.append(("forvalues", header.group(1), sequence, block))
            continue

        # if <condition> {   (the pattern requires whitespace after 'if')
        header = re.match(r'^if\s+(.+?)(?:\s*\{)?\s*$', text)
        if header:
            test = header.group(1).rstrip("{").strip()
            then_part, pos = _collect_block(lines, pos + 1)
            else_part: list[str] | None = None
            if pos < total:
                closer = lines[pos].strip()
                if not re.match(r'^\}\s*else\s*\{?\s*$', closer):
                    # Ordinary closing line: just move past it.
                    pos += 1
                else:
                    if closer.endswith("{"):
                        # '} else {' — the else body begins on the next line.
                        else_part, pos = _collect_block(lines, pos + 1)
                    else:
                        # '} else' — skip blank lines, then an optional
                        # standalone '{', before collecting the body.
                        pos += 1
                        while pos < total and not lines[pos].strip():
                            pos += 1
                        if pos < total and lines[pos].strip() == "{":
                            pos += 1
                        else_part, pos = _collect_block(lines, pos)
                    pos = _skip_close(pos)
            parsed.append(("if", test, then_part, else_part))
            continue

        # Anything else is an ordinary command line.
        parsed.append(("line", text))
        pos += 1
    return parsed
def _parse_numseq(raw: str) -> list[int | float]:
    """Parse a forvalues sequence specification into its list of values.

    Accepted forms (bounds and step may carry a sign and a decimal part):
      - ``start(step)end`` — from start towards end in increments of step
      - ``start/end``      — same as ``start(1)end``
      - a single number    — a one-element sequence

    Whole-number values are returned as ``int``, all others as ``float``.
    Values are computed as ``start + k * step`` and rounded to 10 decimals
    so repeated addition cannot accumulate error (``0(0.1)0.3`` ends in
    ``0.3``, not ``0.30000000000000004``).

    Args:
        raw: The text after ``=`` in a forvalues header.

    Returns:
        The expanded sequence.  Empty when the text is not a valid
        specification, the step is zero, the step points away from ``end``,
        or a single value is not finite.
    """
    import math  # function-scope: only this helper needs it

    number = r'[+-]?(?:\d+(?:\.\d*)?|\.\d+)'
    text = raw.strip()

    def _tidy(value: float) -> int | float:
        """Round away float noise and prefer int for whole numbers."""
        value = round(value, 10)
        return int(value) if value == int(value) else value

    m = re.fullmatch(rf'({number})\(({number})\)({number})', text)
    if m:
        start, step, end = (float(g) for g in m.groups())
    else:
        m = re.fullmatch(rf'({number})/({number})', text)
        if m:
            start, end = float(m.group(1)), float(m.group(2))
            step = 1.0
        else:
            # Single value; anything float() rejects is not a sequence.
            try:
                single = float(text)
            except ValueError:
                return []
            return [_tidy(single)] if math.isfinite(single) else []

    if step == 0:
        return []
    # Number of steps that fit between start and end; the small tolerance
    # keeps an end point that is only off by float noise.
    count = math.floor((end - start) / step + 1e-9) + 1
    return [_tidy(start + k * step) for k in range(max(count, 0))]
+# ---------------------------------------------------------------------------
+# Evaluator
+# ---------------------------------------------------------------------------
# Bare condition words (compared case-insensitively) that count as true.
_BOOL_TRUE = {"true", "1", "yes"}

# Comparison operators understood in if-conditions.
_COMPARATORS = {
    "==": lambda a, b: a == b,
    "!=": lambda a, b: a != b,
    ">=": lambda a, b: a >= b,
    "<=": lambda a, b: a <= b,
    ">": lambda a, b: a > b,
    "<": lambda a, b: a < b,
}


def _eval_condition(condition: str, local_vars: dict[str, str], session: "Session") -> bool:
    """Evaluate a simple if-condition.

    ``{var}`` placeholders are substituted from *local_vars* first.  The
    following forms are then recognised, in this order:
      - ``data_loaded``          → True if a dataset is loaded
      - ``col_exists <colname>`` → True if the column exists in the data;
        the name is tried exactly as written, then lower-cased
      - ``N``                    → True if the dataset has at least one row
      - ``N <op> <number>``      → row count comparison; False when no
        dataset is loaded
      - ``lhs <op> rhs``         → numeric comparison when both sides parse
        as numbers, otherwise string comparison (surrounding quotes removed)
      - anything else            → True only for "true", "1" or "yes"

    Args:
        condition: Condition text as written in the script.
        local_vars: Loop variables available for substitution.
        session: Object whose ``df`` attribute is inspected.

    Returns:
        The truth value of the condition.
    """
    cond = _substitute(condition, local_vars).strip()
    lowered = cond.lower()
    df = session.df

    if lowered == "data_loaded":
        return df is not None

    # Match case-insensitively but keep the column name as written: column
    # names are case-sensitive, so lower-casing first would hide 'Age'.
    m = re.match(r'^col_exists\s+(\S+)$', cond, re.IGNORECASE)
    if m:
        if df is None:
            return False
        col = m.group(1)
        return col in df.columns or col.lower() in df.columns

    if lowered == "n":
        return df is not None and len(df) > 0

    # N <op> <number>
    m = re.match(r'^N\s*(>=|<=|==|!=|>|<)\s*(\d+(?:\.\d*)?|\.\d+)$', cond)
    if m:
        if df is None:
            # A row-count test cannot hold without data; do not fall through
            # to the generic branch, which would compare "N" as a string.
            return False
        return _COMPARATORS[m.group(1)](len(df), float(m.group(2)))

    # Generic comparison: lhs op rhs
    m = re.match(r'^(.+?)\s*(==|!=|>=|<=|>|<)\s*(.+)$', cond)
    if m:
        lhs_s = m.group(1).strip().strip('"\'')
        rhs_s = m.group(3).strip().strip('"\'')
        try:
            lhs_v: float | str = float(lhs_s)
            rhs_v: float | str = float(rhs_s)
        except ValueError:
            # Not both numeric: compare both sides as plain strings.
            lhs_v, rhs_v = lhs_s, rhs_s
        return _COMPARATORS[m.group(2)](lhs_v, rhs_v)

    return lowered in _BOOL_TRUE


def _substitute(text: str, local_vars: dict[str, str]) -> str:
    """Replace every ``{varname}`` in *text* with its value from *local_vars*.

    All placeholders are replaced in a single pass, so a substituted value
    that itself looks like a placeholder is left untouched and the result
    does not depend on the order of *local_vars*.  Placeholders without a
    matching variable are kept as written.
    """
    if not local_vars:
        return text
    table = {f"{{{k}}}": str(v) for k, v in local_vars.items()}
    pattern = "|".join(re.escape(token) for token in sorted(table, key=len, reverse=True))
    return re.sub(pattern, lambda hit: table[hit.group(0)], text)
+# ---------------------------------------------------------------------------
+# Executor
+# ---------------------------------------------------------------------------
def execute_statements(
    statements: list,
    session: "Session",
    console,
    dispatcher,
    *,
    strict: bool = False,
    local_vars: dict[str, str] | None = None,
) -> bool:
    """Execute a list of parsed statements.

    Args:
        statements: Statement tuples as produced by ``_parse_statements``.
        session: Session passed through to *dispatcher* and to condition
            evaluation.
        console: Object with a ``print`` method used to echo commands and
            their results.
        dispatcher: Callable ``dispatcher(session, line)`` that runs one
            command and returns its output, or ``"__QUIT__"`` to stop.
        strict: When true, abort as soon as a command's output (with
            ``[...]`` markup removed) starts with "Error" or
            "Internal error".
        local_vars: Loop variables visible to these statements.

    Returns:
        True if the script should continue, False once a command returned
        ``"__QUIT__"``.

    Raises:
        SystemExit: With code 1 in strict mode when a command reports an
            error.
    """
    if local_vars is None:
        local_vars = {}

    for stmt in statements:
        kind = stmt[0]

        if kind == "line":
            line = _substitute(stmt[1], local_vars)
            if not line or line.startswith("#"):
                continue
            console.print(f"[dim]>>> {line}[/dim]")
            result = dispatcher(session, line)
            if result == "__QUIT__":
                return False
            if result:
                console.print(result)
                if strict:
                    # Drop [markup] tags before looking for an error prefix.
                    plain = re.sub(r"\[/?[^\]]*\]", "", result)
                    if plain.startswith(("Error", "Internal error")):
                        raise SystemExit(1)
            console.print()

        elif kind in ("foreach", "forvalues"):
            # Both loop kinds carry (kind, varname, values, body).
            _, varname, values, body = stmt
            body_stmts = _parse_statements(body)
            for val in values:
                # Expand enclosing loop variables inside the value itself so
                # an inner list such as '{x}_a {x}_b' can use the outer 'x'.
                bound = _substitute(str(val), local_vars)
                new_locals = {**local_vars, varname: bound}
                cont = execute_statements(
                    body_stmts, session, console, dispatcher,
                    strict=strict, local_vars=new_locals,
                )
                if not cont:
                    return False

        elif kind == "if":
            _, condition, if_body, else_body = stmt
            if _eval_condition(condition, local_vars, session):
                branch = if_body
            else:
                branch = else_body or []
            cont = execute_statements(
                _parse_statements(branch), session, console, dispatcher,
                strict=strict, local_vars=local_vars,
            )
            if not cont:
                return False

    return True
def run_script_advanced(
    path: str,
    session: "Session",
    console,
    dispatcher,
    *,
    strict: bool = False,
) -> None:
    """Run an .ost script with foreach/forvalues/if-else support.

    The file is read completely, parsed into statements and executed.

    Args:
        path: Location of the script file (UTF-8, with or without a BOM).
        session: Session the commands operate on.
        console: Object with a ``print`` method used for echoing output.
        dispatcher: Callable ``dispatcher(session, line)`` that runs a single
            command.
        strict: Forwarded to ``execute_statements``.
    """
    # 'utf-8-sig' reads plain UTF-8 unchanged and strips a leading byte-order
    # mark, which would otherwise become part of the first command.
    with open(path, encoding="utf-8-sig") as f:
        lines = f.readlines()
    statements = _parse_statements(lines)
    execute_statements(statements, session, console, dispatcher, strict=strict)

openstat/session.py ADDED Viewed

@@ -0,0 +1,133 @@
+"""Session state: holds the active dataset, command history, and results."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from pathlib import Path
+import polars as pl
+from openstat.config import get_config
@dataclass
class ModelResult:
    """Summary record for one fitted model.

    Attributes:
        name: Model label, e.g. "OLS" or "Logit".
        formula: Model formula, e.g. "y ~ x1 + x2".
        table: Formatted text table of the results.
        details: Additional values such as r2, n, etc.
    """

    name: str
    formula: str
    table: str
    details: dict
@dataclass
class Session:
    """Holds all state for a single analysis session.

    Besides the active DataFrame, the session keeps the command history,
    stored model results, recorded plot paths, an undo stack of earlier
    DataFrames and a number of per-feature settings.  Private fields are
    excluded from ``repr`` so printing a session stays readable.
    """

    df: pl.DataFrame | None = None
    dataset_path: str | None = None
    dataset_name: str | None = None
    history: list[str] = field(default_factory=list)
    results: list[ModelResult] = field(default_factory=list)
    plot_paths: list[str] = field(default_factory=list)
    _last_model: object = field(default=None, repr=False)  # last fitted statsmodels result
    _last_model_vars: tuple | None = field(default=None, repr=False)  # (dep, indeps)
    _last_fit_result: object = field(default=None, repr=False)  # last FitResult for latex export
    _last_fit_kwargs: dict = field(default_factory=dict, repr=False)  # model-specific kwargs for bootstrap
    # Filled in by __post_init__ from the configuration when left as None.
    output_dir: Path = field(default=None)  # type: ignore[assignment]
    # Earlier DataFrames, oldest first; kept out of repr like the other
    # private fields so whole snapshots are not dumped when printing.
    _undo_stack: list[pl.DataFrame] = field(default_factory=list, repr=False)
    # Panel data (F1) / Time series (F2)
    _panel_var: str | None = field(default=None, repr=False)
    _time_var: str | None = field(default=None, repr=False)
    _ts_freq: str | None = field(default=None, repr=False)
    # Survival analysis (F3)
    _surv_time_var: str | None = field(default=None, repr=False)
    _surv_event_var: str | None = field(default=None, repr=False)
    # DuckDB backend (F6)
    _backend: str = field(default="polars", repr=False)
    _backend_obj: object = field(default=None, repr=False)
    # File format labels (F10)
    _variable_labels: dict | None = field(default=None, repr=False)
    # Multiple imputation (F11)
    _imputed_datasets: list | None = field(default=None, repr=False)
    _mi_m: int = field(default=0, repr=False)
    # Survey design (F12)
    _svy_weight_var: str | None = field(default=None, repr=False)
    _svy_strata_var: str | None = field(default=None, repr=False)
    _svy_psu_var: str | None = field(default=None, repr=False)
    # Panel model storage for Hausman test
    _panel_models: dict = field(default_factory=dict, repr=False)
    # Session logging (log using / log close)
    _log_file: object = field(default=None, repr=False)   # open file handle
    _log_path: str | None = field(default=None, repr=False)
    # Last margins / marginal effects result
    _last_margins: object = field(default=None, repr=False)
    # Network analysis (network build)
    _network: object = field(default=None, repr=False)
    _network_weight_col: str | None = field(default=None, repr=False)

    def __post_init__(self) -> None:
        """Resolve ``output_dir`` and make sure the directory exists."""
        cfg = get_config()
        if self.output_dir is None:
            self.output_dir = Path(cfg.output_dir)
        else:
            # Accept plain strings as well as Path objects.
            self.output_dir = Path(self.output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def record(self, command: str) -> None:
        """Record a command in history."""
        self.history.append(command)

    def require_data(self) -> pl.DataFrame:
        """Return the active DataFrame.

        Raises:
            RuntimeError: If no dataset is loaded.
        """
        if self.df is None:
            raise RuntimeError("No dataset loaded. Use: load <path>")
        return self.df

    def snapshot(self) -> None:
        """Save current DataFrame to undo stack (call before mutations).

        Respects max_undo_stack and max_undo_memory_mb from config: the
        oldest snapshots are dropped first until the new one fits the memory
        budget, and again until the stack is within the count limit.  Does
        nothing when no dataset is loaded.
        """
        if self.df is None:
            return
        cfg = get_config()

        # Memory check: estimate DataFrame size
        df_size_mb = self.df.estimated_size("mb")
        stack_size_mb = sum(d.estimated_size("mb") for d in self._undo_stack)
        # Drop oldest snapshots to stay within budget
        while (self._undo_stack
               and stack_size_mb + df_size_mb > cfg.max_undo_memory_mb):
            removed = self._undo_stack.pop(0)
            stack_size_mb -= removed.estimated_size("mb")

        self._undo_stack.append(self.df.clone())

        # Keep stack bounded by count too; loop so a limit that was lowered
        # after earlier snapshots is honoured as well.
        while self._undo_stack and len(self._undo_stack) > cfg.max_undo_stack:
            self._undo_stack.pop(0)

    def undo(self) -> bool:
        """Restore the previous DataFrame. Returns True if successful."""
        if not self._undo_stack:
            return False
        self.df = self._undo_stack.pop()
        return True

    @property
    def undo_depth(self) -> int:
        """Number of snapshots currently available for undo."""
        return len(self._undo_stack)

    @property
    def shape_str(self) -> str:
        """Human-readable shape of the active data, or "No data"."""
        if self.df is None:
            return "No data"
        r, c = self.df.shape
        return f"{r:,} rows x {c} columns"

openstat/stats/__init__.py ADDED Viewed

File without changes