openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. openstat/__init__.py +3 -0
  2. openstat/__main__.py +4 -0
  3. openstat/backends/__init__.py +16 -0
  4. openstat/backends/duckdb_backend.py +70 -0
  5. openstat/backends/polars_backend.py +52 -0
  6. openstat/cli.py +92 -0
  7. openstat/commands/__init__.py +82 -0
  8. openstat/commands/adv_stat_cmds.py +1255 -0
  9. openstat/commands/advanced_ml_cmds.py +576 -0
  10. openstat/commands/advreg_cmds.py +207 -0
  11. openstat/commands/alias_cmds.py +135 -0
  12. openstat/commands/arch_cmds.py +82 -0
  13. openstat/commands/arules_cmds.py +111 -0
  14. openstat/commands/automodel_cmds.py +212 -0
  15. openstat/commands/backend_cmds.py +82 -0
  16. openstat/commands/base.py +170 -0
  17. openstat/commands/bayes_cmds.py +71 -0
  18. openstat/commands/causal_cmds.py +269 -0
  19. openstat/commands/cluster_cmds.py +152 -0
  20. openstat/commands/data_cmds.py +996 -0
  21. openstat/commands/datamanip_cmds.py +672 -0
  22. openstat/commands/dataquality_cmds.py +174 -0
  23. openstat/commands/datetime_cmds.py +176 -0
  24. openstat/commands/dimreduce_cmds.py +184 -0
  25. openstat/commands/discrete_cmds.py +149 -0
  26. openstat/commands/dsl_cmds.py +143 -0
  27. openstat/commands/epi_cmds.py +93 -0
  28. openstat/commands/equiv_tobit_cmds.py +94 -0
  29. openstat/commands/esttab_cmds.py +196 -0
  30. openstat/commands/export_beamer_cmds.py +142 -0
  31. openstat/commands/export_cmds.py +201 -0
  32. openstat/commands/export_extra_cmds.py +240 -0
  33. openstat/commands/factor_cmds.py +180 -0
  34. openstat/commands/groupby_cmds.py +155 -0
  35. openstat/commands/help_cmds.py +237 -0
  36. openstat/commands/i18n_cmds.py +43 -0
  37. openstat/commands/import_extra_cmds.py +561 -0
  38. openstat/commands/influence_cmds.py +134 -0
  39. openstat/commands/iv_cmds.py +106 -0
  40. openstat/commands/manova_cmds.py +105 -0
  41. openstat/commands/mediate_cmds.py +233 -0
  42. openstat/commands/meta_cmds.py +284 -0
  43. openstat/commands/mi_cmds.py +228 -0
  44. openstat/commands/mixed_cmds.py +79 -0
  45. openstat/commands/mixture_changepoint_cmds.py +166 -0
  46. openstat/commands/ml_adv_cmds.py +147 -0
  47. openstat/commands/ml_cmds.py +178 -0
  48. openstat/commands/model_eval_cmds.py +142 -0
  49. openstat/commands/network_cmds.py +288 -0
  50. openstat/commands/nlquery_cmds.py +161 -0
  51. openstat/commands/nonparam_cmds.py +149 -0
  52. openstat/commands/outreg_cmds.py +247 -0
  53. openstat/commands/panel_cmds.py +141 -0
  54. openstat/commands/pdf_cmds.py +226 -0
  55. openstat/commands/pipeline_cmds.py +319 -0
  56. openstat/commands/plot_cmds.py +189 -0
  57. openstat/commands/plugin_cmds.py +79 -0
  58. openstat/commands/posthoc_cmds.py +153 -0
  59. openstat/commands/power_cmds.py +172 -0
  60. openstat/commands/profile_cmds.py +246 -0
  61. openstat/commands/rbridge_cmds.py +81 -0
  62. openstat/commands/regex_cmds.py +104 -0
  63. openstat/commands/report_cmds.py +48 -0
  64. openstat/commands/repro_cmds.py +129 -0
  65. openstat/commands/resampling_cmds.py +109 -0
  66. openstat/commands/reshape_cmds.py +223 -0
  67. openstat/commands/sem_cmds.py +177 -0
  68. openstat/commands/stat_cmds.py +1040 -0
  69. openstat/commands/stata_import_cmds.py +215 -0
  70. openstat/commands/string_cmds.py +124 -0
  71. openstat/commands/surv_cmds.py +145 -0
  72. openstat/commands/survey_cmds.py +153 -0
  73. openstat/commands/textanalysis_cmds.py +192 -0
  74. openstat/commands/ts_adv_cmds.py +136 -0
  75. openstat/commands/ts_cmds.py +195 -0
  76. openstat/commands/tui_cmds.py +111 -0
  77. openstat/commands/ux_cmds.py +191 -0
  78. openstat/commands/validate_cmds.py +270 -0
  79. openstat/commands/viz_adv_cmds.py +312 -0
  80. openstat/commands/viz_extra_cmds.py +251 -0
  81. openstat/commands/watch_cmds.py +69 -0
  82. openstat/config.py +106 -0
  83. openstat/dsl/__init__.py +0 -0
  84. openstat/dsl/parser.py +332 -0
  85. openstat/dsl/tokenizer.py +105 -0
  86. openstat/i18n.py +120 -0
  87. openstat/io/__init__.py +0 -0
  88. openstat/io/loader.py +187 -0
  89. openstat/jupyter/__init__.py +18 -0
  90. openstat/jupyter/display.py +18 -0
  91. openstat/jupyter/magic.py +60 -0
  92. openstat/logging_config.py +59 -0
  93. openstat/plots/__init__.py +0 -0
  94. openstat/plots/plotter.py +437 -0
  95. openstat/plots/surv_plots.py +32 -0
  96. openstat/plots/ts_plots.py +59 -0
  97. openstat/plugins/__init__.py +5 -0
  98. openstat/plugins/manager.py +69 -0
  99. openstat/repl.py +457 -0
  100. openstat/reporting/__init__.py +0 -0
  101. openstat/reporting/eda.py +208 -0
  102. openstat/reporting/report.py +67 -0
  103. openstat/script_runner.py +319 -0
  104. openstat/session.py +133 -0
  105. openstat/stats/__init__.py +0 -0
  106. openstat/stats/advanced_regression.py +269 -0
  107. openstat/stats/arch_garch.py +84 -0
  108. openstat/stats/bayesian.py +103 -0
  109. openstat/stats/causal.py +258 -0
  110. openstat/stats/clustering.py +206 -0
  111. openstat/stats/discrete.py +311 -0
  112. openstat/stats/epidemiology.py +119 -0
  113. openstat/stats/equiv_tobit.py +163 -0
  114. openstat/stats/factor.py +174 -0
  115. openstat/stats/imputation.py +282 -0
  116. openstat/stats/influence.py +78 -0
  117. openstat/stats/iv.py +131 -0
  118. openstat/stats/manova.py +124 -0
  119. openstat/stats/mixed.py +128 -0
  120. openstat/stats/ml.py +275 -0
  121. openstat/stats/ml_advanced.py +117 -0
  122. openstat/stats/model_eval.py +183 -0
  123. openstat/stats/models.py +1342 -0
  124. openstat/stats/nonparametric.py +130 -0
  125. openstat/stats/panel.py +179 -0
  126. openstat/stats/power.py +295 -0
  127. openstat/stats/resampling.py +203 -0
  128. openstat/stats/survey.py +213 -0
  129. openstat/stats/survival.py +196 -0
  130. openstat/stats/timeseries.py +142 -0
  131. openstat/stats/ts_advanced.py +114 -0
  132. openstat/types.py +11 -0
  133. openstat/web/__init__.py +1 -0
  134. openstat/web/app.py +117 -0
  135. openstat/web/session_manager.py +73 -0
  136. openstat/web/static/app.js +117 -0
  137. openstat/web/static/index.html +38 -0
  138. openstat/web/static/style.css +103 -0
  139. openstat_cli-1.0.0.dist-info/METADATA +748 -0
  140. openstat_cli-1.0.0.dist-info/RECORD +143 -0
  141. openstat_cli-1.0.0.dist-info/WHEEL +4 -0
  142. openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
  143. openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,215 @@
1
+ """Stata .do file importer: convert Stata syntax to OpenStat .ost script."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from pathlib import Path
7
+
8
+ from openstat.commands.base import command, CommandArgs, friendly_error
9
+ from openstat.session import Session
10
+
11
+ # ── Stata → OpenStat translation rules ──────────────────────────────────────
12
+ # Each rule is (pattern, replacement_or_callable)
13
+ # Applied in order; first match wins for each line.
14
+
15
_RULES: list[tuple[re.Pattern, object]] = []


def _rule(pattern: str, repl) -> None:
    """Register one translation rule.

    ``pattern`` is compiled case-insensitively and appended to ``_RULES``
    together with ``repl``. ``repl`` is either the replacement text or a
    callable that receives the match object and returns the translated line.
    """
    _RULES.append((re.compile(pattern, re.IGNORECASE), repl))


def _abbrev(required: str, optional: str) -> str:
    """Return a regex fragment for ``required`` plus any prefix of ``optional``.

    ``_abbrev("su", "mmarize")`` matches ``su``, ``sum``, ``summ`` ...
    ``summarize`` and nothing else, which is how Stata abbreviates commands.
    """
    fragment = ""
    for ch in reversed(optional):
        fragment = f"(?:{re.escape(ch)}{fragment})?"
    return re.escape(required) + fragment


def _loop_header(keyword: str):
    """Build the replacement callable for a ``forvalues``/``foreach`` header."""
    def build(m: re.Match) -> str:
        header = m.group(1).strip()
        # A Stata loop header already ends with "{"; drop it so the brace
        # appended below is not duplicated.
        if header.endswith("{"):
            header = header[:-1].rstrip()
        return f"{keyword} {header} {{"
    return build


def _translate_prefixed(m: re.Match) -> str:
    """Translate the command that follows a ``quietly``/``capture`` prefix.

    The wrapped command is run through ``_RULES`` (first match wins). When no
    rule matches, it is returned unchanged, as the prefix rules did before.
    """
    inner = m.group(1).strip()
    for pat, repl in _RULES:
        hit = pat.match(inner)
        if hit:
            return repl(hit) if callable(repl) else repl
    return inner


# Comments
_rule(r"^\s*\*.*$", lambda m: "# " + m.group(0).lstrip("* \t"))
_rule(r"^(\s*)//(.*)$", lambda m: m.group(1) + "# " + m.group(2))

# use → load
_rule(r"^\s*use\s+[\"']?(.+?)[\"']?\s*(?:,\s*clear)?\s*$",
      lambda m: f"load {m.group(1).strip()}")

# save → save
_rule(r"^\s*save\s+[\"']?(.+?)[\"']?\s*(?:,\s*replace)?\s*$",
      lambda m: f"save {m.group(1).strip()}")

# summarize → summarize
# Only true abbreviations of "summarize" may match; the word boundary stops
# "sum price" from being read as "su" + "m price" and keeps "sureg" out.
_rule(r"^\s*" + _abbrev("su", "mmarize") + r"\b\s*(.*)",
      lambda m: f"summarize {m.group(1).strip()}".rstrip())

# describe → describe
_rule(r"^\s*desc(?:ribe)?\s*(.*)", lambda m: "describe")

# regress → ols
_rule(r"^\s*reg(?:ress)?\s+(\S+)\s+(.*)",
      lambda m: f"ols {m.group(1)} {m.group(2).strip()}")

# logit → logit (same)
_rule(r"^\s*logit\s+(\S+)\s+(.*)",
      lambda m: f"logit {m.group(1)} {m.group(2).strip()}")

# probit → probit (same)
_rule(r"^\s*probit\s+(\S+)\s+(.*)",
      lambda m: f"probit {m.group(1)} {m.group(2).strip()}")

# poisson → poisson (same)
_rule(r"^\s*poisson\s+(\S+)\s+(.*)",
      lambda m: f"poisson {m.group(1)} {m.group(2).strip()}")

# tab → tabulate
_rule(r"^\s*tab(?:ulate)?\s+(.*)", lambda m: f"tabulate {m.group(1).strip()}")

# cor → correlate
_rule(r"^\s*cor(?:relate)?\s+(.*)", lambda m: f"correlate {m.group(1).strip()}")

# drop if → filter (emitted as a comment for manual review)
# NOTE(review): "drop if" removes the matching rows, so the filter hint
# presumably needs the negated condition — confirm against filter semantics.
_rule(r"^\s*drop\s+if\s+(.*)",
      lambda m: f"# [manual] drop if {m.group(1)} → filter {_stata_cond(m.group(1))}")

# keep if → filter (emitted as a comment for manual review)
_rule(r"^\s*keep\s+if\s+(.*)",
      lambda m: f"# [manual] keep if {m.group(1)} → filter {_stata_cond(m.group(1))}")

# drop varlist → drop
# "\b" limits the exclusion to the keyword "if", so names like "ifs" pass.
_rule(r"^\s*drop\s+(?!if\b)(.*)", lambda m: f"drop {m.group(1).strip()}")

# keep varlist → keep
_rule(r"^\s*keep\s+(?!if\b)(.*)", lambda m: f"keep {m.group(1).strip()}")

# gen/generate → generate
# The name part is non-greedy so the split happens at the first "=", leaving
# "==" / ">=" in the expression untouched.
_rule(r"^\s*gen(?:erate)?\s+(.+?)=(.+)",
      lambda m: f"generate {m.group(1).strip()} = {m.group(2).strip()}")

# replace → replace
_rule(r"^\s*replace\s+(.+?)=(.+)",
      lambda m: f"replace {m.group(1).strip()} = {m.group(2).strip()}")

# rename → rename
_rule(r"^\s*rename\s+(\S+)\s+(\S+)", lambda m: f"rename {m.group(1)} {m.group(2)}")

# sort → sort
_rule(r"^\s*sort\s+(.*)", lambda m: f"sort {m.group(1).strip()}")

# set seed → set seed
_rule(r"^\s*set\s+seed\s+(\d+)", lambda m: f"set seed {m.group(1)}")

# xtset → xtset (panel)
_rule(r"^\s*xtset\s+(.*)", lambda m: f"xtset {m.group(1).strip()}")

# tsset → tsset
_rule(r"^\s*tsset\s+(.*)", lambda m: f"tsset {m.group(1).strip()}")

# stset → stset
_rule(r"^\s*stset\s+(.*)", lambda m: f"stset {m.group(1).strip()}")

# display → kept as a comment
_rule(r"^\s*dis(?:play)?\s+(.*)", lambda m: f"# display {m.group(1).strip()}")

# local macro → define
# Accept both "local x value" and "local x = value" without doubling the "=".
_rule(r"^\s*local\s+(\w+)(?:\s*=\s*|\s+)(.+)",
      lambda m: f"define {m.group(1)} = {m.group(2).strip()}")

# forvalues
_rule(r"^\s*forvalues\s+(.*)", _loop_header("forvalues"))

# foreach
_rule(r"^\s*foreach\s+(.*)", _loop_header("foreach"))

# closing brace of a block - pass through
_rule(r"^\s*\}\s*$", lambda m: "}")

# quietly → strip the prefix, translate the wrapped command
_rule(r"^\s*quietly\s+(.*)", _translate_prefixed)

# capture → strip the prefix, translate the wrapped command
_rule(r"^\s*capture\s+(.*)", _translate_prefixed)
122
+
123
+
124
def _stata_cond(cond: str) -> str:
    """Return a Stata condition rewritten for use in a filter hint.

    The result is only shown inside a ``# [manual]`` comment, so this is a
    rough normalisation rather than a full expression translator. ``==``,
    ``!=``, ``&`` and ``|`` are left unchanged; Stata's alternative
    not-equal spelling ``~=`` is rewritten to ``!=``.
    """
    return cond.strip().replace("~=", "!=")
129
+
130
+
131
def _translate_line(line: str) -> str:
    """Convert one line of a Stata .do file into its OpenStat form.

    Blank lines become empty strings. Otherwise the rules in ``_RULES`` are
    tried in registration order and the first one that matches produces the
    result. A line no rule matches is returned as an ``# [untranslated]``
    comment.
    """
    text = line.rstrip()
    if text.strip() == "":
        return ""

    for rule_pattern, rule_repl in _RULES:
        found = rule_pattern.match(text)
        if found is None:
            continue
        # A rule carries either a callable or a ready-made replacement.
        return rule_repl(found) if callable(rule_repl) else rule_repl

    # Nothing matched: keep the original text, flagged for manual follow-up.
    return f"# [untranslated] {text}"
145
+
146
+
147
def convert_do_file(do_text: str) -> str:
    """Translate the full text of a Stata .do file into an OpenStat script.

    The result starts with a header comment and a blank line, followed by one
    translated line per input line, joined with newlines.
    """
    header = ["# OpenStat script — converted from Stata .do file", ""]
    body = [_translate_line(raw_line) for raw_line in do_text.splitlines()]
    return "\n".join(header + body)
154
+
155
+
156
@command("import do", usage="import do <file.do> [--out=<file.ost>]")
def cmd_import_do(session: Session, args: str) -> str:
    """Convert a Stata .do file to an OpenStat .ost script.

    Translates common Stata commands (use, regress, gen, etc.) to their
    OpenStat equivalents. Untranslated lines are kept as comments.

    Options:
      --out=<path>  output .ost file path (default: same name, .ost extension)
      --run         also execute the converted script immediately

    Examples:
      import do analysis.do
      import do stata_script.do --out=my_analysis.ost --run

    Returns a one-paragraph conversion report, or a usage/error message.
    """
    ca = CommandArgs(args)
    # Defensively skip flag-like tokens so "--run" is never taken as the file.
    files = [p for p in ca.positional if not p.startswith("--")]
    if not files:
        return "Usage: import do <file.do> [--out=<path>] [--run]"

    do_path = Path(files[0])
    if not do_path.exists():
        return f"File not found: {do_path}"

    out_path = Path(ca.options.get("out", str(do_path.with_suffix(".ost"))))
    # When the input already ends in .ost (or --out names the input) the
    # conversion would overwrite its own source; refuse instead.
    if out_path.resolve() == do_path.resolve():
        return f"Output path must differ from the input file: {do_path}"

    # Match the flag as a whole token; a substring test would also fire on
    # e.g. "--out=a--run.ost".
    run_after = "--run" in args.split()

    try:
        do_text = do_path.read_text(encoding="utf-8", errors="replace")
        ost_text = convert_do_file(do_text)

        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(ost_text, encoding="utf-8")

        n_lines = len(do_text.splitlines())
        # Every non-blank output line that is neither the script header nor
        # an "[untranslated]" marker counts as translated.
        n_translated = sum(
            1 for line in ost_text.splitlines()
            if line.strip()
            and not line.startswith(("# [untranslated]", "# OpenStat"))
        )
        n_untranslated = ost_text.count("# [untranslated]")

        result = (
            f"Converted: {do_path} → {out_path}\n"
            f" Lines: {n_lines} | Translated: {n_translated} | "
            f"Untranslated (kept as comments): {n_untranslated}"
        )

        if run_after:
            from openstat.script_runner import run_script_advanced
            # A failing script must not hide the successful conversion, so
            # its error is appended to the report instead of raised.
            try:
                run_script_advanced(str(out_path), session)
                result += f"\nScript executed: {out_path}"
            except Exception as exc:
                result += f"\nScript error: {exc}"

        return result

    except Exception as e:
        return friendly_error(e, "import do")
@@ -0,0 +1,124 @@
1
+ """String and date manipulation commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ import polars as pl
8
+
9
+ from openstat.commands.base import command
10
+ from openstat.session import Session
11
+
12
+
13
def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
    """Split a Stata-style argument string into positionals and options.

    Every ``name(value)`` group becomes an entry in the options dict, keyed
    by the lower-cased name. The remaining text is split on whitespace, each
    token has leading/trailing commas removed, and empty tokens are dropped.

    Returns ``(positional, opts)``.
    """
    opts: dict[str, str] = {
        found.group(1).lower(): found.group(2)
        for found in re.finditer(r'(\w+)\(([^)]*)\)', raw)
    }

    # Remove the option groups so only the positional tokens remain.
    remainder = re.sub(r'\w+\([^)]*\)', '', raw)

    positional: list[str] = []
    for token in remainder.split():
        bare = token.strip(',')
        if bare:
            positional.append(bare)
    return positional, opts
20
+
21
+
22
@command("split", usage="split varname [, sep(,) gen(newvar)]")
def cmd_split(session: Session, args: str) -> str:
    """Split a string column into multiple columns.

    Options:
      sep(<text>)    separator to split on (default: a single space)
      gen(<prefix>)  prefix for the new columns (default: the source name)

    New columns are named ``<prefix>1``, ``<prefix>2``, ...; rows with fewer
    parts get nulls in the trailing columns. Returns a status message.
    """
    df = session.require_data()
    positional, opts = _stata_opts(args)
    if not positional:
        return "Usage: split varname [sep(,) gen(prefix)]"
    var = positional[0]
    if var not in df.columns:
        return f"Column '{var}' not found."
    sep = opts.get("sep", " ")
    prefix = opts.get("gen", var)
    session.snapshot()
    try:
        parts = df[var].str.split(sep)
        # Null cells come back as None; skip them, and let an empty or
        # all-null column yield zero parts instead of raising.
        max_parts = max(
            (len(p) for p in parts.to_list() if p is not None),
            default=0,
        )
        new_names = [f"{prefix}{i + 1}" for i in range(max_parts)]
        new_df = df
        for i, col_name in enumerate(new_names):
            new_df = new_df.with_columns(
                parts.list.get(i, null_on_oob=True).alias(col_name)
            )
        session.df = new_df
        return f"Split '{var}' into {max_parts} columns: {new_names}"
    except Exception as exc:
        return f"split error: {exc}"
48
+
49
+
50
@command("strtrim", usage="strtrim varname [gen(newvar)]")
def cmd_strtrim(session: Session, args: str) -> str:
    """Strip leading and trailing characters from a string column.

    The first positional argument names the column. ``gen(newvar)`` writes
    the result to ``newvar`` instead of replacing the source column.
    """
    frame = session.require_data()
    names, options = _stata_opts(args)
    if len(names) == 0:
        return "Usage: strtrim varname [gen(newvar)]"

    source = names[0]
    if source not in frame.columns:
        return f"Column '{source}' not found."

    target = options.get("gen", source)
    session.snapshot()
    try:
        trimmed = pl.col(source).str.strip_chars().alias(target)
        session.df = frame.with_columns(trimmed)
    except Exception as exc:
        return f"strtrim error: {exc}"
    return f"Trimmed '{source}' → '{target}'"
67
+
68
+
69
@command("strupper", usage="strupper varname [gen(newvar)]")
def cmd_strupper(session: Session, args: str) -> str:
    """Upper-case a string column.

    The first positional argument names the column. ``gen(newvar)`` writes
    the result to ``newvar`` instead of replacing the source column.
    """
    frame = session.require_data()
    names, options = _stata_opts(args)
    if len(names) == 0:
        return "Usage: strupper varname [gen(newvar)]"

    source = names[0]
    if source not in frame.columns:
        return f"Column '{source}' not found."

    target = options.get("gen", source)
    session.snapshot()
    try:
        uppercased = pl.col(source).str.to_uppercase().alias(target)
        session.df = frame.with_columns(uppercased)
    except Exception as exc:
        return f"strupper error: {exc}"
    return f"Uppercased '{source}' → '{target}'"
86
+
87
+
88
@command("strlower", usage="strlower varname [gen(newvar)]")
def cmd_strlower(session: Session, args: str) -> str:
    """Lower-case a string column.

    The first positional argument names the column. ``gen(newvar)`` writes
    the result to ``newvar`` instead of replacing the source column.
    """
    frame = session.require_data()
    names, options = _stata_opts(args)
    if len(names) == 0:
        return "Usage: strlower varname [gen(newvar)]"

    source = names[0]
    if source not in frame.columns:
        return f"Column '{source}' not found."

    target = options.get("gen", source)
    session.snapshot()
    try:
        lowercased = pl.col(source).str.to_lowercase().alias(target)
        session.df = frame.with_columns(lowercased)
    except Exception as exc:
        return f"strlower error: {exc}"
    return f"Lowercased '{source}' → '{target}'"
105
+
106
+
107
@command("strreplace", usage="strreplace varname old new [gen(newvar)]")
def cmd_strreplace(session: Session, args: str) -> str:
    """Replace every occurrence of a substring in a string column.

    Positional arguments are the column name, the text to find and the
    replacement text. ``gen(newvar)`` writes the result to ``newvar``
    instead of replacing the source column. Returns a status message.
    """
    df = session.require_data()
    positional, opts = _stata_opts(args)
    if len(positional) < 3:
        return "Usage: strreplace varname old new [gen(newvar)]"
    var, old_str, new_str = positional[0], positional[1], positional[2]
    if var not in df.columns:
        return f"Column '{var}' not found."
    new_var = opts.get("gen", var)
    session.snapshot()
    try:
        # literal=True: this command replaces plain substrings, so characters
        # such as "." or "(" in `old` (and "$" in `new`) must not be read as
        # regex syntax.
        replaced = pl.col(var).str.replace_all(old_str, new_str, literal=True)
        session.df = df.with_columns(replaced.alias(new_var))
        return f"Replaced '{old_str}' with '{new_str}' in '{var}' → '{new_var}'"
    except Exception as exc:
        return f"strreplace error: {exc}"
124
+
@@ -0,0 +1,145 @@
1
+ """Survival analysis commands: stset, stcox, sts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from openstat.session import Session, ModelResult
8
+ from openstat.commands.base import command, CommandArgs, friendly_error
9
+
10
+
11
@command("stset", usage="stset <time_var>, failure(<event_var>)")
def cmd_stset(session: Session, args: str) -> str:
    """Record which columns hold the survival time and the failure event.

    The event column is read from ``failure(<name>)``. The time column is the
    text before the first comma, or before ``failure(`` when there is no
    comma. Both names are stored on the session and a short summary with the
    observation, event and censored counts is returned.
    """
    frame = session.require_data()

    failure = re.search(r'failure\((\w+)\)', args)
    if failure is None:
        return "Usage: stset <time_var>, failure(<event_var>)"
    event_var = failure.group(1)

    # The time variable is whatever precedes the option list.
    head, comma, _ = args.partition(',')
    time_var = (head if comma else args[:failure.start()]).strip()

    for name in (time_var, event_var):
        if name not in frame.columns:
            return f"Column not found: {name}"

    session._surv_time_var = time_var
    session._surv_event_var = event_var

    n = frame.height
    events = frame[event_var].sum()
    summary = [
        f"Survival time: {time_var}",
        f"Failure event: {event_var}",
        f"Observations: {n}, Events: {events}, Censored: {n - events}",
    ]
    return "\n".join(summary)
40
+
41
+
42
@command("stcox", usage="stcox x1 x2 [--robust]")
def cmd_stcox(session: Session, args: str) -> str:
    """Fit a Cox proportional-hazards model on the declared survival data.

    Needs the time and event variables stored on the session by ``stset``.
    Positional arguments are the covariates; ``--robust`` is forwarded to the
    fitting routine. On success the fit is recorded on the session and the
    summary table, followed by any warnings, is returned.
    """
    frame = session.require_data()

    time_var = session._surv_time_var
    event_var = session._surv_event_var
    if time_var is None or event_var is None:
        return "Survival structure not set. Use: stset <time_var>, failure(<event_var>)"

    parsed = CommandArgs(args)
    use_robust = parsed.has_flag("--robust")
    covariates = [tok for tok in parsed.positional if not tok.startswith("--")]
    if len(covariates) == 0:
        return "Usage: stcox x1 x2 [--robust]"

    absent = [name for name in covariates if name not in frame.columns]
    if absent:
        return f"Columns not found: {', '.join(absent)}"

    try:
        from openstat.stats.survival import fit_cox_ph

        fit, raw_model = fit_cox_ph(
            frame, time_var, event_var, covariates, robust=use_robust,
        )

        # Remember the fit so later commands can refer to the last model.
        session._last_model = raw_model
        session._last_model_vars = (time_var, covariates)
        session._last_fit_result = fit
        session._last_fit_kwargs = {"survival": True}

        if hasattr(fit, "to_markdown"):
            table_md = fit.to_markdown()
        else:
            table_md = ""
        record = ModelResult(
            name="Cox PH",
            formula=fit.formula,
            table=table_md,
            details={
                "n_obs": fit.n_obs,
                "params": dict(fit.params),
                "log_likelihood": fit.log_likelihood,
            },
        )
        session.results.append(record)

        report = fit.summary_table()
        if fit.warnings:
            report = "\n".join([report, *fit.warnings])
        return report
    except ImportError as e:
        return str(e)
    except Exception as e:
        return friendly_error(e, "stcox")
92
+
93
+
94
@command("sts", usage="sts graph [by=group] | sts test <group_var>")
def cmd_sts(session: Session, args: str) -> str:
    """Kaplan-Meier survival curves and log-rank test.

    Subcommands:
      graph [by=<group>]  Kaplan-Meier summary; a plot is attempted as well
      test <group_var>    log-rank test across the groups of ``group_var``

    Needs the time and event variables stored on the session by ``stset``.
    Returns the subcommand's text output, or a usage/error message.
    """
    df = session.require_data()

    if session._surv_time_var is None or session._surv_event_var is None:
        return "Survival structure not set. Use: stset <time_var>, failure(<event_var>)"

    ca = CommandArgs(args)
    subcmd = ca.positional[0].lower() if ca.positional else ""

    if subcmd == "graph":
        group_var = ca.get_option("by")
        # Validate the grouping column up front, as "sts test" does.
        if group_var and group_var not in df.columns:
            return f"Column not found: {group_var}"
        try:
            from openstat.stats.survival import kaplan_meier
            summary, kmf = kaplan_meier(
                df, session._surv_time_var, session._surv_event_var, group_var,
            )

            # Plotting is best-effort: a failure must not discard the
            # summary, but it is reported rather than silently ignored.
            try:
                from openstat.plots.surv_plots import plot_km
                path = plot_km(kmf, session.output_dir, group_var)
                session.plot_paths.append(str(path))
                summary += f"\nPlot saved: {path}"
            except Exception as plot_exc:
                summary += f"\nPlot not saved: {plot_exc}"

            return summary
        except ImportError as e:
            return str(e)
        except Exception as e:
            return friendly_error(e, "sts graph")

    elif subcmd == "test":
        group_var = ca.positional[1] if len(ca.positional) > 1 else None
        if not group_var:
            return "Usage: sts test <group_var>"
        if group_var not in df.columns:
            return f"Column not found: {group_var}"
        try:
            from openstat.stats.survival import log_rank_test
            return log_rank_test(
                df, session._surv_time_var, session._surv_event_var, group_var,
            )
        except ImportError as e:
            return str(e)
        except Exception as e:
            return friendly_error(e, "sts test")

    else:
        return "Usage: sts graph [by=group] | sts test <group_var>"
@@ -0,0 +1,153 @@
1
+ """Survey weighting commands: svyset, svy: prefix."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from openstat.session import Session, ModelResult
8
+ from openstat.commands.base import command, CommandArgs, friendly_error
9
+ from openstat.dsl.parser import parse_formula, ParseError
10
+ from openstat.types import NUMERIC_DTYPES
11
+
12
+
13
@command("svyset", usage="svyset <psu> [pw=<weight>], strata(<strata>)")
def cmd_svyset(session: Session, args: str) -> str:
    """Declare survey design: PSU, sampling weights, and strata.

    The weight is read from ``[pw=<var>]`` (``[pweight=<var>]`` and spaces
    around ``=`` are accepted too), the strata from ``strata(<var>)``, and
    the PSU is the first remaining token. Every named column must exist in
    the data. The three names are stored on the session, each as ``None``
    when not given, and a summary of the design is returned.
    """
    df = session.require_data()

    # Parse weight: [pw=weight_var] or [pweight=weight_var]
    m_pw = re.search(r'\[\s*pw(?:eight)?\s*=\s*(\w+)\s*\]', args)
    weight_var = m_pw.group(1) if m_pw else None

    # Parse strata: strata(strata_var)
    m_strata = re.search(r'strata\((\w+)\)', args)
    strata_var = m_strata.group(1) if m_strata else None

    # PSU is the first positional argument once the options are removed
    clean = args
    if m_pw:
        clean = clean.replace(m_pw.group(0), "")
    if m_strata:
        clean = clean.replace(m_strata.group(0), "")
    tokens = clean.replace(",", "").split()
    psu_var = tokens[0] if tokens else None

    # Validate columns exist
    for var, label in [(psu_var, "PSU"), (weight_var, "weight"), (strata_var, "strata")]:
        if var and var not in df.columns:
            return f"{label} column not found: {var}"

    session._svy_psu_var = psu_var
    session._svy_weight_var = weight_var
    session._svy_strata_var = strata_var

    lines = ["Survey design set:"]
    if psu_var:
        lines.append(f" PSU: {psu_var} ({df[psu_var].n_unique()} clusters)")
    if weight_var:
        lines.append(f" Weight: {weight_var}")
    if strata_var:
        lines.append(f" Strata: {strata_var} ({df[strata_var].n_unique()} strata)")
    return "\n".join(lines)
52
+
53
+
54
@command("svy:", usage="svy: summarize|ols|logit ...")
def cmd_svy(session: Session, args: str) -> str:
    """Dispatch a survey-weighted analysis to its subcommand handler.

    The first word of ``args`` selects ``summarize``, ``ols`` or ``logit``
    (case-insensitive); the rest is handed to the handler unchanged. A
    sampling weight must already be stored on the session by ``svyset``.
    """
    df = session.require_data()

    if session._svy_weight_var is None:
        return "Survey design not set. Use: svyset <psu> [pw=<weight>], strata(<strata>)"

    handlers = {
        "summarize": _svy_summarize,
        "ols": _svy_ols,
        "logit": _svy_logit,
    }

    tokens = args.strip().split(None, 1)
    subcmd = tokens[0].lower() if tokens else ""
    rest = tokens[1] if len(tokens) > 1 else ""

    handler = handlers.get(subcmd)
    if handler is None:
        return "Usage: svy: summarize [cols] | svy: ols y ~ x1 + x2 | svy: logit y ~ x1 + x2"
    return handler(session, df, rest)
74
+
75
+
76
def _svy_summarize(session: Session, df, args: str) -> str:
    """Weighted summary statistics.

    ``args`` is a whitespace-separated list of column names. When it is
    empty, every column whose dtype is in ``NUMERIC_DTYPES`` is summarised.
    Unknown column names are reported instead of being passed on, and
    failures in the statistics routine are returned as a friendly error, in
    line with the other ``svy:`` subcommands.
    """
    requested = args.split()
    if requested:
        missing = [c for c in requested if c not in df.columns]
        if missing:
            return f"Columns not found: {', '.join(missing)}"
        cols = requested
    else:
        cols = [c for c in df.columns if df[c].dtype in NUMERIC_DTYPES]
    if not cols:
        return "No numeric columns to summarize."

    try:
        from openstat.stats.survey import weighted_summary
        return weighted_summary(df, cols, session._svy_weight_var)
    except Exception as e:
        return friendly_error(e, "svy: summarize")
84
+
85
+
86
def _svy_ols(session: Session, df, args: str) -> str:
    """Fit a survey-weighted OLS regression from a formula string.

    ``args`` is parsed with ``parse_formula``. The fit uses the weight,
    strata and PSU variables stored on the session. On success the fit is
    recorded on the session and the summary table, followed by any warnings,
    is returned.
    """
    try:
        dep, indeps = parse_formula(args)
    except ParseError as e:
        return f"Formula error: {e}"

    try:
        from openstat.stats.survey import fit_weighted_ols

        fit, raw_model = fit_weighted_ols(
            df,
            dep,
            indeps,
            session._svy_weight_var,
            session._svy_strata_var,
            session._svy_psu_var,
        )

        # Remember the fit so later commands can refer to the last model.
        session._last_model = raw_model
        session._last_model_vars = (dep, indeps)
        session._last_fit_result = fit
        session._last_fit_kwargs = {"survey": True}

        if hasattr(fit, "to_markdown"):
            table_md = fit.to_markdown()
        else:
            table_md = ""
        record = ModelResult(
            name="Svy: OLS",
            formula=fit.formula,
            table=table_md,
            details={
                "n_obs": fit.n_obs,
                "params": dict(fit.params),
                "r_squared": fit.r_squared,
            },
        )
        session.results.append(record)

        report = fit.summary_table()
        if fit.warnings:
            report = "\n".join([report, *fit.warnings])
        return report
    except Exception as e:
        return friendly_error(e, "svy: ols")
121
+
122
+
123
def _svy_logit(session: Session, df, args: str) -> str:
    """Fit a survey-weighted logistic regression from a formula string.

    ``args`` is parsed with ``parse_formula``. Only the weight variable
    stored on the session is passed to the fitting routine. On success the
    fit is recorded on the session and the summary table, followed by any
    warnings, is returned.
    """
    try:
        dep, indeps = parse_formula(args)
    except ParseError as e:
        return f"Formula error: {e}"

    try:
        from openstat.stats.survey import fit_weighted_logit

        fit, raw_model = fit_weighted_logit(df, dep, indeps, session._svy_weight_var)

        # Remember the fit so later commands can refer to the last model.
        session._last_model = raw_model
        session._last_model_vars = (dep, indeps)
        session._last_fit_result = fit
        session._last_fit_kwargs = {"survey": True}

        if hasattr(fit, "to_markdown"):
            table_md = fit.to_markdown()
        else:
            table_md = ""
        record = ModelResult(
            name="Svy: Logit",
            formula=fit.formula,
            table=table_md,
            details={
                "n_obs": fit.n_obs,
                "params": dict(fit.params),
            },
        )
        session.results.append(record)

        report = fit.summary_table()
        if fit.warnings:
            report = "\n".join([report, *fit.warnings])
        return report
    except Exception as e:
        return friendly_error(e, "svy: logit")