openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. openstat/__init__.py +3 -0
  2. openstat/__main__.py +4 -0
  3. openstat/backends/__init__.py +16 -0
  4. openstat/backends/duckdb_backend.py +70 -0
  5. openstat/backends/polars_backend.py +52 -0
  6. openstat/cli.py +92 -0
  7. openstat/commands/__init__.py +82 -0
  8. openstat/commands/adv_stat_cmds.py +1255 -0
  9. openstat/commands/advanced_ml_cmds.py +576 -0
  10. openstat/commands/advreg_cmds.py +207 -0
  11. openstat/commands/alias_cmds.py +135 -0
  12. openstat/commands/arch_cmds.py +82 -0
  13. openstat/commands/arules_cmds.py +111 -0
  14. openstat/commands/automodel_cmds.py +212 -0
  15. openstat/commands/backend_cmds.py +82 -0
  16. openstat/commands/base.py +170 -0
  17. openstat/commands/bayes_cmds.py +71 -0
  18. openstat/commands/causal_cmds.py +269 -0
  19. openstat/commands/cluster_cmds.py +152 -0
  20. openstat/commands/data_cmds.py +996 -0
  21. openstat/commands/datamanip_cmds.py +672 -0
  22. openstat/commands/dataquality_cmds.py +174 -0
  23. openstat/commands/datetime_cmds.py +176 -0
  24. openstat/commands/dimreduce_cmds.py +184 -0
  25. openstat/commands/discrete_cmds.py +149 -0
  26. openstat/commands/dsl_cmds.py +143 -0
  27. openstat/commands/epi_cmds.py +93 -0
  28. openstat/commands/equiv_tobit_cmds.py +94 -0
  29. openstat/commands/esttab_cmds.py +196 -0
  30. openstat/commands/export_beamer_cmds.py +142 -0
  31. openstat/commands/export_cmds.py +201 -0
  32. openstat/commands/export_extra_cmds.py +240 -0
  33. openstat/commands/factor_cmds.py +180 -0
  34. openstat/commands/groupby_cmds.py +155 -0
  35. openstat/commands/help_cmds.py +237 -0
  36. openstat/commands/i18n_cmds.py +43 -0
  37. openstat/commands/import_extra_cmds.py +561 -0
  38. openstat/commands/influence_cmds.py +134 -0
  39. openstat/commands/iv_cmds.py +106 -0
  40. openstat/commands/manova_cmds.py +105 -0
  41. openstat/commands/mediate_cmds.py +233 -0
  42. openstat/commands/meta_cmds.py +284 -0
  43. openstat/commands/mi_cmds.py +228 -0
  44. openstat/commands/mixed_cmds.py +79 -0
  45. openstat/commands/mixture_changepoint_cmds.py +166 -0
  46. openstat/commands/ml_adv_cmds.py +147 -0
  47. openstat/commands/ml_cmds.py +178 -0
  48. openstat/commands/model_eval_cmds.py +142 -0
  49. openstat/commands/network_cmds.py +288 -0
  50. openstat/commands/nlquery_cmds.py +161 -0
  51. openstat/commands/nonparam_cmds.py +149 -0
  52. openstat/commands/outreg_cmds.py +247 -0
  53. openstat/commands/panel_cmds.py +141 -0
  54. openstat/commands/pdf_cmds.py +226 -0
  55. openstat/commands/pipeline_cmds.py +319 -0
  56. openstat/commands/plot_cmds.py +189 -0
  57. openstat/commands/plugin_cmds.py +79 -0
  58. openstat/commands/posthoc_cmds.py +153 -0
  59. openstat/commands/power_cmds.py +172 -0
  60. openstat/commands/profile_cmds.py +246 -0
  61. openstat/commands/rbridge_cmds.py +81 -0
  62. openstat/commands/regex_cmds.py +104 -0
  63. openstat/commands/report_cmds.py +48 -0
  64. openstat/commands/repro_cmds.py +129 -0
  65. openstat/commands/resampling_cmds.py +109 -0
  66. openstat/commands/reshape_cmds.py +223 -0
  67. openstat/commands/sem_cmds.py +177 -0
  68. openstat/commands/stat_cmds.py +1040 -0
  69. openstat/commands/stata_import_cmds.py +215 -0
  70. openstat/commands/string_cmds.py +124 -0
  71. openstat/commands/surv_cmds.py +145 -0
  72. openstat/commands/survey_cmds.py +153 -0
  73. openstat/commands/textanalysis_cmds.py +192 -0
  74. openstat/commands/ts_adv_cmds.py +136 -0
  75. openstat/commands/ts_cmds.py +195 -0
  76. openstat/commands/tui_cmds.py +111 -0
  77. openstat/commands/ux_cmds.py +191 -0
  78. openstat/commands/validate_cmds.py +270 -0
  79. openstat/commands/viz_adv_cmds.py +312 -0
  80. openstat/commands/viz_extra_cmds.py +251 -0
  81. openstat/commands/watch_cmds.py +69 -0
  82. openstat/config.py +106 -0
  83. openstat/dsl/__init__.py +0 -0
  84. openstat/dsl/parser.py +332 -0
  85. openstat/dsl/tokenizer.py +105 -0
  86. openstat/i18n.py +120 -0
  87. openstat/io/__init__.py +0 -0
  88. openstat/io/loader.py +187 -0
  89. openstat/jupyter/__init__.py +18 -0
  90. openstat/jupyter/display.py +18 -0
  91. openstat/jupyter/magic.py +60 -0
  92. openstat/logging_config.py +59 -0
  93. openstat/plots/__init__.py +0 -0
  94. openstat/plots/plotter.py +437 -0
  95. openstat/plots/surv_plots.py +32 -0
  96. openstat/plots/ts_plots.py +59 -0
  97. openstat/plugins/__init__.py +5 -0
  98. openstat/plugins/manager.py +69 -0
  99. openstat/repl.py +457 -0
  100. openstat/reporting/__init__.py +0 -0
  101. openstat/reporting/eda.py +208 -0
  102. openstat/reporting/report.py +67 -0
  103. openstat/script_runner.py +319 -0
  104. openstat/session.py +133 -0
  105. openstat/stats/__init__.py +0 -0
  106. openstat/stats/advanced_regression.py +269 -0
  107. openstat/stats/arch_garch.py +84 -0
  108. openstat/stats/bayesian.py +103 -0
  109. openstat/stats/causal.py +258 -0
  110. openstat/stats/clustering.py +206 -0
  111. openstat/stats/discrete.py +311 -0
  112. openstat/stats/epidemiology.py +119 -0
  113. openstat/stats/equiv_tobit.py +163 -0
  114. openstat/stats/factor.py +174 -0
  115. openstat/stats/imputation.py +282 -0
  116. openstat/stats/influence.py +78 -0
  117. openstat/stats/iv.py +131 -0
  118. openstat/stats/manova.py +124 -0
  119. openstat/stats/mixed.py +128 -0
  120. openstat/stats/ml.py +275 -0
  121. openstat/stats/ml_advanced.py +117 -0
  122. openstat/stats/model_eval.py +183 -0
  123. openstat/stats/models.py +1342 -0
  124. openstat/stats/nonparametric.py +130 -0
  125. openstat/stats/panel.py +179 -0
  126. openstat/stats/power.py +295 -0
  127. openstat/stats/resampling.py +203 -0
  128. openstat/stats/survey.py +213 -0
  129. openstat/stats/survival.py +196 -0
  130. openstat/stats/timeseries.py +142 -0
  131. openstat/stats/ts_advanced.py +114 -0
  132. openstat/types.py +11 -0
  133. openstat/web/__init__.py +1 -0
  134. openstat/web/app.py +117 -0
  135. openstat/web/session_manager.py +73 -0
  136. openstat/web/static/app.js +117 -0
  137. openstat/web/static/index.html +38 -0
  138. openstat/web/static/style.css +103 -0
  139. openstat_cli-1.0.0.dist-info/METADATA +748 -0
  140. openstat_cli-1.0.0.dist-info/RECORD +143 -0
  141. openstat_cli-1.0.0.dist-info/WHEEL +4 -0
  142. openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
  143. openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,67 @@
1
+ """Markdown report generation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+
8
+ from openstat.session import Session
9
+
10
+
11
def generate_report(session: Session, output_path: str | Path) -> Path:
    """Write a Markdown report describing the session state.

    The report contains, in order: a title and generation timestamp, the
    dataset source / shape / column names (each only when available), the
    recorded command history inside a fenced code block, the text table of
    every stored model result, and one image link per recorded plot.

    Args:
        session: Object whose ``dataset_path``, ``df``, ``history``,
            ``results`` and ``plot_paths`` attributes are read.
        output_path: Destination file. Missing parent directories are created.

    Returns:
        The destination as a ``Path``.
    """
    p = Path(output_path)
    p.parent.mkdir(parents=True, exist_ok=True)

    lines: list[str] = [
        "# OpenStat Analysis Report",
        "",
        f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "",
    ]

    # Dataset info
    lines += ["## Dataset", ""]
    if session.dataset_path:
        lines.append(f"- **Source**: `{session.dataset_path}`")
    if session.df is not None:
        r, c = session.df.shape
        lines.append(f"- **Shape**: {r:,} rows x {c} columns")
        lines.append(f"- **Columns**: {', '.join(session.df.columns)}")
    lines.append("")

    # Command history
    lines += ["## Commands Executed", "", "```"]
    lines.extend(session.history)
    lines += ["```", ""]

    # Model results
    if session.results:
        lines += ["## Model Results", ""]
        for result in session.results:
            lines.append(result.table)
            lines.append("")

    # Plots
    if session.plot_paths:
        lines += ["## Plots", ""]
        report_dir = p.parent.resolve()
        for plot_path in session.plot_paths:
            plot_p = Path(plot_path)
            # Prefer a path relative to the report so the report and its
            # plots can be moved together; fall back to the path as given
            # when the plot lives outside the report directory.
            try:
                rel = plot_p.resolve().relative_to(report_dir)
            except ValueError:
                rel = plot_p
            # Markdown links need forward slashes; str(Path) would emit
            # backslashes on Windows and break the image reference.
            lines.append(f"![{plot_p.name}]({rel.as_posix()})")
            lines.append("")

    p.write_text("\n".join(lines), encoding="utf-8")
    return p
@@ -0,0 +1,319 @@
1
+ """Advanced .ost script runner with foreach, forvalues, and if/else support."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from typing import TYPE_CHECKING
7
+
8
+ if TYPE_CHECKING:
9
+ from openstat.session import Session
10
+
11
+
12
+ # ---------------------------------------------------------------------------
13
+ # Block parser
14
+ # ---------------------------------------------------------------------------
15
+
16
def _collect_block(lines: list[str], start: int) -> tuple[list[str], int]:
    """Collect the body of a brace-delimited block.

    Scanning begins at ``lines[start]``, i.e. *after* the block's opening
    '{'. Nested blocks are tracked by counting braces, so only a line that
    starts with '}' while no nested block is open terminates the body.

    If the first non-blank line is a lone '{', it is taken to be this block's
    opening brace (header written without a trailing '{') and is skipped.

    Args:
        lines: All script lines (raw, possibly with trailing newlines).
        start: Index of the first line to examine.

    Returns:
        ``(body_lines, index)``. ``index`` points AT the closing line rather
        than past it, so the caller can inspect it for a trailing
        ``else`` (e.g. ``'} else {'``). For an unterminated block ``index``
        equals ``len(lines)``.
    """
    body: list[str] = []
    depth = 1
    i = start
    total = len(lines)

    # Opening brace on its own line after the header: consume it here so it
    # is neither counted as a nested block nor dispatched as a command.
    probe = i
    while probe < total and not lines[probe].strip():
        probe += 1
    if probe < total and lines[probe].strip() == "{":
        i = probe + 1

    while i < total:
        stripped = lines[i].strip()
        # Only a closer seen while no nested block is open ends this block.
        # (The original returned on ANY line starting with '}', which cut the
        # body short at the end of the first nested block.)
        if stripped.startswith("}") and depth <= 1:
            return body, i
        # Comment lines never open or close blocks.
        if not stripped.startswith("#"):
            for ch in stripped:
                if ch == "{":
                    depth += 1
                elif ch == "}":
                    depth -= 1
        body.append(lines[i])
        i += 1
    return body, i  # unterminated block — caller handles
44
+
45
+
46
def _parse_statements(lines: list[str]) -> list:
    """Turn script lines into a flat list of statement tuples.

    Blank lines and lines starting with '#' are dropped. Every other line
    becomes one of:

        ("line", text)
        ("foreach", varname, values_list, body_lines)
        ("forvalues", varname, num_sequence, body_lines)
        ("if", condition, if_body, else_body_or_None)

    Block bodies are kept as raw lines; they are parsed again when the block
    is executed.
    """
    total = len(lines)

    def _past_closer(pos: int) -> int:
        # Step over the line at *pos* when it is a block-closing '}' line.
        if pos < total and lines[pos].strip().startswith("}"):
            return pos + 1
        return pos

    parsed: list = []
    cursor = 0
    while cursor < total:
        text = lines[cursor].strip()

        # Nothing to record for blank lines and comments.
        if not text or text.startswith("#"):
            cursor += 1
            continue

        # foreach <var> in <v1> <v2> ... {
        loop_m = re.match(r'^foreach\s+(\w+)\s+in\s+(.+?)(?:\s*\{)?\s*$', text)
        if loop_m:
            items = loop_m.group(2).rstrip("{").strip().split()
            block_lines, cursor = _collect_block(lines, cursor + 1)
            cursor = _past_closer(cursor)
            parsed.append(("foreach", loop_m.group(1), items, block_lines))
            continue

        # forvalues <var> = start/end  |  start(step)end
        range_m = re.match(r'^forvalues\s+(\w+)\s*=\s*(.+?)(?:\s*\{)?\s*$', text)
        if range_m:
            numbers = _parse_numseq(range_m.group(2).rstrip("{").strip())
            block_lines, cursor = _collect_block(lines, cursor + 1)
            cursor = _past_closer(cursor)
            parsed.append(("forvalues", range_m.group(1), numbers, block_lines))
            continue

        # if <condition> {   (the pattern demands whitespace after 'if', so
        # command names such as 'if_...' can never match here)
        cond_m = re.match(r'^if\s+(.+?)(?:\s*\{)?\s*$', text)
        if cond_m:
            test = cond_m.group(1).rstrip("{").strip()
            then_lines, cursor = _collect_block(lines, cursor + 1)

            otherwise: list[str] | None = None
            if cursor < total:
                closer = lines[cursor].strip()
                if re.match(r'^\}\s*else\s*\{?\s*$', closer):
                    if closer.rstrip().endswith("{"):
                        # '} else {' — the else body starts on the next line
                        otherwise, cursor = _collect_block(lines, cursor + 1)
                    else:
                        # '} else' — skip blanks up to and including a lone '{'
                        cursor += 1
                        while cursor < total and lines[cursor].strip() in ("", "{"):
                            found_brace = lines[cursor].strip() == "{"
                            cursor += 1
                            if found_brace:
                                break
                        otherwise, cursor = _collect_block(lines, cursor)
                    cursor = _past_closer(cursor)
                else:
                    # ordinary closing '}' with no else part
                    cursor += 1

            parsed.append(("if", test, then_lines, otherwise))
            continue

        # Anything else is an ordinary command line.
        parsed.append(("line", text))
        cursor += 1

    return parsed
132
+
133
+
134
def _parse_numseq(raw: str) -> list[int | float]:
    """Parse a forvalues sequence specification.

    Accepted forms (numbers may be negative and/or fractional):

        'start/end'        start, start+1, ... up to end
        'start(step)end'   start, start+step, ... up to end (step may be < 0)
        'value'            a single number

    Whole-number results are returned as ``int``, everything else as
    ``float``. Each element is computed as ``start + k * step`` and rounded
    to 12 decimals, so fractional steps do not accumulate floating-point
    drift (e.g. '0(0.1)0.3' ends in 0.3, not 0.30000000000000004).

    Returns:
        The sequence, or ``[]`` when the text is malformed, the step is
        zero, or the step points away from ``end``.
    """
    text = raw.strip()
    number = r'-?(?:\d+\.?\d*|\.\d+)'

    def _tidy(value: float) -> int | float:
        # Remove representation noise, then prefer int for whole numbers.
        value = round(value, 12)
        return int(value) if value == int(value) else value

    def _arith(start: float, step: float, end: float) -> list[int | float]:
        if step == 0:
            return []
        span = (end - start) / step
        if span < -1e-9:
            # step has the wrong sign for this start/end pair
            return []
        # Small tolerance so an end point that is only approximately
        # reachable (0.1 * 3 vs 0.3) is still included.
        count = int(span + 1e-9) + 1
        return [_tidy(start + k * step) for k in range(count)]

    # start(step)end
    m = re.match(rf'^({number})\(({number})\)({number})$', text)
    if m:
        return _arith(float(m.group(1)), float(m.group(2)), float(m.group(3)))

    # start/end — unit step from start
    m = re.match(rf'^({number})/({number})$', text)
    if m:
        return _arith(float(m.group(1)), 1.0, float(m.group(2)))

    # Single value
    try:
        v = float(text)
        return [int(v) if v == int(v) else v]
    except (ValueError, OverflowError):
        # not a number, or inf/nan which cannot be normalised
        return []
159
+
160
+
161
+ # ---------------------------------------------------------------------------
162
+ # Evaluator
163
+ # ---------------------------------------------------------------------------
164
+
165
# Literal condition texts (compared lower-cased) that count as true.
_BOOL_TRUE = {"true", "1", "yes"}


def _eval_condition(condition: str, local_vars: dict[str, str], session: "Session") -> bool:
    """Evaluate a simple if-condition.

    ``{var}`` placeholders are substituted from *local_vars* first. The
    resulting text is then matched against these forms, in order:

        data_loaded          True if a dataset is loaded
        col_exists <name>    True if the column exists in the loaded data
                             (the name as written, or its lower-cased form)
        N                    True if the dataset has at least one row
        N <op> <number>      row-count comparison; False when no dataset
                             is loaded
        lhs <op> rhs         generic comparison; numeric when both sides
                             parse as numbers, otherwise string comparison
                             with surrounding quotes stripped
        true / 1 / yes       literal truth values

    ``data_loaded``, ``col_exists`` and ``N`` are matched case-insensitively.
    Anything unrecognised evaluates to False.
    """
    cond = _substitute(condition, local_vars).strip()
    lowered = cond.lower()

    if lowered == "data_loaded":
        return session.df is not None

    # Match on the original text so the column name keeps its case.
    m = re.match(r'^col_exists\s+(\S+)$', cond, re.IGNORECASE)
    if m:
        if session.df is None:
            return False
        col = m.group(1)
        columns = session.df.columns
        # The lower-cased lookup preserves the historical behaviour, which
        # always folded the requested name to lower case.
        return col in columns or col.lower() in columns

    if lowered == "n":
        return session.df is not None and len(session.df) > 0

    # N <op> <number>
    m = re.match(
        r'^N\s*(>=|<=|==|!=|>|<)\s*(-?(?:\d+\.?\d*|\.\d+))$', cond, re.IGNORECASE
    )
    if m:
        if session.df is None:
            # Without data there is no row count; do not fall through to the
            # generic branch, which would compare the *string* "N" with the
            # number text and report e.g. "N > 100" as true.
            return False
        op, val = m.group(1), float(m.group(2))
        n = len(session.df)
        return {">": n > val, "<": n < val, ">=": n >= val,
                "<=": n <= val, "==": n == val, "!=": n != val}[op]

    # Generic comparison: lhs op rhs
    m = re.match(r'^(.+?)\s*(==|!=|>=|<=|>|<)\s*(.+)$', cond)
    if m:
        lhs, op, rhs = m.group(1).strip(), m.group(2), m.group(3).strip()
        # Quotes are only delimiters; drop them before comparing.
        lhs_s = lhs.strip('"\'')
        rhs_s = rhs.strip('"\'')
        try:
            lhs_v: float | str = float(lhs_s)
            rhs_v: float | str = float(rhs_s)
        except ValueError:
            # At least one side is not numeric: compare both as strings.
            lhs_v = lhs_s
            rhs_v = rhs_s
        return {"==": lhs_v == rhs_v, "!=": lhs_v != rhs_v,
                ">": lhs_v > rhs_v, "<": lhs_v < rhs_v,  # type: ignore[operator]
                ">=": lhs_v >= rhs_v, "<=": lhs_v <= rhs_v}[op]  # type: ignore[operator]

    return lowered in _BOOL_TRUE


def _substitute(text: str, local_vars: dict[str, str]) -> str:
    """Replace every ``{name}`` in *text* with ``str(local_vars[name])``.

    Replacements are applied one variable at a time in the mapping's
    iteration order; placeholders without a matching variable are left
    untouched.
    """
    for name, value in local_vars.items():
        text = text.replace("{" + name + "}", str(value))
    return text
226
+
227
+
228
+ # ---------------------------------------------------------------------------
229
+ # Executor
230
+ # ---------------------------------------------------------------------------
231
+
232
def execute_statements(
    statements: list,
    session: "Session",
    console,
    dispatcher,
    *,
    strict: bool = False,
    local_vars: dict[str, str] | None = None,
) -> bool:
    """Run parsed statements in order.

    Plain lines have ``{var}`` placeholders substituted, are echoed to
    *console* and handed to ``dispatcher(session, line)``; a truthy result is
    printed. Loop statements run their body once per value with the loop
    variable bound to ``str(value)``. ``if`` statements run the matching
    branch with the current variables.

    Args:
        statements: Tuples as produced by ``_parse_statements``.
        session: Passed through to the dispatcher and to condition checks.
        console: Object with a rich-style ``print`` method.
        dispatcher: Callable ``(session, line) -> result``.
        strict: When true, a result whose text (markup tags removed) starts
            with "Error" or "Internal error" raises ``SystemExit(1)``.
        local_vars: Loop variables currently in scope.

    Returns:
        False as soon as the dispatcher returns "__QUIT__" (at any nesting
        level), otherwise True.
    """
    scope = {} if local_vars is None else local_vars

    for stmt in statements:
        tag = stmt[0]

        if tag == "line":
            command = _substitute(stmt[1], scope)
            if not command or command.startswith("#"):
                continue
            console.print(f"[dim]>>> {command}[/dim]")
            outcome = dispatcher(session, command)
            if outcome == "__QUIT__":
                return False
            if outcome:
                console.print(outcome)
                if strict:
                    # Remove [markup] tags before looking at the message text.
                    plain = re.sub(r"\[/?[^\]]*\]", "", outcome)
                    if plain.startswith(("Error", "Internal error")):
                        raise SystemExit(1)
            console.print()

        elif tag in ("foreach", "forvalues"):
            # Both loop kinds share one shape: (kind, variable, values, body).
            _, loop_var, items, body = stmt
            nested = _parse_statements(body)
            for item in items:
                keep_going = execute_statements(
                    nested, session, console, dispatcher,
                    strict=strict, local_vars={**scope, loop_var: str(item)},
                )
                if not keep_going:
                    return False

        elif tag == "if":
            _, condition, then_lines, else_lines = stmt
            chosen = then_lines if _eval_condition(condition, scope, session) else (else_lines or [])
            keep_going = execute_statements(
                _parse_statements(chosen), session, console, dispatcher,
                strict=strict, local_vars=scope,
            )
            if not keep_going:
                return False

    return True
304
+
305
+
306
def run_script_advanced(
    path: str,
    session: "Session",
    console,
    dispatcher,
    *,
    strict: bool = False,
) -> None:
    """Read the .ost script at *path* (UTF-8), parse it and execute it.

    Supports foreach / forvalues / if-else blocks in addition to plain
    command lines. The remaining arguments are forwarded unchanged to
    ``execute_statements``; its return value is not used.
    """
    with open(path, encoding="utf-8") as script:
        source_lines = list(script)

    execute_statements(
        _parse_statements(source_lines),
        session,
        console,
        dispatcher,
        strict=strict,
    )
openstat/session.py ADDED
@@ -0,0 +1,133 @@
1
+ """Session state: holds the active dataset, command history, and results."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+
8
+ import polars as pl
9
+
10
+ from openstat.config import get_config
11
+
12
+
13
@dataclass
class ModelResult:
    """Summary record kept for one fitted model.

    Constructor arguments, in order:
        name: model label, e.g. "OLS" or "Logit".
        formula: model formula, e.g. "y ~ x1 + x2".
        table: the formatted text table for the fit.
        details: additional values such as r2 and n.
    """

    name: str
    formula: str
    table: str
    details: dict
21
+
22
+
23
@dataclass
class Session:
    """Holds all state for a single analysis session.

    Public fields carry the active dataset, the command history, stored
    model results and plot paths. Underscore-prefixed fields are scratch
    state owned by individual command groups; they are excluded from
    ``repr`` so that printing a session stays readable.
    """

    df: pl.DataFrame | None = None
    dataset_path: str | None = None
    dataset_name: str | None = None
    history: list[str] = field(default_factory=list)
    results: list[ModelResult] = field(default_factory=list)
    plot_paths: list[str] = field(default_factory=list)
    _last_model: object = field(default=None, repr=False)  # last fitted statsmodels result
    _last_model_vars: tuple | None = field(default=None, repr=False)  # (dep, indeps)
    _last_fit_result: object = field(default=None, repr=False)  # last FitResult for latex export
    _last_fit_kwargs: dict = field(default_factory=dict, repr=False)  # model-specific kwargs for bootstrap
    # None means "use the configured output directory"; resolved in __post_init__.
    output_dir: Path = field(default=None)  # type: ignore[assignment]
    # Undo snapshots, oldest first. Hidden from repr like the other private
    # fields; otherwise repr(session) would print every stored DataFrame.
    _undo_stack: list[pl.DataFrame] = field(default_factory=list, repr=False)

    # Panel data (F1) / Time series (F2)
    _panel_var: str | None = field(default=None, repr=False)
    _time_var: str | None = field(default=None, repr=False)
    _ts_freq: str | None = field(default=None, repr=False)

    # Survival analysis (F3)
    _surv_time_var: str | None = field(default=None, repr=False)
    _surv_event_var: str | None = field(default=None, repr=False)

    # DuckDB backend (F6)
    _backend: str = field(default="polars", repr=False)
    _backend_obj: object = field(default=None, repr=False)

    # File format labels (F10)
    _variable_labels: dict | None = field(default=None, repr=False)

    # Multiple imputation (F11)
    _imputed_datasets: list | None = field(default=None, repr=False)
    _mi_m: int = field(default=0, repr=False)

    # Survey design (F12)
    _svy_weight_var: str | None = field(default=None, repr=False)
    _svy_strata_var: str | None = field(default=None, repr=False)
    _svy_psu_var: str | None = field(default=None, repr=False)

    # Panel model storage for Hausman test
    _panel_models: dict = field(default_factory=dict, repr=False)

    # Session logging (log using / log close)
    _log_file: object = field(default=None, repr=False)  # open file handle
    _log_path: str | None = field(default=None, repr=False)

    # Last margins / marginal effects result
    _last_margins: object = field(default=None, repr=False)

    # Network analysis (network build)
    _network: object = field(default=None, repr=False)
    _network_weight_col: str | None = field(default=None, repr=False)

    def __post_init__(self) -> None:
        """Resolve ``output_dir`` from config when unset and create it."""
        cfg = get_config()
        if self.output_dir is None:
            self.output_dir = Path(cfg.output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def record(self, command: str) -> None:
        """Record a command in history."""
        self.history.append(command)

    def require_data(self) -> pl.DataFrame:
        """Return the active DataFrame.

        Raises:
            RuntimeError: if no dataset is loaded.
        """
        if self.df is None:
            raise RuntimeError("No dataset loaded. Use: load <path>")
        return self.df

    def snapshot(self) -> None:
        """Save current DataFrame to undo stack (call before mutations).

        Does nothing when no dataset is loaded. Respects ``max_undo_stack``
        and ``max_undo_memory_mb`` from config: the oldest snapshots are
        discarded first until the stack plus the new snapshot fit the memory
        budget, and afterwards until the entry count is within bounds.
        """
        if self.df is None:
            return

        cfg = get_config()
        df_size_mb = self.df.estimated_size("mb")
        stack_size_mb = sum(d.estimated_size("mb") for d in self._undo_stack)
        # Drop oldest snapshots to stay within the memory budget. The new
        # snapshot is stored even if it exceeds the budget on its own.
        while self._undo_stack and stack_size_mb + df_size_mb > cfg.max_undo_memory_mb:
            stack_size_mb -= self._undo_stack.pop(0).estimated_size("mb")

        self._undo_stack.append(self.df.clone())
        # Keep the stack bounded by count too. A loop rather than a single
        # pop so the stack also shrinks if the configured limit was lowered.
        while self._undo_stack and len(self._undo_stack) > cfg.max_undo_stack:
            self._undo_stack.pop(0)

    def undo(self) -> bool:
        """Restore the most recent snapshot. Returns True if successful."""
        if not self._undo_stack:
            return False
        self.df = self._undo_stack.pop()
        return True

    @property
    def undo_depth(self) -> int:
        """Number of snapshots currently available to ``undo``."""
        return len(self._undo_stack)

    @property
    def shape_str(self) -> str:
        """Human-readable dataset shape, or "No data" when nothing is loaded."""
        if self.df is None:
            return "No data"
        r, c = self.df.shape
        return f"{r:,} rows x {c} columns"
File without changes