openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. openstat/__init__.py +3 -0
  2. openstat/__main__.py +4 -0
  3. openstat/backends/__init__.py +16 -0
  4. openstat/backends/duckdb_backend.py +70 -0
  5. openstat/backends/polars_backend.py +52 -0
  6. openstat/cli.py +92 -0
  7. openstat/commands/__init__.py +82 -0
  8. openstat/commands/adv_stat_cmds.py +1255 -0
  9. openstat/commands/advanced_ml_cmds.py +576 -0
  10. openstat/commands/advreg_cmds.py +207 -0
  11. openstat/commands/alias_cmds.py +135 -0
  12. openstat/commands/arch_cmds.py +82 -0
  13. openstat/commands/arules_cmds.py +111 -0
  14. openstat/commands/automodel_cmds.py +212 -0
  15. openstat/commands/backend_cmds.py +82 -0
  16. openstat/commands/base.py +170 -0
  17. openstat/commands/bayes_cmds.py +71 -0
  18. openstat/commands/causal_cmds.py +269 -0
  19. openstat/commands/cluster_cmds.py +152 -0
  20. openstat/commands/data_cmds.py +996 -0
  21. openstat/commands/datamanip_cmds.py +672 -0
  22. openstat/commands/dataquality_cmds.py +174 -0
  23. openstat/commands/datetime_cmds.py +176 -0
  24. openstat/commands/dimreduce_cmds.py +184 -0
  25. openstat/commands/discrete_cmds.py +149 -0
  26. openstat/commands/dsl_cmds.py +143 -0
  27. openstat/commands/epi_cmds.py +93 -0
  28. openstat/commands/equiv_tobit_cmds.py +94 -0
  29. openstat/commands/esttab_cmds.py +196 -0
  30. openstat/commands/export_beamer_cmds.py +142 -0
  31. openstat/commands/export_cmds.py +201 -0
  32. openstat/commands/export_extra_cmds.py +240 -0
  33. openstat/commands/factor_cmds.py +180 -0
  34. openstat/commands/groupby_cmds.py +155 -0
  35. openstat/commands/help_cmds.py +237 -0
  36. openstat/commands/i18n_cmds.py +43 -0
  37. openstat/commands/import_extra_cmds.py +561 -0
  38. openstat/commands/influence_cmds.py +134 -0
  39. openstat/commands/iv_cmds.py +106 -0
  40. openstat/commands/manova_cmds.py +105 -0
  41. openstat/commands/mediate_cmds.py +233 -0
  42. openstat/commands/meta_cmds.py +284 -0
  43. openstat/commands/mi_cmds.py +228 -0
  44. openstat/commands/mixed_cmds.py +79 -0
  45. openstat/commands/mixture_changepoint_cmds.py +166 -0
  46. openstat/commands/ml_adv_cmds.py +147 -0
  47. openstat/commands/ml_cmds.py +178 -0
  48. openstat/commands/model_eval_cmds.py +142 -0
  49. openstat/commands/network_cmds.py +288 -0
  50. openstat/commands/nlquery_cmds.py +161 -0
  51. openstat/commands/nonparam_cmds.py +149 -0
  52. openstat/commands/outreg_cmds.py +247 -0
  53. openstat/commands/panel_cmds.py +141 -0
  54. openstat/commands/pdf_cmds.py +226 -0
  55. openstat/commands/pipeline_cmds.py +319 -0
  56. openstat/commands/plot_cmds.py +189 -0
  57. openstat/commands/plugin_cmds.py +79 -0
  58. openstat/commands/posthoc_cmds.py +153 -0
  59. openstat/commands/power_cmds.py +172 -0
  60. openstat/commands/profile_cmds.py +246 -0
  61. openstat/commands/rbridge_cmds.py +81 -0
  62. openstat/commands/regex_cmds.py +104 -0
  63. openstat/commands/report_cmds.py +48 -0
  64. openstat/commands/repro_cmds.py +129 -0
  65. openstat/commands/resampling_cmds.py +109 -0
  66. openstat/commands/reshape_cmds.py +223 -0
  67. openstat/commands/sem_cmds.py +177 -0
  68. openstat/commands/stat_cmds.py +1040 -0
  69. openstat/commands/stata_import_cmds.py +215 -0
  70. openstat/commands/string_cmds.py +124 -0
  71. openstat/commands/surv_cmds.py +145 -0
  72. openstat/commands/survey_cmds.py +153 -0
  73. openstat/commands/textanalysis_cmds.py +192 -0
  74. openstat/commands/ts_adv_cmds.py +136 -0
  75. openstat/commands/ts_cmds.py +195 -0
  76. openstat/commands/tui_cmds.py +111 -0
  77. openstat/commands/ux_cmds.py +191 -0
  78. openstat/commands/validate_cmds.py +270 -0
  79. openstat/commands/viz_adv_cmds.py +312 -0
  80. openstat/commands/viz_extra_cmds.py +251 -0
  81. openstat/commands/watch_cmds.py +69 -0
  82. openstat/config.py +106 -0
  83. openstat/dsl/__init__.py +0 -0
  84. openstat/dsl/parser.py +332 -0
  85. openstat/dsl/tokenizer.py +105 -0
  86. openstat/i18n.py +120 -0
  87. openstat/io/__init__.py +0 -0
  88. openstat/io/loader.py +187 -0
  89. openstat/jupyter/__init__.py +18 -0
  90. openstat/jupyter/display.py +18 -0
  91. openstat/jupyter/magic.py +60 -0
  92. openstat/logging_config.py +59 -0
  93. openstat/plots/__init__.py +0 -0
  94. openstat/plots/plotter.py +437 -0
  95. openstat/plots/surv_plots.py +32 -0
  96. openstat/plots/ts_plots.py +59 -0
  97. openstat/plugins/__init__.py +5 -0
  98. openstat/plugins/manager.py +69 -0
  99. openstat/repl.py +457 -0
  100. openstat/reporting/__init__.py +0 -0
  101. openstat/reporting/eda.py +208 -0
  102. openstat/reporting/report.py +67 -0
  103. openstat/script_runner.py +319 -0
  104. openstat/session.py +133 -0
  105. openstat/stats/__init__.py +0 -0
  106. openstat/stats/advanced_regression.py +269 -0
  107. openstat/stats/arch_garch.py +84 -0
  108. openstat/stats/bayesian.py +103 -0
  109. openstat/stats/causal.py +258 -0
  110. openstat/stats/clustering.py +206 -0
  111. openstat/stats/discrete.py +311 -0
  112. openstat/stats/epidemiology.py +119 -0
  113. openstat/stats/equiv_tobit.py +163 -0
  114. openstat/stats/factor.py +174 -0
  115. openstat/stats/imputation.py +282 -0
  116. openstat/stats/influence.py +78 -0
  117. openstat/stats/iv.py +131 -0
  118. openstat/stats/manova.py +124 -0
  119. openstat/stats/mixed.py +128 -0
  120. openstat/stats/ml.py +275 -0
  121. openstat/stats/ml_advanced.py +117 -0
  122. openstat/stats/model_eval.py +183 -0
  123. openstat/stats/models.py +1342 -0
  124. openstat/stats/nonparametric.py +130 -0
  125. openstat/stats/panel.py +179 -0
  126. openstat/stats/power.py +295 -0
  127. openstat/stats/resampling.py +203 -0
  128. openstat/stats/survey.py +213 -0
  129. openstat/stats/survival.py +196 -0
  130. openstat/stats/timeseries.py +142 -0
  131. openstat/stats/ts_advanced.py +114 -0
  132. openstat/types.py +11 -0
  133. openstat/web/__init__.py +1 -0
  134. openstat/web/app.py +117 -0
  135. openstat/web/session_manager.py +73 -0
  136. openstat/web/static/app.js +117 -0
  137. openstat/web/static/index.html +38 -0
  138. openstat/web/static/style.css +103 -0
  139. openstat_cli-1.0.0.dist-info/METADATA +748 -0
  140. openstat_cli-1.0.0.dist-info/RECORD +143 -0
  141. openstat_cli-1.0.0.dist-info/WHEEL +4 -0
  142. openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
  143. openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,561 @@
1
+ """Extra import commands: URL, clipboard, SPSS syntax translation, REST webhook."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import io
6
+ import os
7
+ import re
8
+ import sys
9
+ import tempfile
10
+ from pathlib import Path
11
+
12
+ from openstat.commands.base import command, CommandArgs, friendly_error
13
+ from openstat.session import Session
14
+
15
+
16
# ---------------------------------------------------------------------------
# import url
# ---------------------------------------------------------------------------

@command("import url", usage="import url <url> [--format=csv|json|parquet] [--sep=,]")
def cmd_import_url(session: Session, args: str) -> str:
    """Load data from an HTTP/HTTPS URL.

    Auto-detects format from the URL extension if --format is not supplied.
    Supported formats: csv, json, jsonl, parquet.

    Example: import url https://example.com/data.csv
    Example: import url https://api.example.com/data.json --format=json
    """
    import polars as pl

    ca = CommandArgs(args)
    if not ca.positional:
        return "Usage: import url <url> [--format=csv|json|parquet] [--sep=,]"

    url = ca.positional[0]
    fmt = ca.options.get("format", "").lower()
    sep = ca.options.get("sep", ",")

    # Auto-detect format from the extension (query string stripped first).
    if not fmt:
        url_path = url.split("?")[0].lower()
        if url_path.endswith(".parquet"):
            fmt = "parquet"
        elif url_path.endswith(".jsonl"):
            # FIX: .jsonl was previously lumped in with .json, but
            # newline-delimited JSON needs polars' NDJSON reader.
            fmt = "jsonl"
        elif url_path.endswith(".json"):
            fmt = "json"
        else:
            fmt = "csv"

    # Download the payload; prefer requests, fall back to stdlib urllib.
    try:
        try:
            import requests
            resp = requests.get(url, timeout=60)
            resp.raise_for_status()
            data_bytes = resp.content
        except ImportError:
            import urllib.request as _urllib
            with _urllib.urlopen(url, timeout=60) as resp:  # noqa: S310
                data_bytes = resp.read()
    except Exception as exc:
        return f"Failed to download '{url}': {exc}"

    # Parse via a temporary file so polars sees a real path; tmp_path is
    # initialised up front so the finally-block is safe even if the
    # temporary file could not be created.
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=f".{fmt}", delete=False) as tmp:
            tmp.write(data_bytes)
            tmp_path = tmp.name

        if fmt == "parquet":
            df = pl.read_parquet(tmp_path)
        elif fmt == "jsonl":
            df = pl.read_ndjson(tmp_path)
        elif fmt == "json":
            df = pl.read_json(tmp_path)
        else:
            actual_sep = "\t" if sep in ("\\t", "\t") else sep
            df = pl.read_csv(tmp_path, separator=actual_sep)
    except Exception as exc:
        return friendly_error(exc, "import url")
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except Exception:
                pass

    # Commit the new dataset into the session (snapshot first for undo).
    session.snapshot()
    session.df = df
    session.dataset_path = url
    session.dataset_name = url.split("/")[-1].split("?")[0] or "url_import"
    session._undo_stack.clear()
    r, c = df.shape
    return f"Loaded {r:,} rows x {c} columns from {url}"
93
+
94
+
95
# ---------------------------------------------------------------------------
# import clipboard
# ---------------------------------------------------------------------------

@command("import clipboard", usage="import clipboard [--sep=\\t|,]")
def cmd_import_clipboard(session: Session, args: str) -> str:
    """Load tabular data pasted from a spreadsheet (Excel, Google Sheets, etc.).

    The default separator is a tab character, which is what Excel/Sheets
    copies to the clipboard. Use --sep=, for comma-separated text.

    Example: import clipboard
    Example: import clipboard --sep=,
    """
    import polars as pl

    ca = CommandArgs(args)
    raw_sep = ca.options.get("sep", "\\t")
    sep = "\t" if raw_sep in ("\\t", "\t") else raw_sep

    text: str | None = None

    # First choice: pyperclip, which abstracts over platforms.
    try:
        import pyperclip
        text = pyperclip.paste()
    except ImportError:
        pass

    if text is None:
        # No pyperclip — shell out to the platform's clipboard utility.
        try:
            import subprocess
            plat = sys.platform
            if plat == "darwin":
                text = subprocess.run(
                    ["pbpaste"], capture_output=True, text=True, timeout=5
                ).stdout
            elif plat.startswith("linux"):
                proc = subprocess.run(
                    ["xclip", "-selection", "clipboard", "-o"],
                    capture_output=True, text=True, timeout=5,
                )
                if proc.returncode != 0:
                    # xclip missing/failed — try xsel instead.
                    proc = subprocess.run(
                        ["xsel", "--clipboard", "--output"],
                        capture_output=True, text=True, timeout=5,
                    )
                text = proc.stdout
            elif plat == "win32":
                text = subprocess.run(
                    ["powershell", "-command", "Get-Clipboard"],
                    capture_output=True, text=True, timeout=5,
                ).stdout
        except Exception as exc:
            return (
                f"Could not read clipboard: {exc}\n"
                "Install pyperclip for reliable clipboard support: pip install pyperclip"
            )

    if not text or not text.strip():
        return "Clipboard is empty or contains no text."

    try:
        df = pl.read_csv(
            io.StringIO(text),
            separator=sep,
            infer_schema_length=1000,
        )
    except Exception as exc:
        return friendly_error(exc, "import clipboard")

    # Commit the parsed table into the session (snapshot first for undo).
    session.snapshot()
    session.df = df
    session.dataset_path = "clipboard"
    session.dataset_name = "clipboard"
    session._undo_stack.clear()
    r, c = df.shape
    sep_display = "tab" if sep == "\t" else repr(sep)
    return f"Loaded {r:,} rows x {c} columns from clipboard (sep={sep_display})."
175
+
176
+
177
+ # ---------------------------------------------------------------------------
178
+ # import spss
179
+ # ---------------------------------------------------------------------------
180
+
181
# Mapping of SPSS commands to OpenStat equivalents.
# Each entry: (regex_pattern, replacement_template | None)
# None means we emit an [untranslated] comment.
# NOTE(review): _SPSS_RULES and _SPSS_SIMPLE are not referenced by
# _translate_spss_line below, which re-creates these patterns inline;
# they appear to be kept as a declarative reference table — confirm
# before removing.

_SPSS_RULES: list[tuple[re.Pattern, str | None]] = [
    # GET FILE = 'path/to/data.sav'.
    (
        re.compile(r"^GET\s+FILE\s*=\s*['\"]?([^'\".\s]+\S*?)['\"]?\s*\.?\s*$", re.IGNORECASE),
        r"load \1",
    ),
    # SAVE OUTFILE = 'path'.
    (
        re.compile(r"^SAVE\s+OUTFILE\s*=\s*['\"]?([^'\".\s]+\S*?)['\"]?\s*\.?\s*$", re.IGNORECASE),
        r"save \1",
    ),
    # REGRESSION VARIABLES = y x1 x2 / DEPENDENT y ...
    (
        re.compile(
            r"^REGRESSION\s+.*VARIABLES\s*=\s*([\w\s]+?)(?:\s*/.*)?\.?\s*$",
            re.IGNORECASE | re.DOTALL,
        ),
        None,  # complex to auto-translate; emit comment
    ),
    # FREQUENCIES VARIABLES = col1 col2.
    (
        re.compile(r"^FREQUENCIES\s+VARIABLES\s*=\s*([\w\s]+?)\.?\s*$", re.IGNORECASE),
        None,  # -> tabulate (needs single col); emit comment
    ),
    # DESCRIPTIVES VARIABLES = col1 col2.
    (
        re.compile(r"^DESCRIPTIVES\s+VARIABLES\s*=\s*([\w\s]+?)\.?\s*$", re.IGNORECASE),
        None,
    ),
    # CORRELATIONS VARIABLES = col1 col2.
    (
        re.compile(r"^CORRELATIONS\s+VARIABLES\s*=\s*([\w\s]+?)\.?\s*$", re.IGNORECASE),
        None,
    ),
    # COMPUTE newvar = expression.
    (
        re.compile(r"^COMPUTE\s+(\w+)\s*=\s*(.+?)\.?\s*$", re.IGNORECASE),
        r"generate \1 = \2",
    ),
    # SELECT IF (condition).
    (
        re.compile(r"^SELECT\s+IF\s+\((.+?)\)\.?\s*$", re.IGNORECASE),
        None,
    ),
    # RECODE var (old=new) ...
    (
        re.compile(r"^RECODE\s+.+$", re.IGNORECASE),
        None,
    ),
]

# Simple direct substitution rules (SPSS keyword -> OpenStat command prefix)
_SPSS_SIMPLE: list[tuple[re.Pattern, str]] = [
    (
        re.compile(r"^FREQUENCIES\s+VARIABLES\s*=\s*([\w\s]+?)\.?\s*$", re.IGNORECASE),
        "tabulate",
    ),
    (
        re.compile(r"^DESCRIPTIVES\s+VARIABLES\s*=\s*([\w\s]+?)\.?\s*$", re.IGNORECASE),
        "summarize",
    ),
    (
        re.compile(r"^CORRELATIONS\s+VARIABLES\s*=\s*([\w\s]+?)\.?\s*$", re.IGNORECASE),
        "correlate",
    ),
]
251
+
252
+
253
+ def _translate_spss_line(line: str) -> str:
254
+ """Translate a single SPSS syntax line to an OpenStat line."""
255
+ stripped = line.strip()
256
+ if not stripped or stripped.startswith("*") or stripped.startswith("/*"):
257
+ # SPSS comment
258
+ return f"# {stripped.lstrip('*').strip()}" if stripped else ""
259
+
260
+ # GET FILE
261
+ m = re.match(r"^GET\s+FILE\s*=\s*['\"]?([^'\".\s]+\S*?)['\"]?\s*\.?\s*$", stripped, re.IGNORECASE)
262
+ if m:
263
+ return f"load {m.group(1)}"
264
+
265
+ # SAVE OUTFILE
266
+ m = re.match(r"^SAVE\s+OUTFILE\s*=\s*['\"]?([^'\".\s]+\S*?)['\"]?\s*\.?\s*$", stripped, re.IGNORECASE)
267
+ if m:
268
+ return f"save {m.group(1)}"
269
+
270
+ # COMPUTE newvar = expr.
271
+ m = re.match(r"^COMPUTE\s+(\w+)\s*=\s*(.+?)\.?\s*$", stripped, re.IGNORECASE)
272
+ if m:
273
+ return f"generate {m.group(1)} = {m.group(2)}"
274
+
275
+ # FREQUENCIES VARIABLES = ...
276
+ m = re.match(r"^FREQUENCIES\s+VARIABLES\s*=\s*([\w\s]+?)\.?\s*$", stripped, re.IGNORECASE)
277
+ if m:
278
+ first_col = m.group(1).split()[0]
279
+ rest = m.group(1).split()[1:]
280
+ extra = " ".join(rest)
281
+ note = f" # [note] original cols: {extra}" if extra else ""
282
+ return f"tabulate {first_col}{note}"
283
+
284
+ # DESCRIPTIVES VARIABLES = ...
285
+ m = re.match(r"^DESCRIPTIVES\s+VARIABLES\s*=\s*([\w\s]+?)\.?\s*$", stripped, re.IGNORECASE)
286
+ if m:
287
+ cols = m.group(1).strip()
288
+ return f"summarize {cols}"
289
+
290
+ # CORRELATIONS VARIABLES = ...
291
+ m = re.match(r"^CORRELATIONS\s+VARIABLES\s*=\s*([\w\s]+?)\.?\s*$", stripped, re.IGNORECASE)
292
+ if m:
293
+ cols = m.group(1).strip()
294
+ return f"correlate {cols}"
295
+
296
+ # REGRESSION — complex, emit annotated comment + best-effort ols
297
+ m = re.match(
298
+ r"^REGRESSION\s+.*?DEPENDENT\s*=?\s*(\w+)\s+.*?ENTER\s+([\w\s]+?)\.?\s*$",
299
+ stripped, re.IGNORECASE | re.DOTALL,
300
+ )
301
+ if m:
302
+ dep = m.group(1)
303
+ indeps = m.group(2).strip()
304
+ return f"ols {dep} {indeps} # [translated from REGRESSION]"
305
+
306
+ # SELECT IF
307
+ m = re.match(r"^SELECT\s+IF\s+\((.+?)\)\.?\s*$", stripped, re.IGNORECASE)
308
+ if m:
309
+ cond = m.group(1)
310
+ return (
311
+ f"# [untranslated] SELECT IF ({cond})\n"
312
+ f"# Manual equivalent: filter {cond}"
313
+ )
314
+
315
+ # RECODE
316
+ if re.match(r"^RECODE\b", stripped, re.IGNORECASE):
317
+ return (
318
+ f"# [untranslated] {stripped}\n"
319
+ "# Manual equivalent: recode <col> old=new ..."
320
+ )
321
+
322
+ # Anything else
323
+ return f"# [untranslated] {stripped}"
324
+
325
+
326
@command("import spss", usage="import spss <syntax.sps> [--out=<script.ost>] [--run]")
def cmd_import_spss(session: Session, args: str) -> str:
    """Translate an SPSS syntax file (.sps) into an OpenStat script (.ost).

    Translated commands:
        GET FILE     -> load
        SAVE OUTFILE -> save
        COMPUTE      -> generate
        FREQUENCIES  -> tabulate
        DESCRIPTIVES -> summarize
        CORRELATIONS -> correlate
        REGRESSION   -> ols (best-effort)
        SELECT IF    -> filter (emitted as comment with hint)
        RECODE       -> replace (emitted as comment with hint)
    All other lines are kept as [untranslated] comments.

    Use --run to immediately execute the translated script.

    Example: import spss analysis.sps --out=analysis.ost
    """
    ca = CommandArgs(args)
    if not ca.positional:
        return "Usage: import spss <syntax.sps> [--out=<script.ost>] [--run]"

    sps_path = ca.positional[0]
    out_path = ca.options.get("out")
    do_run = ca.has_flag("--run")

    # errors="replace" so a mis-encoded syntax file still translates.
    try:
        raw = Path(sps_path).read_text(encoding="utf-8", errors="replace")
    except FileNotFoundError:
        return f"File not found: {sps_path}"
    except Exception as exc:
        return f"Cannot read '{sps_path}': {exc}"

    # SPSS statements can span multiple lines ending with '.'; we join continuation lines.
    # Simple heuristic: lines not ending with '.' that are not blank are joined to the next.
    joined_lines: list[str] = []
    buffer = ""
    for raw_line in raw.splitlines():
        stripped = raw_line.strip()
        if not stripped:
            # Blank line: flush any pending statement, keep the blank for layout.
            if buffer:
                joined_lines.append(buffer)
                buffer = ""
            joined_lines.append("")
            continue
        buffer = (buffer + " " + stripped).strip() if buffer else stripped
        if buffer.endswith("."):
            # Statement terminator reached; drop the trailing '.' here since
            # _translate_spss_line's patterns accept it as optional anyway.
            joined_lines.append(buffer.rstrip("."))
            buffer = ""
    # Flush a final statement that had no terminating '.'.
    if buffer:
        joined_lines.append(buffer)

    # Build the output script, counting translated vs untranslated lines.
    ost_lines: list[str] = [
        f"# OpenStat script translated from SPSS: {sps_path}",
        "#",
    ]
    untranslated_count = 0
    translated_count = 0

    for line in joined_lines:
        if not line.strip():
            ost_lines.append("")
            continue
        translated = _translate_spss_line(line)
        ost_lines.append(translated)
        if "[untranslated]" in translated:
            untranslated_count += 1
        else:
            translated_count += 1

    ost_text = "\n".join(ost_lines)

    if out_path:
        # Write the script, creating parent directories as needed.
        Path(out_path).parent.mkdir(parents=True, exist_ok=True)
        Path(out_path).write_text(ost_text, encoding="utf-8")
        result_msg = (
            f"Translated '{sps_path}' -> '{out_path}'\n"
            f" Translated: {translated_count} lines, "
            f"untranslated (kept as comments): {untranslated_count} lines."
        )
    else:
        # No --out given: show the translation inline instead of saving it.
        result_msg = (
            f"# --- Translated output (not saved) ---\n"
            f"{ost_text}\n"
            f"# --- Translated: {translated_count}, untranslated: {untranslated_count} ---"
        )

    # --run only has an effect when the script was actually written to disk.
    if do_run and out_path:
        try:
            from openstat.script_runner import run_script
            run_script(session, Path(out_path))
            result_msg += "\nScript executed."
        except Exception as exc:
            result_msg += f"\nScript execution failed: {exc}"

    return result_msg
424
+
425
+
426
+ # ---------------------------------------------------------------------------
427
+ # webhook
428
+ # ---------------------------------------------------------------------------
429
+
430
+ def _extract_json_path(data: object, dotpath: str) -> object:
431
+ """Traverse a dotted path like 'data.records' through nested dicts/lists."""
432
+ parts = dotpath.strip().split(".")
433
+ current = data
434
+ for part in parts:
435
+ if isinstance(current, dict):
436
+ current = current.get(part)
437
+ elif isinstance(current, list) and part.isdigit():
438
+ current = current[int(part)]
439
+ else:
440
+ return None
441
+ if current is None:
442
+ return None
443
+ return current
444
+
445
+
446
@command("webhook", usage="webhook <url> [--method=GET|POST] [--params=key:val,key:val]")
def cmd_webhook(session: Session, args: str) -> str:
    """Fetch data from a REST API endpoint and load as a DataFrame.

    Options:
        --method=GET|POST         HTTP method (default GET).
        --params=key:val,key:val  Request parameters (GET query string or POST body).
        --token=<bearer_token>    Authorization: Bearer <token> header.
        --json_path=<dotpath>     Navigate into nested JSON (e.g. data.records).

    The response must be JSON. Arrays of objects are loaded directly;
    nested structures are extracted with --json_path.

    Example: webhook https://api.example.com/records
    Example: webhook https://api.example.com/search --method=POST --params=q:hello,limit:100
    Example: webhook https://api.example.com/v2/items --token=abc123 --json_path=data.items
    """
    import polars as pl

    ca = CommandArgs(args)
    if not ca.positional:
        return "Usage: webhook <url> [--method=GET|POST] [--params=key:val,key:val]"

    url = ca.positional[0]
    method = ca.options.get("method", "GET").upper()
    token = ca.options.get("token")
    json_path = ca.options.get("json_path")
    params_raw = ca.options.get("params", "")

    if method not in ("GET", "POST"):
        return f"Unsupported method '{method}'. Use GET or POST."

    # Parse params: key:val,key:val
    params: dict[str, str] = {}
    if params_raw:
        for pair in params_raw.split(","):
            pair = pair.strip()
            if ":" not in pair:
                return f"Invalid param '{pair}'. Use key:val format."
            k, v = pair.split(":", 1)
            params[k.strip()] = v.strip()

    # Build headers
    headers: dict[str, str] = {"Accept": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"

    try:
        try:
            import requests
            if method == "GET":
                resp = requests.get(url, params=params or None, headers=headers, timeout=60)
            else:
                resp = requests.post(url, json=params or None, headers=headers, timeout=60)
            resp.raise_for_status()
            payload = resp.json()
        except ImportError:
            # Stdlib fallback when requests is not installed.
            # (Single json import here — the original imported json twice.)
            import json as _json
            import urllib.parse as _urlparse
            import urllib.request as _urllib

            full_url = url
            if method == "GET" and params:
                full_url = url + "?" + _urlparse.urlencode(params)

            if method == "POST" and params:
                body = _json.dumps(params).encode()
                req = _urllib.Request(  # noqa: S310
                    full_url,
                    data=body,
                    headers={**headers, "Content-Type": "application/json"},
                )
            else:
                req = _urllib.Request(full_url, headers=headers)  # noqa: S310

            with _urllib.urlopen(req, timeout=60) as resp:  # noqa: S310
                payload = _json.loads(resp.read().decode("utf-8"))

    except Exception as exc:
        return f"Request to '{url}' failed: {exc}"

    # Navigate to nested path if requested
    if json_path:
        payload = _extract_json_path(payload, json_path)
        if payload is None:
            return f"json_path '{json_path}' not found in response."

    # Convert to DataFrame
    try:
        if isinstance(payload, list):
            if not payload:
                return "API returned an empty array."
            df = pl.DataFrame(payload)
        elif isinstance(payload, dict):
            # Try to find an array field automatically
            array_fields = [k for k, v in payload.items() if isinstance(v, list)]
            if not array_fields:
                # Treat the dict as a single-row DataFrame
                df = pl.DataFrame([payload])
            elif len(array_fields) == 1:
                df = pl.DataFrame(payload[array_fields[0]])
            else:
                # Multiple arrays — ask user to specify
                return (
                    f"Response contains multiple array fields: {', '.join(array_fields)}.\n"
                    f"Use --json_path=<field> to select one. Example: --json_path={array_fields[0]}"
                )
        else:
            return f"Unexpected JSON type: {type(payload).__name__}. Expected list or object."
    except Exception as exc:
        return friendly_error(exc, "webhook")

    # Commit the new dataset into the session (snapshot first for undo).
    session.snapshot()
    session.df = df
    session.dataset_path = url
    session.dataset_name = url.split("/")[-1].split("?")[0] or "webhook"
    session._undo_stack.clear()
    r, c = df.shape
    return f"Loaded {r:,} rows x {c} columns from {url} [{method}]."
@@ -0,0 +1,134 @@
1
+ """Influence and diagnostics commands: dfbeta, leverage, cooksd, outlier, avplot, coefplot."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from openstat.commands.base import command
8
+ from openstat.session import Session
9
+
10
+
11
+ def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
12
+ opts: dict[str, str] = {}
13
+ for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
14
+ opts[m.group(1).lower()] = m.group(2)
15
+ rest = re.sub(r'\w+\([^)]*\)', '', raw)
16
+ positional = [t.strip(',') for t in rest.split() if t.strip(',')]
17
+ return positional, opts
18
+
19
+
20
+ def _get_last_ols(session: Session):
21
+ m = session._last_model
22
+ if m is None:
23
+ return None, None, None
24
+ if hasattr(m, "model") and hasattr(m, "dep"):
25
+ return m, m.dep, m.indeps
26
+ if isinstance(m, dict) and "dep" in m and "indeps" in m:
27
+ return m, m["dep"], m["indeps"]
28
+ return None, None, None
29
+
30
+
31
@command("dfbeta", usage="dfbeta [dep indeps]")
def cmd_dfbeta(session: Session, args: str) -> str:
    """Compute DFBETAs for last OLS or specified variables.

    With no arguments, reuses the variables of the most recently
    fitted model.
    """
    # FIX: numpy was previously imported inside the per-coefficient loop;
    # hoist it (and the project import) to the top of the function.
    import numpy as np

    from openstat.stats.influence import compute_influence

    df = session.require_data()
    positional, opts = _stata_opts(args)
    if len(positional) >= 2:
        dep, indeps = positional[0], positional[1:]
    else:
        _, dep, indeps = _get_last_ols(session)
        if dep is None:
            return "Specify dep and indeps, or fit a model first."
    # Keep only regressors that actually exist in the dataset.
    indeps = [c for c in indeps if c in df.columns]
    if not indeps or dep not in df.columns:
        return "Invalid variables."
    try:
        r = compute_influence(df, dep, indeps)
        lines = ["\nDFBETA Statistics", "-" * 50]
        for name, vals in r["dfbetas"].items():
            arr = np.array(vals)
            lines.append(f" {name:<20} max|DFBETA| = {np.abs(arr).max():.4f}")
        # Conventional size-adjusted cutoff for DFBETAs.
        lines.append(f"\n Threshold (2/sqrt(n)): {2/r['n_obs']**0.5:.4f}")
        return "\n".join(lines)
    except Exception as exc:
        return f"dfbeta error: {exc}"
57
+
58
+
59
@command("leverage", usage="leverage [dep indeps]")
def cmd_leverage(session: Session, args: str) -> str:
    """Show leverage statistics for OLS regression.

    With no arguments, reuses the variables of the most recently
    fitted model.
    """
    import numpy as np

    from openstat.stats.influence import compute_influence

    df = session.require_data()
    positional, opts = _stata_opts(args)
    if len(positional) >= 2:
        dep, indeps = positional[0], positional[1:]
    else:
        _, dep, indeps = _get_last_ols(session)
        if dep is None:
            return "Specify dep and indeps, or fit a model first."
    indeps = [c for c in indeps if c in df.columns]
    # FIX: validate like cmd_dfbeta instead of letting unknown columns
    # surface as a generic "leverage error" from compute_influence.
    if not indeps or dep not in df.columns:
        return "Invalid variables."
    try:
        r = compute_influence(df, dep, indeps)
        lines = ["\nLeverage Statistics", "-" * 50]
        lines.append(f" {'High leverage threshold':<35} {r['high_leverage_threshold']:.4f}")
        lines.append(f" {'Observations with high leverage':<35} {r['n_high_leverage']}")
        lev = np.array(r["leverage"])
        lines.append(f" {'Mean leverage':<35} {lev.mean():.4f}")
        lines.append(f" {'Max leverage':<35} {lev.max():.4f}")
        return "\n".join(lines)
    except Exception as exc:
        return f"leverage error: {exc}"
84
+
85
+
86
@command("cooksd", usage="cooksd [dep indeps]")
def cmd_cooksd(session: Session, args: str) -> str:
    """Compute Cook's distance for influence detection.

    With no arguments, reuses the variables of the most recently
    fitted model.
    """
    import numpy as np

    from openstat.stats.influence import compute_influence

    df = session.require_data()
    positional, opts = _stata_opts(args)
    if len(positional) >= 2:
        dep, indeps = positional[0], positional[1:]
    else:
        _, dep, indeps = _get_last_ols(session)
        if dep is None:
            return "Specify dep and indeps, or fit a model first."
    indeps = [c for c in indeps if c in df.columns]
    # Validate like cmd_dfbeta so bad columns give a clear message.
    if not indeps or dep not in df.columns:
        return "Invalid variables."
    try:
        r = compute_influence(df, dep, indeps)
        lines = ["\nCook's Distance", "-" * 50]
        lines.append(f" {'Threshold (4/n)':<35} {r['high_cooks_threshold']:.4f}")
        lines.append(f" {'Influential observations':<35} {r['n_high_cooks']}")
        cd = np.array(r["cooks_d"])
        # FIX: 'Max Cook''s D' was implicit string concatenation and
        # rendered as "Max Cooks D"; spell the label with its apostrophe.
        label = "Max Cook's D"
        lines.append(f" {label:<35} {cd.max():.4f}")
        if r["n_high_cooks"] > 0:
            # Report the five most influential observation indices.
            top = np.argsort(cd)[::-1][:5]
            lines.append(f" Top influential obs (index): {top.tolist()}")
        return "\n".join(lines)
    except Exception as exc:
        return f"cooksd error: {exc}"
113
+
114
+
115
@command("outlier", usage="outlier dep indeps [threshold(3.0)]")
def cmd_outlier(session: Session, args: str) -> str:
    """Detect outliers by studentized residuals.

    threshold(<k>) sets the cutoff on |studentized residual| (default 3.0).
    """
    from openstat.stats.influence import detect_outliers

    df = session.require_data()
    positional, opts = _stata_opts(args)
    if len(positional) < 2:
        return "Usage: outlier dep indeps [threshold(3.0)]"
    dep = positional[0]
    indeps = [c for c in positional[1:] if c in df.columns]
    # FIX: validate like cmd_dfbeta rather than failing inside
    # detect_outliers with a generic "outlier error" message.
    if not indeps or dep not in df.columns:
        return "Invalid variables."
    threshold = float(opts.get("threshold", 3.0))
    try:
        r = detect_outliers(df, dep, indeps, threshold=threshold)
        lines = [f"\nOutlier Detection (|studentized resid| > {threshold})", "-" * 50]
        lines.append(f" Outliers found: {r['n_outliers']}")
        if r["outlier_indices"]:
            # Cap the listing at 20 indices to keep the output readable.
            lines.append(f" Outlier indices: {r['outlier_indices'][:20]}")
        return "\n".join(lines)
    except Exception as exc:
        return f"outlier error: {exc}"