openstat-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. openstat/__init__.py +3 -0
  2. openstat/__main__.py +4 -0
  3. openstat/backends/__init__.py +16 -0
  4. openstat/backends/duckdb_backend.py +70 -0
  5. openstat/backends/polars_backend.py +52 -0
  6. openstat/cli.py +92 -0
  7. openstat/commands/__init__.py +82 -0
  8. openstat/commands/adv_stat_cmds.py +1255 -0
  9. openstat/commands/advanced_ml_cmds.py +576 -0
  10. openstat/commands/advreg_cmds.py +207 -0
  11. openstat/commands/alias_cmds.py +135 -0
  12. openstat/commands/arch_cmds.py +82 -0
  13. openstat/commands/arules_cmds.py +111 -0
  14. openstat/commands/automodel_cmds.py +212 -0
  15. openstat/commands/backend_cmds.py +82 -0
  16. openstat/commands/base.py +170 -0
  17. openstat/commands/bayes_cmds.py +71 -0
  18. openstat/commands/causal_cmds.py +269 -0
  19. openstat/commands/cluster_cmds.py +152 -0
  20. openstat/commands/data_cmds.py +996 -0
  21. openstat/commands/datamanip_cmds.py +672 -0
  22. openstat/commands/dataquality_cmds.py +174 -0
  23. openstat/commands/datetime_cmds.py +176 -0
  24. openstat/commands/dimreduce_cmds.py +184 -0
  25. openstat/commands/discrete_cmds.py +149 -0
  26. openstat/commands/dsl_cmds.py +143 -0
  27. openstat/commands/epi_cmds.py +93 -0
  28. openstat/commands/equiv_tobit_cmds.py +94 -0
  29. openstat/commands/esttab_cmds.py +196 -0
  30. openstat/commands/export_beamer_cmds.py +142 -0
  31. openstat/commands/export_cmds.py +201 -0
  32. openstat/commands/export_extra_cmds.py +240 -0
  33. openstat/commands/factor_cmds.py +180 -0
  34. openstat/commands/groupby_cmds.py +155 -0
  35. openstat/commands/help_cmds.py +237 -0
  36. openstat/commands/i18n_cmds.py +43 -0
  37. openstat/commands/import_extra_cmds.py +561 -0
  38. openstat/commands/influence_cmds.py +134 -0
  39. openstat/commands/iv_cmds.py +106 -0
  40. openstat/commands/manova_cmds.py +105 -0
  41. openstat/commands/mediate_cmds.py +233 -0
  42. openstat/commands/meta_cmds.py +284 -0
  43. openstat/commands/mi_cmds.py +228 -0
  44. openstat/commands/mixed_cmds.py +79 -0
  45. openstat/commands/mixture_changepoint_cmds.py +166 -0
  46. openstat/commands/ml_adv_cmds.py +147 -0
  47. openstat/commands/ml_cmds.py +178 -0
  48. openstat/commands/model_eval_cmds.py +142 -0
  49. openstat/commands/network_cmds.py +288 -0
  50. openstat/commands/nlquery_cmds.py +161 -0
  51. openstat/commands/nonparam_cmds.py +149 -0
  52. openstat/commands/outreg_cmds.py +247 -0
  53. openstat/commands/panel_cmds.py +141 -0
  54. openstat/commands/pdf_cmds.py +226 -0
  55. openstat/commands/pipeline_cmds.py +319 -0
  56. openstat/commands/plot_cmds.py +189 -0
  57. openstat/commands/plugin_cmds.py +79 -0
  58. openstat/commands/posthoc_cmds.py +153 -0
  59. openstat/commands/power_cmds.py +172 -0
  60. openstat/commands/profile_cmds.py +246 -0
  61. openstat/commands/rbridge_cmds.py +81 -0
  62. openstat/commands/regex_cmds.py +104 -0
  63. openstat/commands/report_cmds.py +48 -0
  64. openstat/commands/repro_cmds.py +129 -0
  65. openstat/commands/resampling_cmds.py +109 -0
  66. openstat/commands/reshape_cmds.py +223 -0
  67. openstat/commands/sem_cmds.py +177 -0
  68. openstat/commands/stat_cmds.py +1040 -0
  69. openstat/commands/stata_import_cmds.py +215 -0
  70. openstat/commands/string_cmds.py +124 -0
  71. openstat/commands/surv_cmds.py +145 -0
  72. openstat/commands/survey_cmds.py +153 -0
  73. openstat/commands/textanalysis_cmds.py +192 -0
  74. openstat/commands/ts_adv_cmds.py +136 -0
  75. openstat/commands/ts_cmds.py +195 -0
  76. openstat/commands/tui_cmds.py +111 -0
  77. openstat/commands/ux_cmds.py +191 -0
  78. openstat/commands/validate_cmds.py +270 -0
  79. openstat/commands/viz_adv_cmds.py +312 -0
  80. openstat/commands/viz_extra_cmds.py +251 -0
  81. openstat/commands/watch_cmds.py +69 -0
  82. openstat/config.py +106 -0
  83. openstat/dsl/__init__.py +0 -0
  84. openstat/dsl/parser.py +332 -0
  85. openstat/dsl/tokenizer.py +105 -0
  86. openstat/i18n.py +120 -0
  87. openstat/io/__init__.py +0 -0
  88. openstat/io/loader.py +187 -0
  89. openstat/jupyter/__init__.py +18 -0
  90. openstat/jupyter/display.py +18 -0
  91. openstat/jupyter/magic.py +60 -0
  92. openstat/logging_config.py +59 -0
  93. openstat/plots/__init__.py +0 -0
  94. openstat/plots/plotter.py +437 -0
  95. openstat/plots/surv_plots.py +32 -0
  96. openstat/plots/ts_plots.py +59 -0
  97. openstat/plugins/__init__.py +5 -0
  98. openstat/plugins/manager.py +69 -0
  99. openstat/repl.py +457 -0
  100. openstat/reporting/__init__.py +0 -0
  101. openstat/reporting/eda.py +208 -0
  102. openstat/reporting/report.py +67 -0
  103. openstat/script_runner.py +319 -0
  104. openstat/session.py +133 -0
  105. openstat/stats/__init__.py +0 -0
  106. openstat/stats/advanced_regression.py +269 -0
  107. openstat/stats/arch_garch.py +84 -0
  108. openstat/stats/bayesian.py +103 -0
  109. openstat/stats/causal.py +258 -0
  110. openstat/stats/clustering.py +206 -0
  111. openstat/stats/discrete.py +311 -0
  112. openstat/stats/epidemiology.py +119 -0
  113. openstat/stats/equiv_tobit.py +163 -0
  114. openstat/stats/factor.py +174 -0
  115. openstat/stats/imputation.py +282 -0
  116. openstat/stats/influence.py +78 -0
  117. openstat/stats/iv.py +131 -0
  118. openstat/stats/manova.py +124 -0
  119. openstat/stats/mixed.py +128 -0
  120. openstat/stats/ml.py +275 -0
  121. openstat/stats/ml_advanced.py +117 -0
  122. openstat/stats/model_eval.py +183 -0
  123. openstat/stats/models.py +1342 -0
  124. openstat/stats/nonparametric.py +130 -0
  125. openstat/stats/panel.py +179 -0
  126. openstat/stats/power.py +295 -0
  127. openstat/stats/resampling.py +203 -0
  128. openstat/stats/survey.py +213 -0
  129. openstat/stats/survival.py +196 -0
  130. openstat/stats/timeseries.py +142 -0
  131. openstat/stats/ts_advanced.py +114 -0
  132. openstat/types.py +11 -0
  133. openstat/web/__init__.py +1 -0
  134. openstat/web/app.py +117 -0
  135. openstat/web/session_manager.py +73 -0
  136. openstat/web/static/app.js +117 -0
  137. openstat/web/static/index.html +38 -0
  138. openstat/web/static/style.css +103 -0
  139. openstat_cli-1.0.0.dist-info/METADATA +748 -0
  140. openstat_cli-1.0.0.dist-info/RECORD +143 -0
  141. openstat_cli-1.0.0.dist-info/WHEEL +4 -0
  142. openstat_cli-1.0.0.dist-info/entry_points.txt +2 -0
  143. openstat_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,288 @@
1
+ """Network analysis commands: network descriptives, centrality, community detection."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from openstat.commands.base import command
6
+ from openstat.session import Session
7
+
8
+
9
+ def _require_nx():
10
+ try:
11
+ import networkx as nx
12
+ return nx
13
+ except ImportError:
14
+ raise ImportError("Network analysis requires networkx. Install with: pip install networkx")
15
+
16
+
17
@command("network", usage="network <subcommand> ...")
def cmd_network(session: Session, args: str) -> str:
    """Network analysis using NetworkX.

    Subcommands:
      network build from <source_col> to <target_col> [weight=<col>]
      network describe
      network centrality [--degree|--betweenness|--closeness|--eigenvector]
      network community [--louvain|--greedy]
      network plot [--layout=spring|circular|kamada]

    Examples:
      network build from sender to receiver
      network build from from_node to to_node weight=strength
      network describe
      network centrality --degree
      network community --greedy
      network plot
    """
    from openstat.commands.base import CommandArgs

    parsed = CommandArgs(args)
    if not parsed.positional:
        # With no subcommand, the docstring doubles as the help text.
        return cmd_network.__doc__ or "Usage: network <subcommand>"

    subcmd = parsed.positional[0].lower()

    # Dispatch table keeps subcommand routing in one place.
    handlers = {
        "build": lambda: _build_network(session, args),
        "describe": lambda: _describe_network(session),
        "centrality": lambda: _centrality(session, args),
        "community": lambda: _community(session, args),
        "plot": lambda: _plot_network(session, args),
    }
    handler = handlers.get(subcmd)
    if handler is None:
        return (
            f"Unknown subcommand: {subcmd}\n"
            "Available: build, describe, centrality, community, plot"
        )
    return handler()
58
+
59
+
60
def _build_network(session: Session, args: str) -> str:
    """Build a graph from two edge-list columns of the loaded dataset.

    Parses ``from <src> to <tgt> [weight=<col>]`` out of the raw argument
    string; ``--directed`` selects a DiGraph. Parallel edges are collapsed
    by summing their weights. The resulting graph is stored on the session
    as ``session._network`` for use by the other subcommands.
    """
    nx = _require_nx()
    import re

    df = session.require_data()

    # \b anchors keep the keywords from matching inside a column name:
    # previously 'build from photo to target' matched the 'to' at the end
    # of 'photo' and captured the wrong target column.
    m_from = re.search(r"\bfrom\s+(\w+)", args)
    m_to = re.search(r"\bto\s+(\w+)", args)
    if not m_from or not m_to:
        return "Usage: network build from <source_col> to <target_col> [weight=<col>]"

    src_col = m_from.group(1)
    tgt_col = m_to.group(1)

    # Weight accepts either 'weight=col' or 'weight col'.
    m_w = re.search(r"\bweight[= ](\w+)", args)
    weight_col = m_w.group(1) if m_w else None

    for c in [src_col, tgt_col] + ([weight_col] if weight_col else []):
        if c not in df.columns:
            return f"Column not found: {c}"

    # Drop rows with nulls in any edge column before building the graph.
    sub = df.select([c for c in [src_col, tgt_col, weight_col] if c]).drop_nulls()

    G = nx.DiGraph() if "--directed" in args else nx.Graph()
    for row in sub.iter_rows():
        src, tgt = str(row[0]), str(row[1])
        w = float(row[2]) if weight_col else 1.0
        if G.has_edge(src, tgt):
            G[src][tgt]["weight"] += w  # collapse duplicate edges by summing
        else:
            G.add_edge(src, tgt, weight=w)

    session._network = G
    session._network_weight_col = weight_col

    directed_str = "directed" if G.is_directed() else "undirected"
    return (
        f"Network built: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges ({directed_str})\n"
        f"Source: '{src_col}' → Target: '{tgt_col}'"
        + (f" Weight: '{weight_col}'" if weight_col else "")
        + "\nUse 'network describe', 'network centrality', 'network community', 'network plot'"
    )
102
+
103
+
104
def _describe_network(session: Session) -> str:
    """Summarize the session's network: size, density, connectivity,
    clustering/path metrics (undirected only), and a degree overview.

    Returns a formatted text report, or an instructional message when no
    network has been built yet.
    """
    nx = _require_nx()
    G = getattr(session, "_network", None)
    if G is None:
        return "No network built. Use 'network build from <src> to <tgt>' first."

    n = G.number_of_nodes()
    e = G.number_of_edges()
    density = nx.density(G)
    # Directed graphs are reported via their undirected view (weak connectivity).
    is_connected = nx.is_connected(G.to_undirected()) if G.is_directed() else nx.is_connected(G)

    lines = [
        f"Nodes: {n}",
        f"Edges: {e}",
        f"Density: {density:.4f}",
        f"Connected: {is_connected}",
    ]

    if not G.is_directed():
        if n > 0 and e > 0:
            # Best-effort: clustering can fail on degenerate graphs.
            try:
                avg_clust = nx.average_clustering(G)
                lines.append(f"Avg Clustering: {avg_clust:.4f}")
            except Exception:
                pass
        # Path-based metrics are only defined on connected graphs.
        try:
            if nx.is_connected(G):
                avg_path = nx.average_shortest_path_length(G)
                lines.append(f"Avg Path Length: {avg_path:.4f}")
                diam = nx.diameter(G)
                lines.append(f"Diameter: {diam}")
        except Exception:
            pass

    # Degree distribution summary
    import numpy as np
    degrees = [d for _, d in G.degree()]
    if degrees:
        lines += [
            "",
            f"Degree — Min: {min(degrees)} Mean: {np.mean(degrees):.2f} Max: {max(degrees)}",
        ]

    # Top 5 nodes by degree
    top = sorted(G.degree(), key=lambda x: x[1], reverse=True)[:5]
    if top:
        lines.append("")
        lines.append("Top nodes by degree:")
        for node, deg in top:
            lines.append(f" {str(node):<20} degree = {deg}")

    return "\n" + "=" * 50 + "\nNetwork Descriptives\n" + "=" * 50 + "\n" + "\n".join(lines) + "\n" + "=" * 50
156
+
157
+
158
def _centrality(session: Session, args: str) -> str:
    """Rank nodes of the current network by a centrality measure.

    Flags select the measure (--betweenness, --closeness, --eigenvector);
    the default is degree centrality. Eigenvector centrality falls back to
    degree centrality if power iteration fails to converge.
    """
    nx = _require_nx()
    G = getattr(session, "_network", None)
    if G is None:
        return "No network built. Use 'network build' first."

    if "--betweenness" in args:
        label = "Betweenness Centrality"
        scores = nx.betweenness_centrality(G)
    elif "--closeness" in args:
        label = "Closeness Centrality"
        scores = nx.closeness_centrality(G)
    elif "--eigenvector" in args:
        try:
            label = "Eigenvector Centrality"
            scores = nx.eigenvector_centrality(G, max_iter=1000)
        except nx.PowerIterationFailedConvergence:
            label = "Degree Centrality (fallback)"
            scores = nx.degree_centrality(G)
    else:
        label = "Degree Centrality"
        scores = nx.degree_centrality(G)

    top_n = 20
    ranked = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:top_n]

    body = [f"Top {min(top_n, len(ranked))} nodes by {label}:"]
    body.append(f" {'Node':<25} {'Score':>10}")
    body.append(" " + "-" * 37)
    for node, score in ranked:
        body.append(f" {str(node):<25} {score:>10.4f}")

    return "\n" + "=" * 50 + f"\n{label}\n" + "=" * 50 + "\n" + "\n".join(body) + "\n" + "=" * 50
186
+
187
+
188
def _community(session: Session, args: str) -> str:
    """Detect communities in the session's network.

    ``--louvain`` uses the optional python-louvain package; otherwise
    greedy modularity maximization from networkx is used. Directed graphs
    are analyzed on their undirected view.
    """
    nx = _require_nx()
    G = getattr(session, "_network", None)
    if G is None:
        return "No network built. Use 'network build' first."

    G_undir = G.to_undirected() if G.is_directed() else G

    try:
        if "--louvain" in args:
            try:
                import community as community_louvain
                partition = community_louvain.best_partition(G_undir)
                # Invert node -> community id into community id -> member list.
                communities = {}
                for node, comm_id in partition.items():
                    communities.setdefault(comm_id, []).append(node)
                method = "Louvain"
            except ImportError:
                return "Louvain requires python-louvain. Install with: pip install python-louvain"
        else:
            # Greedy modularity (built into networkx)
            from networkx.algorithms.community import greedy_modularity_communities
            comms = list(greedy_modularity_communities(G_undir))
            communities = {i: list(c) for i, c in enumerate(comms)}
            method = "Greedy Modularity"

        n_comm = len(communities)
        modularity = None
        # Modularity score is best-effort; the import path varies across
        # networkx versions, hence the broad except.
        try:
            from networkx.algorithms.community.quality import modularity as nx_mod
            modularity = nx_mod(G_undir, [set(v) for v in communities.values()])
        except Exception:
            pass

        lines = [
            f"Method: {method}",
            f"Communities found: {n_comm}",
        ]
        if modularity is not None:
            lines.append(f"Modularity: {modularity:.4f}")
        lines.append("")

        # Largest communities first; show up to five sample members each.
        for cid, members in sorted(communities.items(), key=lambda x: -len(x[1])):
            sample = ", ".join(str(m) for m in members[:5])
            if len(members) > 5:
                sample += f", ... (+{len(members)-5})"
            lines.append(f" Community {cid+1:>3}: {len(members):>4} nodes [{sample}]")

        return "\n" + "=" * 55 + f"\nCommunity Detection ({method})\n" + "=" * 55 + "\n" + "\n".join(lines) + "\n" + "=" * 55

    except Exception as exc:
        return f"community detection error: {exc}"
240
+
241
+
242
def _plot_network(session: Session, args: str) -> str:
    """Render the session's network to an image in the session output dir.

    Layout flags: --circular, --kamada; default is a seeded spring layout.
    Figure, node, and font sizes are scaled down as the node count grows so
    large graphs stay readable. The saved path is appended to
    session.plot_paths and returned in the status message.
    """
    nx = _require_nx()
    G = getattr(session, "_network", None)
    if G is None:
        return "No network built. Use 'network build' first."

    import matplotlib
    matplotlib.use("Agg")  # headless backend: no display required
    import matplotlib.pyplot as plt
    from openstat.plots.plotter import _unique_path

    # Layout
    if "--circular" in args:
        pos = nx.circular_layout(G)
        layout_name = "circular"
    elif "--kamada" in args:
        pos = nx.kamada_kawai_layout(G)
        layout_name = "kamada-kawai"
    else:
        pos = nx.spring_layout(G, seed=42)  # fixed seed => reproducible plots
        layout_name = "spring"

    n = G.number_of_nodes()
    # Scale the figure with node count, clamped to a sane 6..16 inch range.
    fig_size = min(max(6, n * 0.3), 16)
    fig, ax = plt.subplots(figsize=(fig_size, fig_size))

    # Smaller nodes/labels for denser graphs.
    node_size = max(50, min(500, 2000 // (n + 1)))
    nx.draw_networkx(
        G, pos=pos, ax=ax,
        node_size=node_size,
        node_color="#4C72B0",
        edge_color="#AAAAAA",
        font_size=max(6, min(10, 120 // (n + 1))),
        arrows=G.is_directed(),
        width=0.8,
        alpha=0.9,
    )
    ax.set_title(f"Network ({n} nodes, {G.number_of_edges()} edges) — {layout_name} layout")
    ax.axis("off")
    fig.tight_layout()

    session.output_dir.mkdir(parents=True, exist_ok=True)
    path = _unique_path(session.output_dir, "network_plot")
    fig.savefig(path, dpi=150)
    plt.close(fig)  # release the figure so repeated plots don't accumulate
    session.plot_paths.append(str(path))
    return f"Network plot saved: {path}"
@@ -0,0 +1,161 @@
1
+ """Natural language query: 'ask' command using OpenAI or Anthropic API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from openstat.commands.base import command, CommandArgs, friendly_error
6
+ from openstat.session import Session
7
+
8
+
9
def _build_context(session: Session) -> str:
    """Build a compact dataset context string for the LLM.

    Summarizes up to the first 30 columns: dtype, missing count, and either
    mean/sd (numeric) or unique-value count (other types), plus the most
    recent model result if any. Returns "No dataset loaded." when the
    session has no dataframe.
    """
    import polars as pl

    if session.df is None:
        return "No dataset loaded."

    df = session.df
    # Polars dtypes that get a mean/sd summary instead of a unique count.
    NUMERIC = (pl.Float32, pl.Float64, pl.Int8, pl.Int16, pl.Int32, pl.Int64,
               pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64)

    col_info = []
    for c in df.columns[:30]:  # cap at 30 cols to keep the prompt small
        dtype = str(df[c].dtype)
        n_miss = df[c].null_count()
        if df[c].dtype in NUMERIC:
            col_data = df[c].drop_nulls()
            if col_data.len() > 0:
                extra = f"mean={col_data.mean():.2f}, sd={col_data.std():.2f}"
            else:
                extra = "all null"
        else:
            n_uniq = df[c].drop_nulls().n_unique()
            extra = f"{n_uniq} unique values"
        miss_str = f", {n_miss} missing" if n_miss else ""
        col_info.append(f" {c} ({dtype}{miss_str}): {extra}")

    lines = [
        f"Dataset: {session.dataset_name or 'unknown'}",
        f"Shape: {df.height} rows × {df.width} columns",
        "Columns:",
    ] + col_info

    # NOTE(review): assumes entries of session.results expose .name and
    # .formula attributes — confirm against the results container.
    if session.results:
        lines.append("\nLast model: " + session.results[-1].name +
                     " — " + session.results[-1].formula)

    return "\n".join(lines)
47
+
48
+
49
def _ask_openai(question: str, context: str, model: str) -> str | None:
    """Query OpenAI's chat completions API about the dataset.

    Args:
        question: The user's natural-language question.
        context: Compact dataset description from _build_context().
        model: Model name; an empty string selects the default gpt-4o-mini.

    Returns:
        The assistant's reply text, or None when the openai package is not
        installed (the caller turns None into an install hint). The
        annotation was previously ``-> str`` with a ``# type: ignore`` on
        the None return; ``str | None`` states the real contract.
    """
    try:
        from openai import OpenAI
    except ImportError:
        return None

    client = OpenAI()  # uses OPENAI_API_KEY env var
    resp = client.chat.completions.create(
        model=model or "gpt-4o-mini",
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a helpful statistical analysis assistant for OpenStat, "
                    "a Python-based data analysis REPL similar to Stata. "
                    "Answer questions about the dataset concisely. "
                    "When relevant, suggest the exact OpenStat command to use."
                ),
            },
            {
                "role": "user",
                "content": f"Dataset context:\n{context}\n\nQuestion: {question}",
            },
        ],
        max_tokens=500,
    )
    # message.content is Optional in the SDK; guard before .strip() so an
    # empty completion doesn't raise AttributeError (and doesn't return
    # None, which the caller would misreport as "package not installed").
    content = resp.choices[0].message.content
    return content.strip() if content else ""
76
+
77
+
78
def _ask_anthropic(question: str, context: str, model: str) -> str | None:
    """Query Anthropic's messages API about the dataset.

    Args:
        question: The user's natural-language question.
        context: Compact dataset description from _build_context().
        model: Model name; an empty string selects the default Claude Haiku.

    Returns:
        The assistant's reply text, or None when the anthropic package is
        not installed (the caller turns None into an install hint). The
        annotation was previously ``-> str`` with a ``# type: ignore`` on
        the None return; ``str | None`` states the real contract.
    """
    try:
        import anthropic
    except ImportError:
        return None

    client = anthropic.Anthropic()  # uses ANTHROPIC_API_KEY env var
    resp = client.messages.create(
        model=model or "claude-haiku-4-5-20251001",
        max_tokens=500,
        system=(
            "You are a helpful statistical analysis assistant for OpenStat, "
            "a Python-based data analysis REPL similar to Stata. "
            "Answer questions about the dataset concisely. "
            "When relevant, suggest the exact OpenStat command to use."
        ),
        messages=[
            {
                "role": "user",
                "content": f"Dataset context:\n{context}\n\nQuestion: {question}",
            }
        ],
    )
    return resp.content[0].text.strip()
102
+
103
+
104
+ @command("ask", usage='ask "<natural language question>"')
105
+ def cmd_ask(session: Session, args: str) -> str:
106
+ """Ask a natural language question about your dataset (requires AI API key).
107
+
108
+ Uses OpenAI (OPENAI_API_KEY) or Anthropic (ANTHROPIC_API_KEY), whichever
109
+ is available. The assistant can suggest OpenStat commands, explain results,
110
+ and answer statistical questions.
111
+
112
+ Options:
113
+ --provider=openai|anthropic (default: auto-detect from env)
114
+ --model=<model-name> (default: gpt-4o-mini / claude-haiku-4-5)
115
+
116
+ Examples:
117
+ ask "What's the correlation between income and education?"
118
+ ask "Which variables have the most missing data?"
119
+ ask "What regression model should I use for this binary outcome?"
120
+ ask "Explain the OLS results" --provider=anthropic
121
+ """
122
+ import os
123
+
124
+ ca = CommandArgs(args)
125
+ # Question is the rest of args after stripping flags
126
+ question = ca.strip_flags_and_options().strip().strip('"\'')
127
+ if not question:
128
+ return 'Usage: ask "<question>" [--provider=openai|anthropic] [--model=<name>]'
129
+
130
+ provider = ca.options.get("provider", "").lower()
131
+ model = ca.options.get("model", "")
132
+
133
+ context = _build_context(session)
134
+
135
+ # Auto-detect provider
136
+ if not provider:
137
+ if os.getenv("ANTHROPIC_API_KEY"):
138
+ provider = "anthropic"
139
+ elif os.getenv("OPENAI_API_KEY"):
140
+ provider = "openai"
141
+ else:
142
+ return (
143
+ "No AI API key found.\n"
144
+ "Set OPENAI_API_KEY or ANTHROPIC_API_KEY environment variable.\n"
145
+ "Install: pip install openai OR pip install anthropic"
146
+ )
147
+
148
+ try:
149
+ if provider == "openai":
150
+ result = _ask_openai(question, context, model)
151
+ if result is None:
152
+ return "openai package not installed. Run: pip install openai"
153
+ elif provider == "anthropic":
154
+ result = _ask_anthropic(question, context, model)
155
+ if result is None:
156
+ return "anthropic package not installed. Run: pip install anthropic"
157
+ else:
158
+ return f"Unknown provider: {provider}. Use 'openai' or 'anthropic'."
159
+ return result
160
+ except Exception as e:
161
+ return friendly_error(e, "ask")
@@ -0,0 +1,149 @@
1
+ """Nonparametric test commands: ranksum, signrank, kwallis, spearman."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+ from openstat.commands.base import command
8
+ from openstat.session import Session
9
+ from openstat.stats.nonparametric import (
10
+ spearman_corr,
11
+ ranksum_test,
12
+ signrank_test,
13
+ kruskal_wallis_test,
14
+ )
15
+
16
+
17
+ def _stata_opts(raw: str) -> tuple[list[str], dict[str, str]]:
18
+ opts: dict[str, str] = {}
19
+ for m in re.finditer(r'(\w+)\(([^)]*)\)', raw):
20
+ opts[m.group(1).lower()] = m.group(2)
21
+ rest = re.sub(r'\w+\([^)]*\)', '', raw)
22
+ positional = [t.strip(',') for t in rest.split() if t.strip(',')]
23
+ return positional, opts
24
+
25
+
26
+ def _fmt(d: dict) -> str:
27
+ lines = [f"\n{d.get('test', 'Result')}", "-" * 55]
28
+ skip = {"test", "groups", "n_per_group", "_model"}
29
+ for k, v in d.items():
30
+ if k in skip:
31
+ continue
32
+ if isinstance(v, float):
33
+ lines.append(f" {k:<30} {v:.6f}")
34
+ elif isinstance(v, list):
35
+ lines.append(f" {k:<30} {v}")
36
+ else:
37
+ lines.append(f" {k:<30} {v}")
38
+ lines.append("-" * 55)
39
+ return "\n".join(lines)
40
+
41
+
42
+ @command("ranksum", usage="ranksum var by(groupvar) [--less|--greater]")
43
+ def cmd_ranksum(session: Session, args: str) -> str:
44
+ """Wilcoxon rank-sum (Mann-Whitney U) test for two independent groups."""
45
+ df = session.require_data()
46
+ positional, opts = _stata_opts(args)
47
+ if not positional:
48
+ return "Usage: ranksum var by(groupvar)"
49
+
50
+ var = positional[0]
51
+ by = opts.get("by")
52
+ if by is None:
53
+ return "Specify group variable: ranksum var by(groupvar)"
54
+
55
+ alt = "two-sided"
56
+ if "--less" in args:
57
+ alt = "less"
58
+ elif "--greater" in args:
59
+ alt = "greater"
60
+
61
+ try:
62
+ r = ranksum_test(df, var, by, alternative=alt)
63
+ return _fmt(r)
64
+ except Exception as exc:
65
+ return f"ranksum error: {exc}"
66
+
67
+
68
+ @command("signrank", usage="signrank var1 [var2] [mu(0)]")
69
+ def cmd_signrank(session: Session, args: str) -> str:
70
+ """Wilcoxon signed-rank test (one-sample or paired)."""
71
+ df = session.require_data()
72
+ positional, opts = _stata_opts(args)
73
+ if not positional:
74
+ return "Usage: signrank var1 [var2] [mu(0)]"
75
+
76
+ mu = float(opts.get("mu", 0.0))
77
+ var1 = positional[0]
78
+ var2 = positional[1] if len(positional) > 1 and positional[1] in df.columns else None
79
+
80
+ try:
81
+ r = signrank_test(df, var1, var2, mu=mu)
82
+ return _fmt(r)
83
+ except Exception as exc:
84
+ return f"signrank error: {exc}"
85
+
86
+
87
+ @command("kwallis", usage="kwallis var by(groupvar)")
88
+ def cmd_kwallis(session: Session, args: str) -> str:
89
+ """Kruskal-Wallis H test for k independent groups."""
90
+ df = session.require_data()
91
+ positional, opts = _stata_opts(args)
92
+ if not positional:
93
+ return "Usage: kwallis var by(groupvar)"
94
+
95
+ var = positional[0]
96
+ by = opts.get("by")
97
+ if by is None:
98
+ return "Specify group variable: kwallis var by(groupvar)"
99
+
100
+ try:
101
+ r = kruskal_wallis_test(df, var, by)
102
+ lines = [_fmt(r)]
103
+ lines.append("\nGroup counts:")
104
+ for g, n in zip(r["groups"], r["n_per_group"]):
105
+ lines.append(f" {g!s:<20} n = {n}")
106
+ return "\n".join(lines)
107
+ except Exception as exc:
108
+ return f"kwallis error: {exc}"
109
+
110
+
111
+ @command("spearman", usage="spearman var1 var2 [var3 ...]")
112
+ def cmd_spearman(session: Session, args: str) -> str:
113
+ """Spearman rank correlation matrix."""
114
+ df = session.require_data()
115
+ positional, opts = _stata_opts(args)
116
+ cols = [c for c in positional if c in df.columns]
117
+ if len(cols) < 2:
118
+ return "spearman requires at least 2 numeric variables."
119
+
120
+ try:
121
+ r = spearman_corr(df, cols)
122
+ except Exception as exc:
123
+ return f"spearman error: {exc}"
124
+
125
+ rho = r["rho"]
126
+ pvals = r["pvalues"]
127
+ k = len(cols)
128
+ w = max(len(c) for c in cols) + 2
129
+
130
+ lines = ["\nSpearman Rank Correlation", "=" * (w + k * 9 + 2)]
131
+ header = " " * w + "".join(f" {c[:7]:>7}" for c in cols)
132
+ lines.append(header)
133
+ lines.append("-" * (w + k * 9 + 2))
134
+ for i, ci in enumerate(cols):
135
+ row = f"{ci:<{w}}"
136
+ for j in range(k):
137
+ row += f" {rho[i][j]:>7.4f}"
138
+ lines.append(row)
139
+ lines.append("")
140
+ lines.append("P-values:")
141
+ for i, ci in enumerate(cols):
142
+ row = f"{ci:<{w}}"
143
+ for j in range(k):
144
+ if i == j:
145
+ row += f" {' .':>7}"
146
+ else:
147
+ row += f" {pvals[i][j]:>7.4f}"
148
+ lines.append(row)
149
+ return "\n".join(lines)