rustystats 0.1.5__cp313-cp313-manylinux_2_34_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rustystats/glm.py ADDED
@@ -0,0 +1,249 @@
1
+ """
2
+ GLM Summary Functions
3
+ =====================
4
+
5
+ This module provides summary formatting functions for GLM results.
6
+ These are used internally by the formula API.
7
+
8
+ Note: The array-based API (fit_glm, GLM class) has been removed.
9
+ Use the formula-based API instead:
10
+
11
+ >>> import rustystats as rs
12
+ >>> result = rs.glm("y ~ x1 + x2 + C(cat)", data, family="poisson").fit()
13
+ >>> print(result.summary())
14
+ """
15
+
16
+ import numpy as np
17
+ from typing import Optional, List
18
+
19
+ from rustystats._rustystats import GLMResults
20
+
21
+
22
def summary(
    result: GLMResults,
    feature_names: Optional[List[str]] = None,
    title: str = "GLM Results",
    alpha: float = 0.05,
) -> str:
    """
    Generate a summary table for GLM results (statsmodels-style).

    Parameters
    ----------
    result : GLMResults
        Fitted GLM results object.

    feature_names : list of str, optional
        Names for each coefficient. If None, uses x0, x1, x2, ...

    title : str, optional
        Title for the summary table.

    alpha : float, optional
        Significance level for confidence intervals. Default 0.05 (95% CI).

    Returns
    -------
    str
        Formatted summary table.

    Raises
    ------
    ValueError
        If ``feature_names`` is given but its length differs from the
        number of model parameters.
    """
    n_params = len(result.params)

    # Generate feature names if not provided
    if feature_names is None:
        feature_names = [f"x{i}" for i in range(n_params)]
    elif len(feature_names) != n_params:
        raise ValueError(
            f"feature_names has {len(feature_names)} elements but model has {n_params} parameters"
        )

    # Per-coefficient statistics
    coefs = result.params
    std_errs = result.bse()
    z_vals = result.tvalues()
    p_vals = result.pvalues()
    conf_ints = result.conf_int(alpha)
    sig_codes = result.significance_codes()

    # Model-level diagnostics. This is deliberately best-effort: if any of
    # them cannot be computed, the table is still rendered with NaN
    # placeholders instead of failing.
    try:
        llf = result.llf()
        aic_val = result.aic()
        bic_val = result.bic()
        pearson_chi2 = result.pearson_chi2()
        null_dev = result.null_deviance()
        family_name = result.family
        scale = result.scale()
    except Exception:
        llf = aic_val = bic_val = pearson_chi2 = null_dev = float('nan')
        family_name = "Unknown"
        scale = 1.0

    rule_width = 78
    heavy_rule = "=" * rule_width
    light_rule = "-" * rule_width

    # Title banner
    lines = [heavy_rule, title.center(rule_width), heavy_rule, ""]

    # Model info - statsmodels style
    lines.append(f"{'Family:':<20} {family_name:<15} {'No. Observations:':<20} {result.nobs:>10}")
    lines.append(f"{'Link Function:':<20} {'(default)':<15} {'Df Residuals:':<20} {result.df_resid:>10}")

    # Regularization info is optional; treat a result object that does not
    # expose it as an unregularized fit.
    try:
        is_reg = result.is_regularized
        penalty_type = result.penalty_type if is_reg else "none"
    except Exception:
        is_reg = False
        penalty_type = "none"

    if is_reg:
        method = f"IRLS + {penalty_type.title()}"
        lines.append(f"{'Method:':<20} {method:<15} {'Df Model:':<20} {result.df_model:>10}")
        lines.append(f"{'Scale:':<20} {scale:<15.4f} {'Alpha (λ):':<20} {result.alpha:>10.4f}")
        l1_val = result.l1_ratio if result.l1_ratio is not None else 0.0
        lines.append(f"{'L1 Ratio:':<20} {l1_val:<15.2f} {'Iterations:':<20} {result.iterations:>10}")
        # n_nonzero should always be available for regularized models
        n_nonzero = result.n_nonzero()
        lines.append(f"{'Non-zero coefs:':<20} {n_nonzero:<15}")
    else:
        lines.append(f"{'Method:':<20} {'IRLS':<15} {'Df Model:':<20} {result.df_model:>10}")
        lines.append(f"{'Scale:':<20} {scale:<15.4f} {'Iterations:':<20} {result.iterations:>10}")
    lines.append("")

    # Goodness of fit
    lines.append(f"{'Log-Likelihood:':<20} {llf:>15.4f} {'Deviance:':<20} {result.deviance:>15.4f}")
    lines.append(f"{'AIC:':<20} {aic_val:>15.4f} {'Null Deviance:':<20} {null_dev:>15.4f}")
    lines.append(f"{'BIC:':<20} {bic_val:>15.4f} {'Pearson chi2:':<20} {pearson_chi2:>15.2f}")
    lines.append(f"{'Converged:':<20} {str(result.converged):<15}")
    lines.append("")
    lines.append(light_rule)

    # Dynamic column width for variable names: at least 16 chars, at most 30.
    # default=0 keeps a model with no parameters from raising in max().
    max_name_len = max((len(name) for name in feature_names), default=0)
    name_width = min(max(16, max_name_len), 30)

    # Coefficient table header. The confidence level is formatted with "g"
    # rather than truncated with int(): (1 - 0.9) * 100 evaluates to
    # 9.999999999999998, and int() would label that "9% CI"; it would also
    # turn a 97.5% interval into "97% CI".
    ci_label = f"{(1 - alpha) * 100:g}% CI"
    header = f"{'Variable':<{name_width}} {'Coef':>10} {'Std.Err':>10} {'z':>8} {'P>|z|':>8} {ci_label:>22} {'':>4}"
    lines.append(header)
    lines.append(light_rule)

    # Coefficient rows
    for full_name, coef, se, z, p, ci, sig in zip(
        feature_names, coefs, std_errs, z_vals, p_vals, conf_ints, sig_codes
    ):
        name = full_name[:name_width]  # Truncate only if exceeds max
        ci_low, ci_high = ci

        # Very small p-values are shown as a bound instead of 0.0000
        p_str = "<0.0001" if p < 0.0001 else f"{p:.4f}"

        ci_str = f"[{ci_low:>8.4f}, {ci_high:>8.4f}]"
        lines.append(
            f"{name:<{name_width}} {coef:>10.4f} {se:>10.4f} {z:>8.3f} {p_str:>8} {ci_str:>22} {sig:>4}"
        )

    lines.append(light_rule)
    lines.append("Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1")
    lines.append(heavy_rule)

    return "\n".join(lines)
160
+
161
+
162
def summary_relativities(
    result: GLMResults,
    feature_names: Optional[List[str]] = None,
    title: str = "GLM Relativities (Log Link)",
    alpha: float = 0.05,
) -> str:
    """
    Generate a summary table showing relativities (exp of coefficients).

    This is appropriate for models with a log link (Poisson, Gamma).
    Relativities show the multiplicative effect of each variable.

    Parameters
    ----------
    result : GLMResults
        Fitted GLM results object (should use log link).

    feature_names : list of str, optional
        Names for each coefficient. If None, uses x0, x1, x2, ...

    title : str, optional
        Title for the summary table.

    alpha : float, optional
        Significance level for confidence intervals.

    Returns
    -------
    str
        Formatted summary table with relativities.

    Raises
    ------
    ValueError
        If ``feature_names`` is given but its length differs from the
        number of model parameters.

    Interpretation
    --------------
    A relativity of 1.15 for "Age 25-35" means that group has 15% higher
    claim frequency than the base level, all else being equal.
    """
    n_params = len(result.params)

    if feature_names is None:
        feature_names = [f"x{i}" for i in range(n_params)]
    elif len(feature_names) != n_params:
        raise ValueError(
            f"feature_names has {len(feature_names)} elements but model has {n_params} parameters"
        )

    coefs = result.params
    conf_ints = result.conf_int(alpha)
    p_vals = result.pvalues()
    sig_codes = result.significance_codes()

    rule_width = 70
    heavy_rule = "=" * rule_width
    light_rule = "-" * rule_width

    # Title banner and one-line model info
    lines = [heavy_rule, title.center(rule_width), heavy_rule, ""]
    lines.append(f"No. Observations: {result.nobs:>10}     Deviance: {result.deviance:>10.4f}")
    lines.append("")
    lines.append(light_rule)

    # Dynamic name column: at least 15 chars (the historical fixed width),
    # growing up to 30 so long term names are not cut off at 15 characters.
    # default=0 keeps a model with no parameters from raising in max().
    max_name_len = max((len(name) for name in feature_names), default=0)
    name_width = min(max(15, max_name_len), 30)

    # The confidence level is formatted with "g" rather than truncated with
    # int(): (1 - 0.9) * 100 evaluates to 9.999999999999998, which int()
    # would label "9% CI"; it would also turn a 97.5% interval into "97% CI".
    ci_label = f"{(1 - alpha) * 100:g}% CI"
    header = f"{'Variable':<{name_width}} {'Coef':>10} {'Relativity':>12} {ci_label + ' (Rel)':>24} {'P>|z|':>8}"
    lines.append(header)
    lines.append(light_rule)

    for full_name, coef, ci, p, sig in zip(
        feature_names, coefs, conf_ints, p_vals, sig_codes
    ):
        name = full_name[:name_width]  # Truncate only if exceeds max
        # Unpack the interval row instead of 2-D indexing so that both a
        # 2-D array and a plain sequence of (low, high) pairs are accepted.
        ci_low, ci_high = ci

        # Relativity and its interval are the exponentiated coefficient
        # and the exponentiated interval bounds.
        rel = np.exp(coef)
        ci_low_rel = np.exp(ci_low)
        ci_high_rel = np.exp(ci_high)

        # Very small p-values are shown as a bound instead of 0.0000
        p_str = "<0.0001" if p < 0.0001 else f"{p:.4f}"

        ci_str = f"[{ci_low_rel:>8.4f}, {ci_high_rel:>8.4f}]"
        lines.append(
            f"{name:<{name_width}} {coef:>10.4f} {rel:>12.4f} {ci_str:>24} {p_str:>8} {sig}"
        )

    lines.append(light_rule)
    lines.append("Relativity = exp(Coef). Values > 1 increase the response.")
    lines.append(heavy_rule)

    return "\n".join(lines)