febolt 0.1.58__tar.gz → 0.1.60__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {febolt-0.1.58 → febolt-0.1.60}/Cargo.lock +1 -1
- {febolt-0.1.58 → febolt-0.1.60}/Cargo.toml +1 -1
- {febolt-0.1.58 → febolt-0.1.60}/PKG-INFO +1 -1
- {febolt-0.1.58 → febolt-0.1.60}/pyproject.toml +1 -1
- {febolt-0.1.58 → febolt-0.1.60}/src/lib.rs +87 -41
- {febolt-0.1.58 → febolt-0.1.60}/.github/workflows/CI.yml +0 -0
- {febolt-0.1.58 → febolt-0.1.60}/.gitignore +0 -0
- {febolt-0.1.58 → febolt-0.1.60}/README.md +0 -0
- {febolt-0.1.58 → febolt-0.1.60}/build.rs +0 -0
@@ -4,7 +4,7 @@ build-backend = "maturin"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "febolt"
|
7
|
-
version = "0.1.58"
|
7
|
+
version = "0.1.60"
|
8
8
|
requires-python = ">=3.8"
|
9
9
|
description = "A Rust-based Statistics and ML package, callable from Python."
|
10
10
|
keywords = ["rust", "python", "Machine Learning", "Statistics", "pyo3"]
|
@@ -82,13 +82,32 @@ fn pdf_logit_probit(is_logit: bool, z: f64) -> f64 {
|
|
82
82
|
}
|
83
83
|
}
|
84
84
|
|
85
|
-
///
|
85
|
+
/// Derivative of the pdf with respect to z
|
86
|
+
fn pdf_deriv_logit_probit(is_logit: bool, z: f64) -> f64 {
|
87
|
+
if is_logit {
|
88
|
+
// For logistic: f'(z) = f(z) * (1 - 2 * F(z))
|
89
|
+
pdf_logit_probit(is_logit, z) * (1.0 - 2.0 * cdf_logit_probit(is_logit, z))
|
90
|
+
} else {
|
91
|
+
// For normal: f'(z) = -z * phi(z)
|
92
|
+
-z * pdf_logit_probit(is_logit, z)
|
93
|
+
}
|
94
|
+
}
|
95
|
+
|
96
|
+
/// The main function that calculates Logit/Probit AME in the same style as the original Probit code.
|
97
|
+
///
|
98
|
+
/// An extra parameter `se_method` (optional, default "rust") controls the SE calculation:
|
99
|
+
/// - "rust": uses your original gradient (Jacobian) calculation
|
100
|
+
/// - "sm": uses a statsmodels-style gradient for continuous variables
|
86
101
|
#[pyfunction]
|
87
102
|
fn ame<'py>(
|
88
103
|
py: Python<'py>,
|
89
|
-
model: &'py PyAny,
|
90
|
-
chunk_size: Option<usize>,
|
104
|
+
model: &'py PyAny, // Could be Logit or Probit
|
105
|
+
chunk_size: Option<usize>, // Optional chunk size
|
106
|
+
se_method: Option<&str>, // "rust" (default) or "sm"
|
91
107
|
) -> PyResult<&'py PyAny> {
|
108
|
+
// Determine which SE method to use.
|
109
|
+
let se_method_str = se_method.unwrap_or("rust");
|
110
|
+
|
92
111
|
// 1) detect Logit vs Probit
|
93
112
|
let is_logit = detect_model_type(model)?;
|
94
113
|
|
@@ -100,7 +119,7 @@ fn ame<'py>(
|
|
100
119
|
params_obj.downcast::<PyArray1<f64>>()?
|
101
120
|
};
|
102
121
|
let beta = unsafe { params_pyarray.as_array() };
|
103
|
-
|
122
|
+
|
104
123
|
// 3) read cov (handle pandas DataFrame)
|
105
124
|
let cov_obj = model.call_method0("cov_params")?;
|
106
125
|
let cov_pyarray = if let Ok(values) = cov_obj.getattr("values") {
|
@@ -112,17 +131,13 @@ fn ame<'py>(
|
|
112
131
|
|
113
132
|
// 4) Get model object and handle exog (X) and exog_names
|
114
133
|
let model_obj = model.getattr("model").unwrap_or(model);
|
115
|
-
|
116
|
-
// Handle pandas DataFrame input
|
117
134
|
let exog_py = model_obj.getattr("exog")?;
|
118
135
|
let (x_pyarray, exog_names) = if let Ok(values) = exog_py.getattr("values") {
|
119
|
-
// Pandas DataFrame path
|
120
136
|
(
|
121
137
|
values.downcast::<PyArray2<f64>>()?,
|
122
138
|
exog_py.getattr("columns")?.extract::<Vec<String>>()?
|
123
139
|
)
|
124
140
|
} else {
|
125
|
-
// Numpy array path
|
126
141
|
(
|
127
142
|
exog_py.downcast::<PyArray2<f64>>()?,
|
128
143
|
model_obj.getattr("exog_names")?.extract::<Vec<String>>()?
|
@@ -165,27 +180,29 @@ fn ame<'py>(
|
|
165
180
|
})
|
166
181
|
.collect();
|
167
182
|
|
168
|
-
// 7) Prepare accumulators
|
169
|
-
|
183
|
+
// 7) Prepare accumulators.
|
184
|
+
// We keep two sets of accumulators:
|
185
|
+
// - one for your "rust" (original) method,
|
186
|
+
// - one for the statsmodels ("sm") method.
|
187
|
+
let mut sum_ame = vec![0.0; k];
|
170
188
|
let mut partial_jl_sums = vec![0.0; k * k];
|
189
|
+
let mut sm_sum_ame = vec![0.0; k];
|
190
|
+
let mut sm_partial_jl_sums = vec![0.0; k * k];
|
171
191
|
let normal = Normal::new(0.0, 1.0).unwrap();
|
172
192
|
|
173
|
-
// 8)
|
193
|
+
// 8) Process data in chunks
|
174
194
|
let mut idx_start = 0;
|
175
195
|
while idx_start < n {
|
176
196
|
let idx_end = (idx_start + chunk).min(n);
|
177
197
|
let x_chunk = X.slice(s![idx_start..idx_end, ..]);
|
178
198
|
let z_chunk = x_chunk.dot(&beta); // shape(n_chunk)
|
179
|
-
|
180
|
-
// pdf => we might do partial for continuous
|
181
199
|
let pdf_chunk = z_chunk.mapv(|z| pdf_logit_probit(is_logit, z));
|
182
200
|
|
183
|
-
//
|
201
|
+
// Handle discrete variables (same for both methods)
|
184
202
|
for &j in &is_discrete {
|
185
203
|
let xj_col = x_chunk.column(j);
|
186
204
|
let b_j = beta[j];
|
187
|
-
//
|
188
|
-
// z_j0 => z - xj*b_j
|
205
|
+
// Compute z for x_j set to 1 and 0:
|
189
206
|
let delta_j1 = (1.0 - &xj_col).mapv(|x| x * b_j);
|
190
207
|
let delta_j0 = xj_col.mapv(|x| -x * b_j);
|
191
208
|
let z_j1 = &z_chunk + &delta_j1;
|
@@ -193,16 +210,14 @@ fn ame<'py>(
|
|
193
210
|
|
194
211
|
let cdf_j1 = z_j1.mapv(|z| cdf_logit_probit(is_logit, z));
|
195
212
|
let cdf_j0 = z_j0.mapv(|z| cdf_logit_probit(is_logit, z));
|
196
|
-
// sum
|
197
213
|
let effect_sum = cdf_j1.sum() - cdf_j0.sum();
|
198
214
|
sum_ame[j] += effect_sum;
|
215
|
+
sm_sum_ame[j] += effect_sum;
|
199
216
|
|
200
|
-
// partial_jl_sums => row j, col l
|
201
217
|
let pdf_j1 = z_j1.mapv(|z| pdf_logit_probit(is_logit, z));
|
202
218
|
let pdf_j0 = z_j0.mapv(|z| pdf_logit_probit(is_logit, z));
|
203
219
|
for l in 0..k {
|
204
220
|
let grad = if l == j {
|
205
|
-
// special case
|
206
221
|
pdf_j1.sum()
|
207
222
|
} else {
|
208
223
|
let x_l = x_chunk.column(l);
|
@@ -210,42 +225,65 @@ fn ame<'py>(
|
|
210
225
|
diff_pdf.dot(&x_l)
|
211
226
|
};
|
212
227
|
partial_jl_sums[j * k + l] += grad;
|
228
|
+
sm_partial_jl_sums[j * k + l] += grad;
|
213
229
|
}
|
214
230
|
}
|
215
231
|
|
216
|
-
//
|
232
|
+
// Handle continuous variables.
|
217
233
|
for j in 0..k {
|
218
234
|
if intercept_indices.contains(&j) || is_discrete.contains(&j) {
|
219
235
|
continue;
|
220
236
|
}
|
221
237
|
let b_j = beta[j];
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
238
|
+
if se_method_str == "rust" {
|
239
|
+
// Original method: update using b_j * pdf
|
240
|
+
sum_ame[j] += b_j * pdf_chunk.sum();
|
241
|
+
for l in 0..k {
|
242
|
+
let grad = if j == l {
|
243
|
+
pdf_chunk.sum()
|
244
|
+
} else {
|
245
|
+
let x_l = x_chunk.column(l);
|
246
|
+
-b_j * (&z_chunk * &x_l).dot(&pdf_chunk)
|
247
|
+
};
|
248
|
+
partial_jl_sums[j * k + l] += grad;
|
249
|
+
}
|
250
|
+
} else if se_method_str == "sm" {
|
251
|
+
// statsmodels-style: use the derivative of the pdf.
|
252
|
+
let fprime_chunk = z_chunk.mapv(|z| pdf_deriv_logit_probit(is_logit, z));
|
253
|
+
sm_sum_ame[j] += b_j * pdf_chunk.sum();
|
254
|
+
for l in 0..k {
|
230
255
|
let x_l = x_chunk.column(l);
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
256
|
+
let term = (&x_l * &fprime_chunk).sum();
|
257
|
+
let grad = if j == l {
|
258
|
+
pdf_chunk.sum() + b_j * term
|
259
|
+
} else {
|
260
|
+
b_j * term
|
261
|
+
};
|
262
|
+
sm_partial_jl_sums[j * k + l] += grad;
|
263
|
+
}
|
235
264
|
}
|
236
265
|
}
|
237
266
|
|
238
267
|
idx_start = idx_end;
|
239
268
|
}
|
240
269
|
|
241
|
-
// 9)
|
242
|
-
let
|
270
|
+
// 9) Average sums over n.
|
271
|
+
let final_ame: Vec<f64> = if se_method_str == "sm" {
|
272
|
+
sm_sum_ame.iter().map(|v| v / (n as f64)).collect()
|
273
|
+
} else {
|
274
|
+
sum_ame.iter().map(|v| v / (n as f64)).collect()
|
275
|
+
};
|
243
276
|
|
244
|
-
// gradient matrix
|
245
|
-
let mut grad_ame = Array2::<f64>::zeros((k,k));
|
277
|
+
// Build gradient matrix: shape (k, k)
|
278
|
+
let mut grad_ame = Array2::<f64>::zeros((k, k));
|
246
279
|
for j in 0..k {
|
247
280
|
for l in 0..k {
|
248
|
-
|
281
|
+
let value = if se_method_str == "sm" {
|
282
|
+
sm_partial_jl_sums[j * k + l] / (n as f64)
|
283
|
+
} else {
|
284
|
+
partial_jl_sums[j * k + l] / (n as f64)
|
285
|
+
};
|
286
|
+
grad_ame[[j, l]] = value;
|
249
287
|
}
|
250
288
|
}
|
251
289
|
|
@@ -257,37 +295,45 @@ fn ame<'py>(
|
|
257
295
|
// 10) Build final results
|
258
296
|
let (mut dy_dx, mut se_err, mut z_vals, mut p_vals, mut sig) =
|
259
297
|
(Vec::new(), Vec::new(), Vec::new(), Vec::new(), Vec::new());
|
298
|
+
let (mut conf_low, mut conf_high) = (Vec::new(), Vec::new());
|
260
299
|
let mut names_out = Vec::new();
|
261
300
|
|
262
301
|
for j in 0..k {
|
263
302
|
if intercept_indices.contains(&j) {
|
264
303
|
continue;
|
265
304
|
}
|
266
|
-
let dy =
|
305
|
+
let dy = final_ame[j];
|
267
306
|
let s = se_ame[j];
|
268
307
|
dy_dx.push(dy);
|
269
308
|
se_err.push(s);
|
270
309
|
if s > 1e-15 {
|
271
310
|
let z = dy / s;
|
272
|
-
let p = 2.0*(1.0 - normal.cdf(z.abs()));
|
311
|
+
let p = 2.0 * (1.0 - normal.cdf(z.abs()));
|
273
312
|
z_vals.push(z);
|
274
313
|
p_vals.push(p);
|
275
314
|
sig.push(add_significance_stars(p));
|
315
|
+
// Compute 95% confidence interval using 1.96 * SE
|
316
|
+
conf_low.push(dy - 1.96 * s);
|
317
|
+
conf_high.push(dy + 1.96 * s);
|
276
318
|
} else {
|
277
319
|
z_vals.push(0.0);
|
278
320
|
p_vals.push(1.0);
|
279
321
|
sig.push("");
|
322
|
+
conf_low.push(dy);
|
323
|
+
conf_high.push(dy);
|
280
324
|
}
|
281
325
|
names_out.push(exog_names[j].clone());
|
282
326
|
}
|
283
327
|
|
284
|
-
// 11) Create DataFrame
|
328
|
+
// 11) Create DataFrame with extra confidence interval columns.
|
285
329
|
let pd = py.import("pandas")?;
|
286
330
|
let data = PyDict::new(py);
|
287
331
|
data.set_item("dy/dx", &dy_dx)?;
|
288
332
|
data.set_item("Std. Err", &se_err)?;
|
289
333
|
data.set_item("z", &z_vals)?;
|
290
334
|
data.set_item("Pr(>|z|)", &p_vals)?;
|
335
|
+
data.set_item("Conf. Int. Low", &conf_low)?;
|
336
|
+
data.set_item("Conf. Int. Hi", &conf_high)?;
|
291
337
|
data.set_item("Significance", &sig)?;
|
292
338
|
|
293
339
|
let kwargs = PyDict::new(py);
|
@@ -298,9 +344,9 @@ fn ame<'py>(
|
|
298
344
|
Ok(df)
|
299
345
|
}
|
300
346
|
|
301
|
-
|
302
347
|
#[pymodule]
|
303
348
|
fn febolt(_py: Python, m: &PyModule) -> PyResult<()> {
|
304
349
|
m.add_function(wrap_pyfunction!(ame, m)?)?;
|
305
350
|
Ok(())
|
306
351
|
}
|
352
|
+
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|