febolt 0.1.58.tar.gz → 0.1.60.tar.gz

{febolt-0.1.58 → febolt-0.1.60}/Cargo.lock

@@ -362,7 +362,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 
 [[package]]
 name = "febolt"
-version = "0.1.58"
+version = "0.1.60"
 dependencies = [
  "blas",
  "intel-mkl-src",
{febolt-0.1.58 → febolt-0.1.60}/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "febolt"
-version = "0.1.58"
+version = "0.1.60"
 edition = "2021"
 description = "Statistics library for Python powered by Rust"
 license = "MIT"
{febolt-0.1.58 → febolt-0.1.60}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: febolt
-Version: 0.1.58
+Version: 0.1.60
 Classifier: Programming Language :: Rust
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
{febolt-0.1.58 → febolt-0.1.60}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "febolt"
-version = "0.1.58"
+version = "0.1.60"
 requires-python = ">=3.8"
 description = "A Rust-based Statistics and ML package, callable from Python."
 keywords = ["rust", "python", "Machine Learning", "Statistics", "pyo3"]
{febolt-0.1.58 → febolt-0.1.60}/src/lib.rs

@@ -82,13 +82,32 @@ fn pdf_logit_probit(is_logit: bool, z: f64) -> f64 {
     }
 }
 
-/// The main function that calculates Logit/Probit AME in the same style as the original Probit code
+/// Derivative of the pdf with respect to z.
+fn pdf_deriv_logit_probit(is_logit: bool, z: f64) -> f64 {
+    if is_logit {
+        // For logistic: f'(z) = f(z) * (1 - 2 * F(z))
+        pdf_logit_probit(is_logit, z) * (1.0 - 2.0 * cdf_logit_probit(is_logit, z))
+    } else {
+        // For normal: f'(z) = -z * phi(z)
+        -z * pdf_logit_probit(is_logit, z)
+    }
+}
+
+/// The main function that calculates Logit/Probit AME in the same style as the original Probit code.
+///
+/// An extra parameter `se_method` (optional, default "rust") controls the SE calculation:
+/// - "rust": uses the original gradient (Jacobian) calculation
+/// - "sm":   uses a statsmodels-style gradient for continuous variables
 #[pyfunction]
 fn ame<'py>(
     py: Python<'py>,
-    model: &'py PyAny,          // Could be Logit or Probit
-    chunk_size: Option<usize>,  // optional chunk
+    model: &'py PyAny,          // Could be Logit or Probit
+    chunk_size: Option<usize>,  // Optional chunk size
+    se_method: Option<&str>,    // "rust" (default) or "sm"
 ) -> PyResult<&'py PyAny> {
+    // Determine which SE method to use.
+    let se_method_str = se_method.unwrap_or("rust");
+
     // 1) detect Logit vs Probit
     let is_logit = detect_model_type(model)?;
 
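A quick check of the two branches added in pdf_deriv_logit_probit, since the
formulas are stated only in comments. For the logistic CDF F(z) = 1/(1 + e^-z),
the density satisfies f = F(1 - F) and F' = f, so

    f'(z) = f(z)(1 - F(z)) - F(z) f(z) = f(z)(1 - 2 F(z))

and for the standard normal density phi(z) = (2*pi)^(-1/2) exp(-z^2 / 2),
differentiating the exponent gives phi'(z) = -z phi(z). Both match the code.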
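From Python, se_method is simply a third optional argument to febolt.ame. A
minimal usage sketch; the statsmodels example dataset, the positional call
style, and the get_margeff comparison are illustrative assumptions, not part
of this diff:

    import statsmodels.api as sm
    import febolt

    # Any fitted binary model works; spector is an arbitrary example dataset.
    data = sm.datasets.spector.load_pandas()
    X = sm.add_constant(data.exog)
    res = sm.Logit(data.endog, X).fit(disp=0)

    ame_default = febolt.ame(res)         # "rust" standard errors (default)
    ame_sm = febolt.ame(res, None, "sm")  # statsmodels-style standard errors

    # If the "sm" gradients mirror statsmodels, this should be comparable:
    print(res.get_margeff(at="overall", method="dydx").summary())
    print(ame_sm)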
@@ -100,7 +119,7 @@ fn ame<'py>(
         params_obj.downcast::<PyArray1<f64>>()?
     };
     let beta = unsafe { params_pyarray.as_array() };
-
+
     // 3) read cov (handle pandas DataFrame)
     let cov_obj = model.call_method0("cov_params")?;
     let cov_pyarray = if let Ok(values) = cov_obj.getattr("values") {
@@ -112,17 +131,13 @@ fn ame<'py>(
 
     // 4) Get model object and handle exog (X) and exog_names
     let model_obj = model.getattr("model").unwrap_or(model);
-
-    // Handle pandas DataFrame input
     let exog_py = model_obj.getattr("exog")?;
     let (x_pyarray, exog_names) = if let Ok(values) = exog_py.getattr("values") {
-        // Pandas DataFrame path
         (
             values.downcast::<PyArray2<f64>>()?,
             exog_py.getattr("columns")?.extract::<Vec<String>>()?
         )
     } else {
-        // Numpy array path
         (
             exog_py.downcast::<PyArray2<f64>>()?,
             model_obj.getattr("exog_names")?.extract::<Vec<String>>()?
@@ -165,27 +180,29 @@ fn ame<'py>(
         })
         .collect();
 
-    // 7) Prepare accumulators
-    let mut sum_ame = vec![0.0; k]; // sum partial effects
+    // 7) Prepare accumulators.
+    // We keep two sets of accumulators:
+    //   - one for the "rust" (original) method,
+    //   - one for the statsmodels ("sm") method.
+    let mut sum_ame = vec![0.0; k];
     let mut partial_jl_sums = vec![0.0; k * k];
+    let mut sm_sum_ame = vec![0.0; k];
+    let mut sm_partial_jl_sums = vec![0.0; k * k];
     let normal = Normal::new(0.0, 1.0).unwrap();
 
-    // 8) single pass with chunk
+    // 8) Process data in chunks
     let mut idx_start = 0;
     while idx_start < n {
         let idx_end = (idx_start + chunk).min(n);
         let x_chunk = X.slice(s![idx_start..idx_end, ..]);
         let z_chunk = x_chunk.dot(&beta); // shape(n_chunk)
-
-        // pdf => we might do partial for continuous
         let pdf_chunk = z_chunk.mapv(|z| pdf_logit_probit(is_logit, z));
 
-        // handle discrete
+        // Handle discrete variables (same for both methods)
         for &j in &is_discrete {
             let xj_col = x_chunk.column(j);
             let b_j = beta[j];
-            // z_j1 => z + (1-xj)*b_j
-            // z_j0 => z - xj*b_j
+            // Compute z for x_j set to 1 and 0:
             let delta_j1 = (1.0 - &xj_col).mapv(|x| x * b_j);
             let delta_j0 = xj_col.mapv(|x| -x * b_j);
             let z_j1 = &z_chunk + &delta_j1;
@@ -193,16 +210,14 @@ fn ame<'py>(
 
             let cdf_j1 = z_j1.mapv(|z| cdf_logit_probit(is_logit, z));
             let cdf_j0 = z_j0.mapv(|z| cdf_logit_probit(is_logit, z));
-            // sum
             let effect_sum = cdf_j1.sum() - cdf_j0.sum();
             sum_ame[j] += effect_sum;
+            sm_sum_ame[j] += effect_sum;
 
-            // partial_jl_sums => row j, col l
             let pdf_j1 = z_j1.mapv(|z| pdf_logit_probit(is_logit, z));
             let pdf_j0 = z_j0.mapv(|z| pdf_logit_probit(is_logit, z));
             for l in 0..k {
                 let grad = if l == j {
-                    // special case
                     pdf_j1.sum()
                 } else {
                     let x_l = x_chunk.column(l);
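For context on the discrete block (shared by both SE methods): for a binary
regressor x_j the AME is a finite difference, and its Jacobian row has a closed
form. Writing z_i(j,d) for x_i'beta with x_ij forced to the value d:

    AME_j         = (1/n) * sum_i [ F(z_i(j,1)) - F(z_i(j,0)) ]
    dAME_j / db_l = (1/n) * sum_i [ f(z_i(j,1)) * x_il(1) - f(z_i(j,0)) * x_il(0) ]

For l == j the forced values are x_ij(1) = 1 and x_ij(0) = 0, leaving only
sum_i f(z_i(j,1)) -- the pdf_j1.sum() branch. For l != j the regressor is the
same in both terms, so the sum contracts to (pdf_j1 - pdf_j0) dotted with x_l,
which is presumably what the off-screen diff_pdf variable holds.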
@@ -210,42 +225,65 @@ fn ame<'py>(
                     diff_pdf.dot(&x_l)
                 };
                 partial_jl_sums[j * k + l] += grad;
+                sm_partial_jl_sums[j * k + l] += grad;
             }
         }
 
-        // handle continuous
+        // Handle continuous variables.
        for j in 0..k {
            if intercept_indices.contains(&j) || is_discrete.contains(&j) {
                continue;
            }
            let b_j = beta[j];
-            // sum_ame
-            sum_ame[j] += b_j * pdf_chunk.sum();
-            // partial_jl_sums => row j, col l
-            for l in 0..k {
-                let grad = if j == l {
-                    pdf_chunk.sum()
-                } else {
-                    // - b_j * sum(z_chunk * x_col(l) * pdf_chunk)
+            if se_method_str == "rust" {
+                // Original method: update using b_j * pdf
+                sum_ame[j] += b_j * pdf_chunk.sum();
+                for l in 0..k {
+                    let grad = if j == l {
+                        pdf_chunk.sum()
+                    } else {
+                        let x_l = x_chunk.column(l);
+                        -b_j * (&z_chunk * &x_l).dot(&pdf_chunk)
+                    };
+                    partial_jl_sums[j * k + l] += grad;
+                }
+            } else if se_method_str == "sm" {
+                // statsmodels-style: use the derivative of the pdf.
+                let fprime_chunk = z_chunk.mapv(|z| pdf_deriv_logit_probit(is_logit, z));
+                sm_sum_ame[j] += b_j * pdf_chunk.sum();
+                for l in 0..k {
                     let x_l = x_chunk.column(l);
-                    // careful about sign from the original code
-                    -b_j * (&z_chunk * &x_l).dot(&pdf_chunk)
-                };
-                partial_jl_sums[j * k + l] += grad;
+                    let term = (&x_l * &fprime_chunk).sum();
+                    let grad = if j == l {
+                        pdf_chunk.sum() + b_j * term
+                    } else {
+                        b_j * term
+                    };
+                    sm_partial_jl_sums[j * k + l] += grad;
+                }
             }
         }
 
        idx_start = idx_end;
    }
 
-    // 9) average sums
-    let ame: Vec<f64> = sum_ame.iter().map(|v| v / (n as f64)).collect();
+    // 9) Average sums over n.
+    let final_ame: Vec<f64> = if se_method_str == "sm" {
+        sm_sum_ame.iter().map(|v| v / (n as f64)).collect()
+    } else {
+        sum_ame.iter().map(|v| v / (n as f64)).collect()
+    };
 
-    // gradient matrix => shape(k,k)
-    let mut grad_ame = Array2::<f64>::zeros((k,k));
+    // Build gradient matrix: shape (k, k)
+    let mut grad_ame = Array2::<f64>::zeros((k, k));
     for j in 0..k {
         for l in 0..k {
-            grad_ame[[j,l]] = partial_jl_sums[j * k + l] / (n as f64);
+            let value = if se_method_str == "sm" {
+                sm_partial_jl_sums[j * k + l] / (n as f64)
+            } else {
+                partial_jl_sums[j * k + l] / (n as f64)
+            };
+            grad_ame[[j, l]] = value;
         }
     }
 
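The two continuous-variable branches differ only in the Jacobian fed to the
delta method. For AME_j = (1/n) * sum_i b_j * f(z_i), differentiating under
the sum gives the statsmodels-style ("sm") entries:

    dAME_j / db_l = (1/n) * sum_i [ delta_jl * f(z_i) + b_j * f'(z_i) * x_il ]

where delta_jl is the Kronecker delta and sum_i f'(z_i) * x_il is the code's
`term`. The "rust" branch keeps only sum_i f(z_i) on the diagonal and uses
-b_j * sum_i z_i * x_il * f(z_i) off the diagonal; since phi'(z) = -z * phi(z),
the two methods produce identical off-diagonal entries for probit and differ
by the b_j * f' * x term on the diagonal (and more broadly for logit).
Downstream, in code outside these hunks, se_ame is presumably the usual
delta-method standard error, se_j = sqrt((J * Sigma * J')_jj), with
J = grad_ame and Sigma = cov_params.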
@@ -257,37 +295,45 @@ fn ame<'py>(
     // 10) Build final results
     let (mut dy_dx, mut se_err, mut z_vals, mut p_vals, mut sig) =
         (Vec::new(), Vec::new(), Vec::new(), Vec::new(), Vec::new());
+    let (mut conf_low, mut conf_high) = (Vec::new(), Vec::new());
     let mut names_out = Vec::new();
 
     for j in 0..k {
         if intercept_indices.contains(&j) {
             continue;
         }
-        let dy = ame[j];
+        let dy = final_ame[j];
         let s = se_ame[j];
         dy_dx.push(dy);
         se_err.push(s);
         if s > 1e-15 {
             let z = dy / s;
-            let p = 2.0*(1.0 - normal.cdf(z.abs()));
+            let p = 2.0 * (1.0 - normal.cdf(z.abs()));
             z_vals.push(z);
             p_vals.push(p);
             sig.push(add_significance_stars(p));
+            // Compute 95% confidence interval using 1.96 * SE
+            conf_low.push(dy - 1.96 * s);
+            conf_high.push(dy + 1.96 * s);
         } else {
             z_vals.push(0.0);
             p_vals.push(1.0);
             sig.push("");
+            conf_low.push(dy);
+            conf_high.push(dy);
         }
         names_out.push(exog_names[j].clone());
     }
 
-    // 11) Create DataFrame
+    // 11) Create DataFrame with extra confidence interval columns.
     let pd = py.import("pandas")?;
     let data = PyDict::new(py);
     data.set_item("dy/dx", &dy_dx)?;
     data.set_item("Std. Err", &se_err)?;
     data.set_item("z", &z_vals)?;
     data.set_item("Pr(>|z|)", &p_vals)?;
+    data.set_item("Conf. Int. Low", &conf_low)?;
+    data.set_item("Conf. Int. Hi", &conf_high)?;
     data.set_item("Significance", &sig)?;
 
     let kwargs = PyDict::new(py);
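On the new interval columns: given a delta-method standard error s, the added
bounds are the normal-approximation 95% interval dy +/- z_0.975 * s, with the
quantile hard-coded as z_0.975 ~= 1.96. When s underflows (s <= 1e-15) the
code collapses the interval to the point estimate itself.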
@@ -298,9 +344,9 @@ fn ame<'py>(
     Ok(df)
 }
 
-
 #[pymodule]
 fn febolt(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(ame, m)?)?;
     Ok(())
 }
+