febolt 0.1.57.tar.gz → 0.1.59.tar.gz

--- febolt-0.1.57/Cargo.lock
+++ febolt-0.1.59/Cargo.lock
@@ -362,7 +362,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 
 [[package]]
 name = "febolt"
-version = "0.1.57"
+version = "0.1.59"
 dependencies = [
  "blas",
  "intel-mkl-src",
--- febolt-0.1.57/Cargo.toml
+++ febolt-0.1.59/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "febolt"
-version = "0.1.57"
+version = "0.1.59"
 edition = "2021"
 description = "Statistics library for Python powered by Rust"
 license = "MIT"
--- febolt-0.1.57/PKG-INFO
+++ febolt-0.1.59/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: febolt
-Version: 0.1.57
+Version: 0.1.59
 Classifier: Programming Language :: Rust
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
--- febolt-0.1.57/pyproject.toml
+++ febolt-0.1.59/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "febolt"
-version = "0.1.57"
+version = "0.1.59"
 requires-python = ">=3.8"
 description = "A Rust-based Statistics and ML package, callable from Python."
 keywords = ["rust", "python", "Machine Learning", "Statistics", "pyo3"]
--- febolt-0.1.57/src/lib.rs
+++ febolt-0.1.59/src/lib.rs
@@ -82,37 +82,62 @@ fn pdf_logit_probit(is_logit: bool, z: f64) -> f64 {
     }
 }
 
-/// The main function that calculates Logit/Probit AME in the same style as the original Probit code
+/// Derivative of the pdf with respect to z
+fn pdf_deriv_logit_probit(is_logit: bool, z: f64) -> f64 {
+    if is_logit {
+        // For logistic: f'(z) = f(z) * (1 - 2 * F(z))
+        pdf_logit_probit(is_logit, z) * (1.0 - 2.0 * cdf_logit_probit(is_logit, z))
+    } else {
+        // For normal: f'(z) = -z * phi(z)
+        -z * pdf_logit_probit(is_logit, z)
+    }
+}
+
+/// The main function that calculates the Logit/Probit AME in the same style as the original Probit code.
+///
+/// An extra parameter `se_method` (optional, default "rust") controls the SE calculation:
+///   - "rust": uses the original gradient (Jacobian) calculation
+///   - "sm":   uses a statsmodels-style gradient for continuous variables
 #[pyfunction]
 fn ame<'py>(
     py: Python<'py>,
-    model: &'py PyAny,         // Could be Logit or Probit
-    chunk_size: Option<usize>, // optional chunk
+    model: &'py PyAny,         // Could be Logit or Probit
+    chunk_size: Option<usize>, // Optional chunk size
+    se_method: Option<&str>,   // "rust" (default) or "sm"
 ) -> PyResult<&'py PyAny> {
+    // Determine which SE method to use.
+    let se_method_str = se_method.unwrap_or("rust");
+
     // 1) detect Logit vs Probit
     let is_logit = detect_model_type(model)?;
 
-    // 2) read params
-    let params_pyarray: &PyArray1<f64> = model.getattr("params")?.downcast()?;
-    let beta = unsafe { params_pyarray.as_array() }; // shape(k)
+    // 2) read params (handle pandas Series)
+    let params_obj = model.getattr("params")?;
+    let params_pyarray = if let Ok(values) = params_obj.getattr("values") {
+        values.downcast::<PyArray1<f64>>()?
+    } else {
+        params_obj.downcast::<PyArray1<f64>>()?
+    };
+    let beta = unsafe { params_pyarray.as_array() };
 
-    // 3) read cov
-    let cov_pyarray: &PyArray2<f64> = model.call_method0("cov_params")?.downcast()?;
-    let cov_beta = unsafe { cov_pyarray.as_array() }; // shape(k,k)
+    // 3) read cov (handle pandas DataFrame)
+    let cov_obj = model.call_method0("cov_params")?;
+    let cov_pyarray = if let Ok(values) = cov_obj.getattr("values") {
+        values.downcast::<PyArray2<f64>>()?
+    } else {
+        cov_obj.downcast::<PyArray2<f64>>()?
+    };
+    let cov_beta = unsafe { cov_pyarray.as_array() };
 
     // 4) Get model object and handle exog (X) and exog_names
     let model_obj = model.getattr("model").unwrap_or(model);
-
-    // Handle pandas DataFrame input
     let exog_py = model_obj.getattr("exog")?;
     let (x_pyarray, exog_names) = if let Ok(values) = exog_py.getattr("values") {
-        // Pandas DataFrame path
         (
             values.downcast::<PyArray2<f64>>()?,
             exog_py.getattr("columns")?.extract::<Vec<String>>()?
         )
     } else {
-        // Numpy array path
        (
            exog_py.downcast::<PyArray2<f64>>()?,
            model_obj.getattr("exog_names")?.extract::<Vec<String>>()?
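
The new `pdf_deriv_logit_probit` rests on two standard identities: for the logistic density, f'(z) = f(z) * (1 - 2 * F(z)); for the standard normal, phi'(z) = -z * phi(z). A minimal Python sketch (scipy assumed; `pdf_deriv` is just an illustrative mirror of the Rust helper) checks both against a central finite difference:

    from scipy.stats import logistic, norm

    def pdf_deriv(is_logit, z):
        if is_logit:
            # logistic: f'(z) = f(z) * (1 - 2 * F(z))
            return logistic.pdf(z) * (1.0 - 2.0 * logistic.cdf(z))
        # normal: phi'(z) = -z * phi(z)
        return -z * norm.pdf(z)

    h = 1e-6
    for is_logit in (True, False):
        dist = logistic if is_logit else norm
        for z in (-2.0, -0.5, 0.0, 1.3, 3.0):
            fd = (dist.pdf(z + h) - dist.pdf(z - h)) / (2.0 * h)  # central difference
            assert abs(pdf_deriv(is_logit, z) - fd) < 1e-6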
@@ -155,27 +180,29 @@ fn ame<'py>(
         })
         .collect();
 
-    // 7) Prepare accumulators
-    let mut sum_ame = vec![0.0; k]; // sum partial effects
+    // 7) Prepare accumulators.
+    //    We keep two sets of accumulators:
+    //      - one for the "rust" (original) method,
+    //      - one for the statsmodels ("sm") method.
+    let mut sum_ame = vec![0.0; k];
     let mut partial_jl_sums = vec![0.0; k * k];
+    let mut sm_sum_ame = vec![0.0; k];
+    let mut sm_partial_jl_sums = vec![0.0; k * k];
     let normal = Normal::new(0.0, 1.0).unwrap();
 
-    // 8) single pass with chunk
+    // 8) Process data in chunks
     let mut idx_start = 0;
     while idx_start < n {
         let idx_end = (idx_start + chunk).min(n);
         let x_chunk = X.slice(s![idx_start..idx_end, ..]);
         let z_chunk = x_chunk.dot(&beta); // shape(n_chunk)
-
-        // pdf => we might do partial for continuous
         let pdf_chunk = z_chunk.mapv(|z| pdf_logit_probit(is_logit, z));
 
-        // handle discrete
+        // Handle discrete variables (same for both methods)
         for &j in &is_discrete {
             let xj_col = x_chunk.column(j);
             let b_j = beta[j];
-            // z_j1 => z + (1-xj)*b_j
-            // z_j0 => z - xj*b_j
+            // Compute z for x_j set to 1 and 0:
             let delta_j1 = (1.0 - &xj_col).mapv(|x| x * b_j);
             let delta_j0 = xj_col.mapv(|x| -x * b_j);
             let z_j1 = &z_chunk + &delta_j1;
@@ -183,16 +210,14 @@ fn ame<'py>(
 
             let cdf_j1 = z_j1.mapv(|z| cdf_logit_probit(is_logit, z));
             let cdf_j0 = z_j0.mapv(|z| cdf_logit_probit(is_logit, z));
-            // sum
             let effect_sum = cdf_j1.sum() - cdf_j0.sum();
             sum_ame[j] += effect_sum;
+            sm_sum_ame[j] += effect_sum;
 
-            // partial_jl_sums => row j, col l
             let pdf_j1 = z_j1.mapv(|z| pdf_logit_probit(is_logit, z));
             let pdf_j0 = z_j0.mapv(|z| pdf_logit_probit(is_logit, z));
             for l in 0..k {
                 let grad = if l == j {
-                    // special case
                     pdf_j1.sum()
                 } else {
                     let x_l = x_chunk.column(l);
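
For a binary regressor x_j, the accumulated AME is the average of F(z_j1) - F(z_j0), where z_j1 and z_j0 force x_j to 1 and 0. The Jacobian row used later for the delta method stacks d(AME_j)/d(beta_l): when l == j it collapses to the mean of f(z_j1), since x_j is 1 in z_j1 and 0 in z_j0; otherwise it is the mean of (f(z_j1) - f(z_j0)) * x_l, i.e. the `diff_pdf.dot(&x_l)` term continued in the next hunk. An illustrative numpy restatement of the same loop (probit defaults; `X` and `beta` are assumed inputs):

    import numpy as np
    from scipy.stats import norm

    def discrete_ame_and_grad(X, beta, j, cdf=norm.cdf, pdf=norm.pdf):
        """AME of binary column j and its gradient w.r.t. beta."""
        z = X @ beta
        z1 = z + (1.0 - X[:, j]) * beta[j]  # x_j forced to 1
        z0 = z - X[:, j] * beta[j]          # x_j forced to 0
        ame_j = (cdf(z1) - cdf(z0)).mean()
        grad = np.empty(len(beta))
        for l in range(len(beta)):
            if l == j:
                grad[l] = pdf(z1).mean()    # d/d(beta_j): x_j is 1 in z1, 0 in z0
            else:
                grad[l] = ((pdf(z1) - pdf(z0)) * X[:, l]).mean()
        return ame_j, grad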
@@ -200,42 +225,65 @@ fn ame<'py>(
                     diff_pdf.dot(&x_l)
                 };
                 partial_jl_sums[j * k + l] += grad;
+                sm_partial_jl_sums[j * k + l] += grad;
             }
         }
 
-        // handle continuous
+        // Handle continuous variables.
         for j in 0..k {
             if intercept_indices.contains(&j) || is_discrete.contains(&j) {
                 continue;
             }
             let b_j = beta[j];
-            // sum_ame
-            sum_ame[j] += b_j * pdf_chunk.sum();
-            // partial_jl_sums => row j, col l
-            for l in 0..k {
-                let grad = if j == l {
-                    pdf_chunk.sum()
-                } else {
-                    // - b_j * sum(z_chunk * x_col(l) * pdf_chunk)
+            if se_method_str == "rust" {
+                // Original method: update using b_j * pdf
+                sum_ame[j] += b_j * pdf_chunk.sum();
+                for l in 0..k {
+                    let grad = if j == l {
+                        pdf_chunk.sum()
+                    } else {
+                        let x_l = x_chunk.column(l);
+                        -b_j * (&z_chunk * &x_l).dot(&pdf_chunk)
+                    };
+                    partial_jl_sums[j * k + l] += grad;
+                }
+            } else if se_method_str == "sm" {
+                // statsmodels-style: use the derivative of the pdf.
+                let fprime_chunk = z_chunk.mapv(|z| pdf_deriv_logit_probit(is_logit, z));
+                sm_sum_ame[j] += b_j * pdf_chunk.sum();
+                for l in 0..k {
                     let x_l = x_chunk.column(l);
-                // careful about sign from the original code
-                -b_j * (&z_chunk * &x_l).dot(&pdf_chunk)
-            };
-            partial_jl_sums[j * k + l] += grad;
+                    let term = (&x_l * &fprime_chunk).sum();
+                    let grad = if j == l {
+                        pdf_chunk.sum() + b_j * term
+                    } else {
+                        b_j * term
+                    };
+                    sm_partial_jl_sums[j * k + l] += grad;
+                }
             }
         }
 
         idx_start = idx_end;
     }
 
-    // 9) average sums
-    let ame: Vec<f64> = sum_ame.iter().map(|v| v / (n as f64)).collect();
+    // 9) Average sums over n.
+    let final_ame: Vec<f64> = if se_method_str == "sm" {
+        sm_sum_ame.iter().map(|v| v / (n as f64)).collect()
+    } else {
+        sum_ame.iter().map(|v| v / (n as f64)).collect()
+    };
 
-    // gradient matrix => shape(k,k)
-    let mut grad_ame = Array2::<f64>::zeros((k,k));
+    // Build gradient matrix: shape (k, k)
+    let mut grad_ame = Array2::<f64>::zeros((k, k));
     for j in 0..k {
         for l in 0..k {
-            grad_ame[[j,l]] = partial_jl_sums[j * k + l] / (n as f64);
+            let value = if se_method_str == "sm" {
+                sm_partial_jl_sums[j * k + l] / (n as f64)
+            } else {
+                partial_jl_sums[j * k + l] / (n as f64)
+            };
+            grad_ame[[j, l]] = value;
         }
     }
 
@@ -253,13 +301,13 @@ fn ame<'py>(
         if intercept_indices.contains(&j) {
             continue;
         }
-        let dy = ame[j];
+        let dy = final_ame[j];
         let s = se_ame[j];
         dy_dx.push(dy);
         se_err.push(s);
         if s > 1e-15 {
             let z = dy / s;
-            let p = 2.0*(1.0 - normal.cdf(z.abs()));
+            let p = 2.0 * (1.0 - normal.cdf(z.abs()));
             z_vals.push(z);
             p_vals.push(p);
             sig.push(add_significance_stars(p));
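
The `se_ame` values consumed here come from code outside these hunks, presumably the usual delta method applied to `grad_ame` and `cov_beta`; the z statistic and two-sided p-value then follow as in the `+` line above. A compact numpy equivalent of that inference step, under that delta-method assumption:

    import numpy as np
    from scipy.stats import norm

    def ame_inference(ame, grad_ame, cov_beta):
        # Delta method (assumed): Var(AME) = J * Cov(beta) * J^T, with J = grad_ame
        se = np.sqrt(np.diag(grad_ame @ cov_beta @ grad_ame.T))
        z = ame / se
        p = 2.0 * (1.0 - norm.cdf(np.abs(z)))  # same formula as the Rust line
        return se, z, p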
@@ -288,7 +336,6 @@ fn ame<'py>(
     Ok(df)
 }
 
-
 #[pymodule]
 fn febolt(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(ame, m)?)?;
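
From Python, the new argument slots in after `chunk_size`. A hypothetical end-to-end call (the statsmodels setup is illustrative; only the `model`, `chunk_size`, and `se_method` parameters come from the signature in this diff):

    import numpy as np
    import statsmodels.api as sm
    import febolt

    # Illustrative synthetic logit data; not part of the package.
    rng = np.random.default_rng(0)
    X = sm.add_constant(rng.normal(size=(500, 2)))
    y = (X @ np.array([0.2, 0.8, -0.5]) + rng.logistic(size=500) > 0).astype(float)

    fit = sm.Logit(y, X).fit(disp=0)
    print(febolt.ame(fit))                  # original "rust" standard errors
    print(febolt.ame(fit, se_method="sm"))  # statsmodels-style standard errors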