febolt 0.1.57.tar.gz → 0.1.59.tar.gz

--- febolt-0.1.57/Cargo.lock
+++ febolt-0.1.59/Cargo.lock
@@ -362,7 +362,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 
 [[package]]
 name = "febolt"
-version = "0.1.57"
+version = "0.1.59"
 dependencies = [
  "blas",
  "intel-mkl-src",
--- febolt-0.1.57/Cargo.toml
+++ febolt-0.1.59/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "febolt"
-version = "0.1.57"
+version = "0.1.59"
 edition = "2021"
 description = "Statistics library for Python powered by Rust"
 license = "MIT"
--- febolt-0.1.57/PKG-INFO
+++ febolt-0.1.59/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: febolt
-Version: 0.1.57
+Version: 0.1.59
 Classifier: Programming Language :: Rust
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
--- febolt-0.1.57/pyproject.toml
+++ febolt-0.1.59/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "febolt"
-version = "0.1.57"
+version = "0.1.59"
 requires-python = ">=3.8"
 description = "A Rust-based Statistics and ML package, callable from Python."
 keywords = ["rust", "python", "Machine Learning", "Statistics", "pyo3"]
--- febolt-0.1.57/src/lib.rs
+++ febolt-0.1.59/src/lib.rs
@@ -82,37 +82,62 @@ fn pdf_logit_probit(is_logit: bool, z: f64) -> f64 {
     }
 }
 
-/// The main function that calculates Logit/Probit AME in the same style as the original Probit code
+/// Derivative of the pdf with respect to z
+fn pdf_deriv_logit_probit(is_logit: bool, z: f64) -> f64 {
+    if is_logit {
+        // For logistic: f'(z) = f(z) * (1 - 2 * F(z))
+        pdf_logit_probit(is_logit, z) * (1.0 - 2.0 * cdf_logit_probit(is_logit, z))
+    } else {
+        // For normal: f'(z) = -z * phi(z)
+        -z * pdf_logit_probit(is_logit, z)
+    }
+}
+
+/// The main function that calculates the Logit/Probit AME in the same style as the original Probit code.
+///
+/// An extra parameter `se_method` (optional, default "rust") controls the SE calculation:
+///   - "rust": uses the original gradient (Jacobian) calculation
+///   - "sm":   uses a statsmodels-style gradient for continuous variables
 #[pyfunction]
 fn ame<'py>(
     py: Python<'py>,
-    model: &'py PyAny,         // Could be Logit or Probit
-    chunk_size: Option<usize>, // optional chunk
+    model: &'py PyAny,         // Could be Logit or Probit
+    chunk_size: Option<usize>, // Optional chunk size
+    se_method: Option<&str>,   // "rust" (default) or "sm"
 ) -> PyResult<&'py PyAny> {
+    // Determine which SE method to use.
+    let se_method_str = se_method.unwrap_or("rust");
+
     // 1) detect Logit vs Probit
     let is_logit = detect_model_type(model)?;
 
-    // 2) read params
-    let params_pyarray: &PyArray1<f64> = model.getattr("params")?.downcast()?;
-    let beta = unsafe { params_pyarray.as_array() }; // shape(k)
+    // 2) read params (handle pandas Series)
+    let params_obj = model.getattr("params")?;
+    let params_pyarray = if let Ok(values) = params_obj.getattr("values") {
+        values.downcast::<PyArray1<f64>>()?
+    } else {
+        params_obj.downcast::<PyArray1<f64>>()?
+    };
+    let beta = unsafe { params_pyarray.as_array() };
 
-    // 3) read cov
-    let cov_pyarray: &PyArray2<f64> = model.call_method0("cov_params")?.downcast()?;
-    let cov_beta = unsafe { cov_pyarray.as_array() }; // shape(k,k)
+    // 3) read cov (handle pandas DataFrame)
+    let cov_obj = model.call_method0("cov_params")?;
+    let cov_pyarray = if let Ok(values) = cov_obj.getattr("values") {
+        values.downcast::<PyArray2<f64>>()?
+    } else {
+        cov_obj.downcast::<PyArray2<f64>>()?
+    };
+    let cov_beta = unsafe { cov_pyarray.as_array() };
 
     // 4) Get model object and handle exog (X) and exog_names
     let model_obj = model.getattr("model").unwrap_or(model);
-
-    // Handle pandas DataFrame input
     let exog_py = model_obj.getattr("exog")?;
     let (x_pyarray, exog_names) = if let Ok(values) = exog_py.getattr("values") {
-        // Pandas DataFrame path
         (
             values.downcast::<PyArray2<f64>>()?,
             exog_py.getattr("columns")?.extract::<Vec<String>>()?
         )
     } else {
-        // Numpy array path
        (
            exog_py.downcast::<PyArray2<f64>>()?,
            model_obj.getattr("exog_names")?.extract::<Vec<String>>()?
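
The new `pdf_deriv_logit_probit` rests on two standard identities: for the logistic density, f'(z) = f(z) * (1 - 2 * F(z)); for the standard normal, phi'(z) = -z * phi(z). A minimal Python sketch (scipy assumed; `pdf_deriv` is just an illustrative mirror of the Rust helper) checks both against a central finite difference:

    from scipy.stats import logistic, norm

    def pdf_deriv(is_logit, z):
        if is_logit:
            # logistic: f'(z) = f(z) * (1 - 2 * F(z))
            return logistic.pdf(z) * (1.0 - 2.0 * logistic.cdf(z))
        # normal: phi'(z) = -z * phi(z)
        return -z * norm.pdf(z)

    h = 1e-6
    for is_logit in (True, False):
        dist = logistic if is_logit else norm
        for z in (-2.0, -0.5, 0.0, 1.3, 3.0):
            fd = (dist.pdf(z + h) - dist.pdf(z - h)) / (2.0 * h)  # central difference
            assert abs(pdf_deriv(is_logit, z) - fd) < 1e-6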
@@ -155,27 +180,29 @@ fn ame<'py>(
         })
         .collect();
 
-    // 7) Prepare accumulators
-    let mut sum_ame = vec![0.0; k]; // sum partial effects
+    // 7) Prepare accumulators.
+    //    We keep two sets of accumulators:
+    //      - one for the "rust" (original) method,
+    //      - one for the statsmodels ("sm") method.
+    let mut sum_ame = vec![0.0; k];
     let mut partial_jl_sums = vec![0.0; k * k];
+    let mut sm_sum_ame = vec![0.0; k];
+    let mut sm_partial_jl_sums = vec![0.0; k * k];
     let normal = Normal::new(0.0, 1.0).unwrap();
 
-    // 8) single pass with chunk
+    // 8) Process data in chunks
     let mut idx_start = 0;
     while idx_start < n {
         let idx_end = (idx_start + chunk).min(n);
         let x_chunk = X.slice(s![idx_start..idx_end, ..]);
         let z_chunk = x_chunk.dot(&beta); // shape(n_chunk)
-
-        // pdf => we might do partial for continuous
         let pdf_chunk = z_chunk.mapv(|z| pdf_logit_probit(is_logit, z));
 
-        // handle discrete
+        // Handle discrete variables (same for both methods)
         for &j in &is_discrete {
             let xj_col = x_chunk.column(j);
             let b_j = beta[j];
-            // z_j1 => z + (1-xj)*b_j
-            // z_j0 => z - xj*b_j
+            // Compute z for x_j set to 1 and 0:
             let delta_j1 = (1.0 - &xj_col).mapv(|x| x * b_j);
             let delta_j0 = xj_col.mapv(|x| -x * b_j);
             let z_j1 = &z_chunk + &delta_j1;
@@ -183,16 +210,14 @@ fn ame<'py>(
 
             let cdf_j1 = z_j1.mapv(|z| cdf_logit_probit(is_logit, z));
             let cdf_j0 = z_j0.mapv(|z| cdf_logit_probit(is_logit, z));
-            // sum
             let effect_sum = cdf_j1.sum() - cdf_j0.sum();
             sum_ame[j] += effect_sum;
+            sm_sum_ame[j] += effect_sum;
 
-            // partial_jl_sums => row j, col l
             let pdf_j1 = z_j1.mapv(|z| pdf_logit_probit(is_logit, z));
             let pdf_j0 = z_j0.mapv(|z| pdf_logit_probit(is_logit, z));
             for l in 0..k {
                 let grad = if l == j {
-                    // special case
                     pdf_j1.sum()
                 } else {
                     let x_l = x_chunk.column(l);
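
For a binary regressor x_j, the accumulated AME is the average of F(z_j1) - F(z_j0), where z_j1 and z_j0 force x_j to 1 and 0. The Jacobian row used later for the delta method stacks d(AME_j)/d(beta_l): when l == j it collapses to the mean of f(z_j1), since x_j is 1 in z_j1 and 0 in z_j0; otherwise it is the mean of (f(z_j1) - f(z_j0)) * x_l, i.e. the `diff_pdf.dot(&x_l)` term continued in the next hunk. An illustrative numpy restatement of the same loop (probit defaults; `X` and `beta` are assumed inputs):

    import numpy as np
    from scipy.stats import norm

    def discrete_ame_and_grad(X, beta, j, cdf=norm.cdf, pdf=norm.pdf):
        """AME of binary column j and its gradient w.r.t. beta."""
        z = X @ beta
        z1 = z + (1.0 - X[:, j]) * beta[j]  # x_j forced to 1
        z0 = z - X[:, j] * beta[j]          # x_j forced to 0
        ame_j = (cdf(z1) - cdf(z0)).mean()
        grad = np.empty(len(beta))
        for l in range(len(beta)):
            if l == j:
                grad[l] = pdf(z1).mean()    # d/d(beta_j): x_j is 1 in z1, 0 in z0
            else:
                grad[l] = ((pdf(z1) - pdf(z0)) * X[:, l]).mean()
        return ame_j, grad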
@@ -200,42 +225,65 @@ fn ame<'py>(
                     diff_pdf.dot(&x_l)
                 };
                 partial_jl_sums[j * k + l] += grad;
+                sm_partial_jl_sums[j * k + l] += grad;
             }
         }
 
-        // handle continuous
+        // Handle continuous variables.
         for j in 0..k {
             if intercept_indices.contains(&j) || is_discrete.contains(&j) {
                 continue;
             }
             let b_j = beta[j];
-            // sum_ame
-            sum_ame[j] += b_j * pdf_chunk.sum();
-            // partial_jl_sums => row j, col l
-            for l in 0..k {
-                let grad = if j == l {
-                    pdf_chunk.sum()
-                } else {
-                    // - b_j * sum(z_chunk * x_col(l) * pdf_chunk)
+            if se_method_str == "rust" {
+                // Original method: update using b_j * pdf
+                sum_ame[j] += b_j * pdf_chunk.sum();
+                for l in 0..k {
+                    let grad = if j == l {
+                        pdf_chunk.sum()
+                    } else {
+                        let x_l = x_chunk.column(l);
+                        -b_j * (&z_chunk * &x_l).dot(&pdf_chunk)
+                    };
+                    partial_jl_sums[j * k + l] += grad;
+                }
+            } else if se_method_str == "sm" {
+                // statsmodels-style: use the derivative of the pdf.
+                let fprime_chunk = z_chunk.mapv(|z| pdf_deriv_logit_probit(is_logit, z));
+                sm_sum_ame[j] += b_j * pdf_chunk.sum();
+                for l in 0..k {
                     let x_l = x_chunk.column(l);
-                // careful about sign from the original code
-                -b_j * (&z_chunk * &x_l).dot(&pdf_chunk)
-            };
-            partial_jl_sums[j * k + l] += grad;
+                    let term = (&x_l * &fprime_chunk).sum();
+                    let grad = if j == l {
+                        pdf_chunk.sum() + b_j * term
+                    } else {
+                        b_j * term
+                    };
+                    sm_partial_jl_sums[j * k + l] += grad;
+                }
             }
         }
 
         idx_start = idx_end;
     }
 
-    // 9) average sums
-    let ame: Vec<f64> = sum_ame.iter().map(|v| v / (n as f64)).collect();
+    // 9) Average sums over n.
+    let final_ame: Vec<f64> = if se_method_str == "sm" {
+        sm_sum_ame.iter().map(|v| v / (n as f64)).collect()
+    } else {
+        sum_ame.iter().map(|v| v / (n as f64)).collect()
+    };
 
-    // gradient matrix => shape(k,k)
-    let mut grad_ame = Array2::<f64>::zeros((k,k));
+    // Build gradient matrix: shape (k, k)
+    let mut grad_ame = Array2::<f64>::zeros((k, k));
     for j in 0..k {
         for l in 0..k {
-            grad_ame[[j,l]] = partial_jl_sums[j * k + l] / (n as f64);
+            let value = if se_method_str == "sm" {
+                sm_partial_jl_sums[j * k + l] / (n as f64)
+            } else {
+                partial_jl_sums[j * k + l] / (n as f64)
+            };
+            grad_ame[[j, l]] = value;
         }
     }
 
@@ -253,13 +301,13 @@ fn ame<'py>(
         if intercept_indices.contains(&j) {
             continue;
         }
-        let dy = ame[j];
+        let dy = final_ame[j];
         let s = se_ame[j];
         dy_dx.push(dy);
         se_err.push(s);
         if s > 1e-15 {
             let z = dy / s;
-            let p = 2.0*(1.0 - normal.cdf(z.abs()));
+            let p = 2.0 * (1.0 - normal.cdf(z.abs()));
             z_vals.push(z);
             p_vals.push(p);
             sig.push(add_significance_stars(p));
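
The `se_ame` values consumed here come from code outside these hunks, presumably the usual delta method applied to `grad_ame` and `cov_beta`; the z statistic and two-sided p-value then follow as in the `+` line above. A compact numpy equivalent of that inference step, under that delta-method assumption:

    import numpy as np
    from scipy.stats import norm

    def ame_inference(ame, grad_ame, cov_beta):
        # Delta method (assumed): Var(AME) = J * Cov(beta) * J^T, with J = grad_ame
        se = np.sqrt(np.diag(grad_ame @ cov_beta @ grad_ame.T))
        z = ame / se
        p = 2.0 * (1.0 - norm.cdf(np.abs(z)))  # same formula as the Rust line
        return se, z, p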
@@ -288,7 +336,6 @@ fn ame<'py>(
     Ok(df)
 }
 
-
 #[pymodule]
 fn febolt(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(ame, m)?)?;
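
From Python, the new argument slots in after `chunk_size`. A hypothetical end-to-end call (the statsmodels setup is illustrative; only the `model`, `chunk_size`, and `se_method` parameters come from the signature in this diff):

    import numpy as np
    import statsmodels.api as sm
    import febolt

    # Illustrative synthetic logit data; not part of the package.
    rng = np.random.default_rng(0)
    X = sm.add_constant(rng.normal(size=(500, 2)))
    y = (X @ np.array([0.2, 0.8, -0.5]) + rng.logistic(size=500) > 0).astype(float)

    fit = sm.Logit(y, X).fit(disp=0)
    print(febolt.ame(fit))                  # original "rust" standard errors
    print(febolt.ame(fit, se_method="sm"))  # statsmodels-style standard errors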