febolt 0.1.58.tar.gz → 0.1.60.tar.gz

{febolt-0.1.58 → febolt-0.1.60}/Cargo.lock

@@ -362,7 +362,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 
 [[package]]
 name = "febolt"
-version = "0.1.58"
+version = "0.1.60"
 dependencies = [
  "blas",
  "intel-mkl-src",
{febolt-0.1.58 → febolt-0.1.60}/Cargo.toml

@@ -1,6 +1,6 @@
 [package]
 name = "febolt"
-version = "0.1.58"
+version = "0.1.60"
 edition = "2021"
 description = "Statistics library for Python powered by Rust"
 license = "MIT"
{febolt-0.1.58 → febolt-0.1.60}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: febolt
-Version: 0.1.58
+Version: 0.1.60
 Classifier: Programming Language :: Rust
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
{febolt-0.1.58 → febolt-0.1.60}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "maturin"
 
 [project]
 name = "febolt"
-version = "0.1.58"
+version = "0.1.60"
 requires-python = ">=3.8"
 description = "A Rust-based Statistics and ML package, callable from Python."
 keywords = ["rust", "python", "Machine Learning", "Statistics", "pyo3"]
{febolt-0.1.58 → febolt-0.1.60}/src/lib.rs

@@ -82,13 +82,32 @@ fn pdf_logit_probit(is_logit: bool, z: f64) -> f64 {
     }
 }
 
-/// The main function that calculates Logit/Probit AME in the same style as the original Probit code
+/// Derivative of the pdf with respect to z.
+fn pdf_deriv_logit_probit(is_logit: bool, z: f64) -> f64 {
+    if is_logit {
+        // For logistic: f'(z) = f(z) * (1 - 2 * F(z))
+        pdf_logit_probit(is_logit, z) * (1.0 - 2.0 * cdf_logit_probit(is_logit, z))
+    } else {
+        // For normal: f'(z) = -z * phi(z)
+        -z * pdf_logit_probit(is_logit, z)
+    }
+}
+
+/// The main function that calculates Logit/Probit AME in the same style as the original Probit code.
+///
+/// An extra parameter `se_method` (optional, default "rust") controls the SE calculation:
+/// - "rust": uses the original gradient (Jacobian) calculation
+/// - "sm":   uses a statsmodels-style gradient for continuous variables
 #[pyfunction]
 fn ame<'py>(
     py: Python<'py>,
-    model: &'py PyAny,          // Could be Logit or Probit
-    chunk_size: Option<usize>,  // optional chunk
+    model: &'py PyAny,          // Could be Logit or Probit
+    chunk_size: Option<usize>,  // Optional chunk size
+    se_method: Option<&str>,    // "rust" (default) or "sm"
 ) -> PyResult<&'py PyAny> {
+    // Determine which SE method to use.
+    let se_method_str = se_method.unwrap_or("rust");
+
     // 1) detect Logit vs Probit
     let is_logit = detect_model_type(model)?;
 
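A quick check of the two branches added in pdf_deriv_logit_probit, since the
formulas are stated only in comments. For the logistic CDF F(z) = 1/(1 + e^-z),
the density satisfies f = F(1 - F) and F' = f, so

    f'(z) = f(z)(1 - F(z)) - F(z) f(z) = f(z)(1 - 2 F(z))

and for the standard normal density phi(z) = (2*pi)^(-1/2) exp(-z^2 / 2),
differentiating the exponent gives phi'(z) = -z phi(z). Both match the code.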
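From Python, se_method is simply a third optional argument to febolt.ame. A
minimal usage sketch; the statsmodels example dataset, the positional call
style, and the get_margeff comparison are illustrative assumptions, not part
of this diff:

    import statsmodels.api as sm
    import febolt

    # Any fitted binary model works; spector is an arbitrary example dataset.
    data = sm.datasets.spector.load_pandas()
    X = sm.add_constant(data.exog)
    res = sm.Logit(data.endog, X).fit(disp=0)

    ame_default = febolt.ame(res)         # "rust" standard errors (default)
    ame_sm = febolt.ame(res, None, "sm")  # statsmodels-style standard errors

    # If the "sm" gradients mirror statsmodels, this should be comparable:
    print(res.get_margeff(at="overall", method="dydx").summary())
    print(ame_sm)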
@@ -100,7 +119,7 @@ fn ame<'py>(
         params_obj.downcast::<PyArray1<f64>>()?
     };
     let beta = unsafe { params_pyarray.as_array() };
-
+
     // 3) read cov (handle pandas DataFrame)
     let cov_obj = model.call_method0("cov_params")?;
     let cov_pyarray = if let Ok(values) = cov_obj.getattr("values") {
@@ -112,17 +131,13 @@ fn ame<'py>(
 
     // 4) Get model object and handle exog (X) and exog_names
     let model_obj = model.getattr("model").unwrap_or(model);
-
-    // Handle pandas DataFrame input
     let exog_py = model_obj.getattr("exog")?;
     let (x_pyarray, exog_names) = if let Ok(values) = exog_py.getattr("values") {
-        // Pandas DataFrame path
         (
             values.downcast::<PyArray2<f64>>()?,
             exog_py.getattr("columns")?.extract::<Vec<String>>()?
         )
     } else {
-        // Numpy array path
         (
             exog_py.downcast::<PyArray2<f64>>()?,
             model_obj.getattr("exog_names")?.extract::<Vec<String>>()?
@@ -165,27 +180,29 @@ fn ame<'py>(
         })
         .collect();
 
-    // 7) Prepare accumulators
-    let mut sum_ame = vec![0.0; k]; // sum partial effects
+    // 7) Prepare accumulators.
+    // We keep two sets of accumulators:
+    //   - one for the "rust" (original) method,
+    //   - one for the statsmodels ("sm") method.
+    let mut sum_ame = vec![0.0; k];
     let mut partial_jl_sums = vec![0.0; k * k];
+    let mut sm_sum_ame = vec![0.0; k];
+    let mut sm_partial_jl_sums = vec![0.0; k * k];
     let normal = Normal::new(0.0, 1.0).unwrap();
 
-    // 8) single pass with chunk
+    // 8) Process data in chunks
     let mut idx_start = 0;
     while idx_start < n {
         let idx_end = (idx_start + chunk).min(n);
         let x_chunk = X.slice(s![idx_start..idx_end, ..]);
         let z_chunk = x_chunk.dot(&beta); // shape(n_chunk)
-
-        // pdf => we might do partial for continuous
         let pdf_chunk = z_chunk.mapv(|z| pdf_logit_probit(is_logit, z));
 
-        // handle discrete
+        // Handle discrete variables (same for both methods)
         for &j in &is_discrete {
             let xj_col = x_chunk.column(j);
             let b_j = beta[j];
-            // z_j1 => z + (1-xj)*b_j
-            // z_j0 => z - xj*b_j
+            // Compute z for x_j set to 1 and 0:
             let delta_j1 = (1.0 - &xj_col).mapv(|x| x * b_j);
             let delta_j0 = xj_col.mapv(|x| -x * b_j);
             let z_j1 = &z_chunk + &delta_j1;
@@ -193,16 +210,14 @@ fn ame<'py>(
 
             let cdf_j1 = z_j1.mapv(|z| cdf_logit_probit(is_logit, z));
             let cdf_j0 = z_j0.mapv(|z| cdf_logit_probit(is_logit, z));
-            // sum
             let effect_sum = cdf_j1.sum() - cdf_j0.sum();
             sum_ame[j] += effect_sum;
+            sm_sum_ame[j] += effect_sum;
 
-            // partial_jl_sums => row j, col l
             let pdf_j1 = z_j1.mapv(|z| pdf_logit_probit(is_logit, z));
             let pdf_j0 = z_j0.mapv(|z| pdf_logit_probit(is_logit, z));
             for l in 0..k {
                 let grad = if l == j {
-                    // special case
                     pdf_j1.sum()
                 } else {
                     let x_l = x_chunk.column(l);
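For context on the discrete block (shared by both SE methods): for a binary
regressor x_j the AME is a finite difference, and its Jacobian row has a closed
form. Writing z_i(j,d) for x_i'beta with x_ij forced to the value d:

    AME_j         = (1/n) * sum_i [ F(z_i(j,1)) - F(z_i(j,0)) ]
    dAME_j / db_l = (1/n) * sum_i [ f(z_i(j,1)) * x_il(1) - f(z_i(j,0)) * x_il(0) ]

For l == j the forced values are x_ij(1) = 1 and x_ij(0) = 0, leaving only
sum_i f(z_i(j,1)) -- the pdf_j1.sum() branch. For l != j the regressor is the
same in both terms, so the sum contracts to (pdf_j1 - pdf_j0) dotted with x_l,
which is presumably what the off-screen diff_pdf variable holds.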
@@ -210,42 +225,65 @@ fn ame<'py>(
                     diff_pdf.dot(&x_l)
                 };
                 partial_jl_sums[j * k + l] += grad;
+                sm_partial_jl_sums[j * k + l] += grad;
             }
         }
 
-        // handle continuous
+        // Handle continuous variables.
        for j in 0..k {
            if intercept_indices.contains(&j) || is_discrete.contains(&j) {
                continue;
            }
            let b_j = beta[j];
-            // sum_ame
-            sum_ame[j] += b_j * pdf_chunk.sum();
-            // partial_jl_sums => row j, col l
-            for l in 0..k {
-                let grad = if j == l {
-                    pdf_chunk.sum()
-                } else {
-                    // - b_j * sum(z_chunk * x_col(l) * pdf_chunk)
+            if se_method_str == "rust" {
+                // Original method: update using b_j * pdf
+                sum_ame[j] += b_j * pdf_chunk.sum();
+                for l in 0..k {
+                    let grad = if j == l {
+                        pdf_chunk.sum()
+                    } else {
+                        let x_l = x_chunk.column(l);
+                        -b_j * (&z_chunk * &x_l).dot(&pdf_chunk)
+                    };
+                    partial_jl_sums[j * k + l] += grad;
+                }
+            } else if se_method_str == "sm" {
+                // statsmodels-style: use the derivative of the pdf.
+                let fprime_chunk = z_chunk.mapv(|z| pdf_deriv_logit_probit(is_logit, z));
+                sm_sum_ame[j] += b_j * pdf_chunk.sum();
+                for l in 0..k {
                     let x_l = x_chunk.column(l);
-                    // careful about sign from the original code
-                    -b_j * (&z_chunk * &x_l).dot(&pdf_chunk)
-                };
-                partial_jl_sums[j * k + l] += grad;
+                    let term = (&x_l * &fprime_chunk).sum();
+                    let grad = if j == l {
+                        pdf_chunk.sum() + b_j * term
+                    } else {
+                        b_j * term
+                    };
+                    sm_partial_jl_sums[j * k + l] += grad;
+                }
             }
         }
 
        idx_start = idx_end;
    }
 
-    // 9) average sums
-    let ame: Vec<f64> = sum_ame.iter().map(|v| v / (n as f64)).collect();
+    // 9) Average sums over n.
+    let final_ame: Vec<f64> = if se_method_str == "sm" {
+        sm_sum_ame.iter().map(|v| v / (n as f64)).collect()
+    } else {
+        sum_ame.iter().map(|v| v / (n as f64)).collect()
+    };
 
-    // gradient matrix => shape(k,k)
-    let mut grad_ame = Array2::<f64>::zeros((k,k));
+    // Build gradient matrix: shape (k, k)
+    let mut grad_ame = Array2::<f64>::zeros((k, k));
     for j in 0..k {
         for l in 0..k {
-            grad_ame[[j,l]] = partial_jl_sums[j * k + l] / (n as f64);
+            let value = if se_method_str == "sm" {
+                sm_partial_jl_sums[j * k + l] / (n as f64)
+            } else {
+                partial_jl_sums[j * k + l] / (n as f64)
+            };
+            grad_ame[[j, l]] = value;
         }
     }
 
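The two continuous-variable branches differ only in the Jacobian fed to the
delta method. For AME_j = (1/n) * sum_i b_j * f(z_i), differentiating under
the sum gives the statsmodels-style ("sm") entries:

    dAME_j / db_l = (1/n) * sum_i [ delta_jl * f(z_i) + b_j * f'(z_i) * x_il ]

where delta_jl is the Kronecker delta and sum_i f'(z_i) * x_il is the code's
`term`. The "rust" branch keeps only sum_i f(z_i) on the diagonal and uses
-b_j * sum_i z_i * x_il * f(z_i) off the diagonal; since phi'(z) = -z * phi(z),
the two methods produce identical off-diagonal entries for probit and differ
by the b_j * f' * x term on the diagonal (and more broadly for logit).
Downstream, in code outside these hunks, se_ame is presumably the usual
delta-method standard error, se_j = sqrt((J * Sigma * J')_jj), with
J = grad_ame and Sigma = cov_params.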
@@ -257,37 +295,45 @@ fn ame<'py>(
     // 10) Build final results
     let (mut dy_dx, mut se_err, mut z_vals, mut p_vals, mut sig) =
         (Vec::new(), Vec::new(), Vec::new(), Vec::new(), Vec::new());
+    let (mut conf_low, mut conf_high) = (Vec::new(), Vec::new());
     let mut names_out = Vec::new();
 
     for j in 0..k {
         if intercept_indices.contains(&j) {
             continue;
         }
-        let dy = ame[j];
+        let dy = final_ame[j];
         let s = se_ame[j];
         dy_dx.push(dy);
         se_err.push(s);
         if s > 1e-15 {
             let z = dy / s;
-            let p = 2.0*(1.0 - normal.cdf(z.abs()));
+            let p = 2.0 * (1.0 - normal.cdf(z.abs()));
             z_vals.push(z);
             p_vals.push(p);
             sig.push(add_significance_stars(p));
+            // Compute 95% confidence interval using 1.96 * SE
+            conf_low.push(dy - 1.96 * s);
+            conf_high.push(dy + 1.96 * s);
         } else {
             z_vals.push(0.0);
             p_vals.push(1.0);
             sig.push("");
+            conf_low.push(dy);
+            conf_high.push(dy);
         }
         names_out.push(exog_names[j].clone());
     }
 
-    // 11) Create DataFrame
+    // 11) Create DataFrame with extra confidence interval columns.
     let pd = py.import("pandas")?;
     let data = PyDict::new(py);
     data.set_item("dy/dx", &dy_dx)?;
     data.set_item("Std. Err", &se_err)?;
     data.set_item("z", &z_vals)?;
     data.set_item("Pr(>|z|)", &p_vals)?;
+    data.set_item("Conf. Int. Low", &conf_low)?;
+    data.set_item("Conf. Int. Hi", &conf_high)?;
     data.set_item("Significance", &sig)?;
 
     let kwargs = PyDict::new(py);
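On the new interval columns: given a delta-method standard error s, the added
bounds are the normal-approximation 95% interval dy +/- z_0.975 * s, with the
quantile hard-coded as z_0.975 ~= 1.96. When s underflows (s <= 1e-15) the
code collapses the interval to the point estimate itself.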
@@ -298,9 +344,9 @@ fn ame<'py>(
     Ok(df)
 }
 
-
 #[pymodule]
 fn febolt(_py: Python, m: &PyModule) -> PyResult<()> {
     m.add_function(wrap_pyfunction!(ame, m)?)?;
     Ok(())
 }
+