PyPI - marginaleffects - Versions diffs - 0.5.1__tar.gz → 0.6.0__tar.gz - Mend

marginaleffects 0.5.1.tar.gz → 0.6.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (129) hide show

{marginaleffects-0.5.1 → marginaleffects-0.6.0}/PKG-INFO RENAMED Viewed

@@ -1,11 +1,12 @@
 Metadata-Version: 2.4
 Name: marginaleffects
-Version: 0.5.1
+Version: 0.6.0
 Summary: Predictions, counterfactual comparisons, slopes, and hypothesis tests for statistical models.
 License-Expression: GPL-3.0-or-later
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 Requires-Dist: formulaic>=1.0.2
+Requires-Dist: jax>=0.4.0
 Requires-Dist: narwhals>=1.34.0
 Requires-Dist: numpy>=2.0.0
 Requires-Dist: patsy>=1.0.1
@@ -14,8 +15,6 @@ Requires-Dist: pydantic>=2.10.3
 Requires-Dist: plotnine>=0.14.5
 Requires-Dist: scipy>=1.14.1
 Requires-Dist: pyarrow>=19.0.1
-Provides-Extra: autodiff
-Requires-Dist: jax>=0.4.0; extra == "autodiff"
 Provides-Extra: test
 Requires-Dist: duckdb>=1.1.2; extra == "test"
 Requires-Dist: matplotlib>=3.7.1; extra == "test"

{marginaleffects-0.5.1 → marginaleffects-0.6.0}/marginaleffects/autodiff/__init__.py RENAMED Viewed

@@ -29,18 +29,14 @@ if _JAX_AVAILABLE:
     jax.config.update("jax_enable_x64", True)
-    # Import submodules to make them accessible
-    from . import linear as linear
     from . import glm as glm
+    from . import pipeline as pipeline
-    # Re-export types for convenience
-    from .comparisons import ComparisonType as ComparisonType
     from .glm.families import Family as Family, Link as Link
     __all__ = [
-        "linear",
         "glm",
-        "ComparisonType",
+        "pipeline",
         "Family",
         "Link",
     ]
@@ -50,13 +46,8 @@ else:
         def __getattr__(self, name):
             _raise_jax_error()
-    linear = _DummyModule()
     glm = _DummyModule()
-    # Create dummy enums that raise errors
-    class ComparisonType:
-        def __getattribute__(self, name):
-            _raise_jax_error()
+    pipeline = _DummyModule()
     class Family:
         def __getattribute__(self, name):
@@ -66,4 +57,4 @@ else:
         def __getattribute__(self, name):
             _raise_jax_error()
-    __all__ = []
+    __all__ = ["glm", "pipeline", "Family", "Link"]

marginaleffects-0.6.0/marginaleffects/autodiff/glm/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+from . import families as families

marginaleffects-0.6.0/marginaleffects/autodiff/lower.py ADDED Viewed

@@ -0,0 +1,219 @@
+from __future__ import annotations
+from dataclasses import dataclass
+import warnings
+import numpy as np
+from ..settings import is_autodiff_enabled, is_autodiff_forced
+from ..uncertainty import get_se
+from .ops import COMPARISON_OPS
+@dataclass
+class Lowered:
+    ok: bool
+    kwargs: dict | None = None
+    reason: str = ""
+@dataclass
+class AutodiffResult:
+    std_error: np.ndarray
+    jacobian: np.ndarray
+def _fail(reason: str) -> Lowered:
+    return Lowered(False, kwargs=None, reason=reason)
+def _model_args(model):
+    args = model.get_autodiff_args()
+    if args is None:
+        return None, _fail("")
+    if isinstance(args, str):
+        return None, _fail(args)
+    return args, None
+def _coefs_or_failure(model):
+    coefs = np.asarray(model.get_coef(), dtype=float).reshape(-1)
+    if np.isnan(coefs).any():
+        return None, _fail("models with NA coefficients")
+    return coefs, None
+def _design_or_failure(X, coefs, n_pred):
+    X = np.asarray(X, dtype=float)
+    if X.ndim != 2 or X.shape[1] != coefs.size or X.shape[0] != n_pred:
+        return None, _fail("this model/data configuration")
+    return X, None
+def _hypothesis_or_failure(plan):
+    if plan.hyp is None:
+        return None, None
+    if plan.hyp.kind != "matrix":
+        return None, _fail("this form of the `hypothesis` argument")
+    return np.asarray(plan.hyp.H, dtype=float), None
+def _has_nan(x) -> bool:
+    return x is not None and np.isnan(np.asarray(x, dtype=float)).any()
+def lower_predictions(plan, model) -> Lowered:
+    args, failure = _model_args(model)
+    if failure is not None:
+        return failure
+    coefs, failure = _coefs_or_failure(model)
+    if failure is not None:
+        return failure
+    X, failure = _design_or_failure(plan.exog, coefs, plan.n_pred)
+    if failure is not None:
+        return failure
+    if plan.align is not None:
+        return _fail("models with grouped/multi-equation outcomes")
+    if plan.has_na:
+        return _fail("missing values in predictions")
+    H, failure = _hypothesis_or_failure(plan)
+    if failure is not None:
+        return failure
+    agg_segments = None
+    agg_num_segments = None
+    agg_weights = None
+    if plan.agg is not None:
+        agg_segments = np.empty(plan.n_pred, dtype=np.int32)
+        agg_weights = np.ones(plan.n_pred, dtype=float)
+        for i, group in enumerate(plan.agg):
+            agg_segments[group.idx] = i
+            if group.w is not None:
+                if _has_nan(group.w):
+                    return _fail("missing values in weights")
+                agg_weights[group.idx] = np.asarray(group.w, dtype=float)
+        agg_num_segments = len(plan.agg)
+    kwargs = {
+        **args,
+        "X": X,
+        "agg_segments": agg_segments,
+        "agg_num_segments": agg_num_segments,
+        "agg_weights": agg_weights if agg_segments is not None else None,
+        "H": H,
+    }
+    return Lowered(True, kwargs=kwargs)
+def lower_comparisons(plan, model) -> Lowered:
+    args, failure = _model_args(model)
+    if failure is not None:
+        return failure
+    coefs, failure = _coefs_or_failure(model)
+    if failure is not None:
+        return failure
+    X_hi, failure = _design_or_failure(plan.exog_hi, coefs, plan.n_pred)
+    if failure is not None:
+        return failure
+    X_lo, failure = _design_or_failure(plan.exog_lo, coefs, plan.n_pred)
+    if failure is not None:
+        return failure
+    if plan.align is not None:
+        return _fail("models with grouped/multi-equation outcomes")
+    if plan.has_na:
+        return _fail("missing values in predictions")
+    if any(group.fun_key is None for group in plan.groups):
+        return _fail("custom comparison functions")
+    if plan.need_y:
+        return _fail("elasticities")
+    for group in plan.groups:
+        if group.fun_key not in COMPARISON_OPS:
+            return _fail(f"comparison='{group.fun_key}'")
+    H, failure = _hypothesis_or_failure(plan)
+    if failure is not None:
+        return failure
+    if not plan.groups:
+        order = np.asarray([], dtype=int)
+    else:
+        order = np.concatenate([group.idx for group in plan.groups]).astype(int)
+    if order.size != plan.n_pred:
+        return _fail("this model/data configuration")
+    ops = []
+    for group in plan.groups:
+        spec = COMPARISON_OPS[group.fun_key]
+        w = None
+        if spec.weighted:
+            if _has_nan(group.w):
+                return _fail("missing values in weights")
+            w = None if group.w is None else np.asarray(group.w, dtype=float)
+        ops.append({"op": spec.pipeline_op, "n": len(group.idx), "w": w})
+    kwargs = {
+        **args,
+        "X_hi": X_hi[order],
+        "X_lo": X_lo[order],
+        "ops": ops,
+        "H": H,
+    }
+    return Lowered(True, kwargs=kwargs)
+def _warn_unsupported(reason):
+    if reason:
+        warnings.warn(
+            "Automatic differentiation does not support "
+            f"{reason}. Reverting to finite differences.",
+            UserWarning,
+            stacklevel=3,
+        )
+def autodiff_try(plan, model, V, estimate, kind):
+    if plan is None or V is None or not is_autodiff_enabled():
+        return None
+    warn_on_fallback = is_autodiff_forced()
+    lowered = (
+        lower_predictions(plan, model)
+        if kind == "predictions"
+        else lower_comparisons(plan, model)
+    )
+    if not lowered.ok:
+        if warn_on_fallback:
+            _warn_unsupported(lowered.reason)
+        return None
+    try:
+        from . import pipeline
+        result = pipeline.compute(beta=model.get_coef(), **lowered.kwargs)
+    except Exception as exc:
+        if warn_on_fallback:
+            warnings.warn(
+                "Automatic differentiation failed "
+                f"({exc}). Reverting to finite differences.",
+                UserWarning,
+                stacklevel=3,
+            )
+        return None
+    estimate = np.asarray(estimate, dtype=float).reshape(-1)
+    if not np.allclose(result["estimate"], estimate, rtol=1e-8, atol=1e-8):
+        if warn_on_fallback:
+            warnings.warn(
+                "Automatic differentiation estimates did not match the standard "
+                "pipeline. Reverting to finite differences.",
+                UserWarning,
+                stacklevel=3,
+            )
+        return None
+    # Coef/vcov positional alignment is guaranteed by each adapter vault.
+    J = np.asarray(result["jacobian"], dtype=float)
+    se = get_se(J, V)
+    se[se == 0] = np.nan
+    return AutodiffResult(std_error=se, jacobian=J)

marginaleffects-0.6.0/marginaleffects/autodiff/ops.py ADDED Viewed

@@ -0,0 +1,60 @@
+"""Shared comparison operation registry for autodiff lowering and execution."""
+from __future__ import annotations
+from collections.abc import Callable
+from dataclasses import dataclass
+from typing import Any
+ArrayFn = Callable[[list[Any]], Any]
+WMeanFn = Callable[[Any, Any | None], Any]
+EstimateFn = Callable[[Any, Any, Any | None, ArrayFn, WMeanFn], Any]
+@dataclass(frozen=True)
+class PipelineOp:
+    estimate: EstimateFn
+    scalar: bool
+    def output_size(self, n: int) -> int:
+        return 1 if self.scalar else n
+@dataclass(frozen=True)
+class ComparisonOp:
+    pipeline_op: str
+    weighted: bool
+def _difference(hi, lo, _w, _array, _wmean):
+    return hi - lo
+def _ratio(hi, lo, _w, _array, _wmean):
+    return hi / lo
+def _differenceavg(hi, lo, w, array, wmean):
+    return array([wmean(hi, w) - wmean(lo, w)])
+def _ratioavg(hi, lo, w, array, wmean):
+    return array([wmean(hi, w) / wmean(lo, w)])
+PIPELINE_OPS = {
+    "difference": PipelineOp(estimate=_difference, scalar=False),
+    "ratio": PipelineOp(estimate=_ratio, scalar=False),
+    "differenceavg": PipelineOp(estimate=_differenceavg, scalar=True),
+    "ratioavg": PipelineOp(estimate=_ratioavg, scalar=True),
+}
+COMPARISON_OPS = {
+    "difference": ComparisonOp(pipeline_op="difference", weighted=False),
+    "ratio": ComparisonOp(pipeline_op="ratio", weighted=False),
+    "differenceavg": ComparisonOp(pipeline_op="differenceavg", weighted=False),
+    "ratioavg": ComparisonOp(pipeline_op="ratioavg", weighted=False),
+    "differenceavgwts": ComparisonOp(pipeline_op="differenceavg", weighted=True),
+    "ratioavgwts": ComparisonOp(pipeline_op="ratioavg", weighted=True),
+}

marginaleffects-0.6.0/marginaleffects/autodiff/pipeline.py ADDED Viewed

@@ -0,0 +1,237 @@
+"""Composable JAX pipeline for R-side plan lowering."""
+from __future__ import annotations
+from functools import partial
+import numpy as np
+import jax
+import jax.numpy as jnp
+from .glm.families import Family, Link, linkinv, resolve_link
+from .ops import PIPELINE_OPS
+_FAMILY = {
+    "gaussian": Family.GAUSSIAN,
+    "binomial": Family.BINOMIAL,
+    "poisson": Family.POISSON,
+    "gamma": Family.GAMMA,
+    "Gamma": Family.GAMMA,
+}
+_LINK = {
+    "identity": Link.IDENTITY,
+    "log": Link.LOG,
+    "logit": Link.LOGIT,
+    "probit": Link.PROBIT,
+    "inverse": Link.INVERSE,
+    "sqrt": Link.SQRT,
+    "cloglog": Link.CLOGLOG,
+}
+def _resolve_family_link(family: str | None, link: str | None) -> int | None:
+    if family is None and link is None:
+        return None
+    try:
+        family_type = _FAMILY[family]
+        link_type = _LINK[link] if link is not None else None
+    except KeyError as exc:
+        raise ValueError(f"Unsupported GLM family/link: {family}/{link}") from exc
+    return resolve_link(family_type, link_type)
+def _wmean(x, w):
+    if w is None:
+        return jnp.mean(x)
+    w = jnp.asarray(w, dtype=jnp.float64)
+    return jnp.sum(x * w) / jnp.sum(w)
+def _asarray1(x):
+    return jnp.asarray(x, dtype=jnp.float64)
+def _apply_agg(est, segments, num_segments, weights):
+    if segments is None:
+        return est
+    segments = jnp.asarray(segments, dtype=jnp.int32)
+    if weights is None:
+        weights = jnp.ones_like(est, dtype=jnp.float64)
+    else:
+        weights = jnp.asarray(weights, dtype=jnp.float64)
+    numer = jax.ops.segment_sum(est * weights, segments, num_segments=num_segments)
+    denom = jax.ops.segment_sum(weights, segments, num_segments=num_segments)
+    return numer / denom
+def _comparison_estimates(mu_hi, mu_lo, ops_meta, ops_weights):
+    pieces = []
+    start = 0
+    w_iter = iter(ops_weights)
+    for name, n, has_w in ops_meta:
+        stop = start + n
+        hi = mu_hi[start:stop]
+        lo = mu_lo[start:stop]
+        w = next(w_iter) if has_w else None
+        spec = PIPELINE_OPS[name]
+        pieces.append(spec.estimate(hi, lo, w, _asarray1, _wmean))
+        start = stop
+    if not pieces:
+        return jnp.asarray([], dtype=jnp.float64)
+    return jnp.concatenate(pieces)
+def _predict(model_type, link_type, x, b):
+    eta = x @ b
+    if model_type == "glm":
+        return linkinv(link_type, eta)
+    return eta
+# The whole estimate+Jacobian computation is compiled with XLA. Traced arrays
+# only affect the cache key through shape/dtype, so repeated calls at the same
+# problem size recompile nothing; a new shape or plan structure pays one
+# compilation.
+@partial(
+    jax.jit,
+    static_argnames=(
+        "model_type",
+        "link_type",
+        "ops_meta",
+        "agg_num_segments",
+        "use_fwd",
+    ),
+)
+def _estimate_and_jacobian(
+    beta,
+    model_type,
+    link_type,
+    X,
+    X_hi,
+    X_lo,
+    ops_meta,
+    ops_weights,
+    est_keep,
+    agg_segments,
+    agg_num_segments,
+    agg_weights,
+    H,
+    use_fwd,
+):
+    def f(b):
+        if X is not None:
+            est = _predict(model_type, link_type, X, b)
+        else:
+            mu_hi = _predict(model_type, link_type, X_hi, b)
+            mu_lo = _predict(model_type, link_type, X_lo, b)
+            est = _comparison_estimates(mu_hi, mu_lo, ops_meta, ops_weights)
+        if est_keep is not None:
+            est = est[est_keep]
+        est = _apply_agg(est, agg_segments, agg_num_segments, agg_weights)
+        if H is not None:
+            est = est @ H
+        return jnp.atleast_1d(est)
+    estimate = f(beta)
+    jac_fun = jax.jacfwd if use_fwd else jax.jacrev
+    jacobian = jac_fun(f)(beta)
+    jacobian = jnp.reshape(jacobian, (estimate.size, beta.size))
+    return estimate, jacobian
+def compute(
+    beta,
+    model_type,
+    family=None,
+    link=None,
+    X=None,
+    X_hi=None,
+    X_lo=None,
+    ops=None,
+    est_keep=None,
+    agg_segments=None,
+    agg_num_segments=None,
+    agg_weights=None,
+    H=None,
+):
+    """Return estimate and Jacobian for a lowered autodiff plan."""
+    beta = jnp.asarray(beta, dtype=jnp.float64)
+    if model_type not in ("linear", "glm"):
+        raise ValueError(f"Unsupported model_type: {model_type}")
+    link_type = _resolve_family_link(family, link) if model_type == "glm" else None
+    if X is not None:
+        X = jnp.asarray(X, dtype=jnp.float64)
+    if X_hi is not None:
+        X_hi = jnp.asarray(X_hi, dtype=jnp.float64)
+    if X_lo is not None:
+        X_lo = jnp.asarray(X_lo, dtype=jnp.float64)
+    if est_keep is not None:
+        est_keep = jnp.asarray(est_keep, dtype=jnp.int32)
+    if agg_segments is not None:
+        agg_segments = jnp.asarray(agg_segments, dtype=jnp.int32)
+        agg_num_segments = int(agg_num_segments)
+    else:
+        agg_num_segments = None
+    if agg_weights is not None:
+        agg_weights = jnp.asarray(agg_weights, dtype=jnp.float64)
+    if H is not None:
+        H = jnp.asarray(H, dtype=jnp.float64)
+    # Split ops into a hashable static structure (jit cache key) and traced
+    # weight arrays, while tallying the pre-keep estimate length.
+    ops_meta = ()
+    ops_weights = ()
+    if X is not None:
+        n_est = X.shape[0]
+    else:
+        n_est = 0
+        for op in ops or []:
+            name = op["op"]
+            if name not in PIPELINE_OPS:
+                raise ValueError(f"Unsupported comparison op: {name}")
+            spec = PIPELINE_OPS[name]
+            n = int(op["n"])
+            w = op.get("w")
+            ops_meta = ops_meta + ((name, n, w is not None),)
+            if w is not None:
+                ops_weights = ops_weights + (jnp.asarray(w, dtype=jnp.float64),)
+            n_est += spec.output_size(n)
+    # Output size is known from shapes alone; it picks forward vs reverse mode
+    # before tracing.
+    n_out = n_est
+    if est_keep is not None:
+        n_out = est_keep.shape[0]
+    if agg_segments is not None:
+        n_out = agg_num_segments
+    if H is not None:
+        n_out = H.shape[1]
+    use_fwd = beta.size <= max(n_out, 1)
+    estimate, jacobian = _estimate_and_jacobian(
+        beta,
+        model_type,
+        link_type,
+        X,
+        X_hi,
+        X_lo,
+        ops_meta,
+        ops_weights,
+        est_keep,
+        agg_segments,
+        agg_num_segments,
+        agg_weights,
+        H,
+        use_fwd,
+    )
+    return {
+        "estimate": np.asarray(estimate, dtype=np.float64),
+        "jacobian": np.asarray(jacobian, dtype=np.float64),
+    }

marginaleffects 0.5.1__tar.gz → 0.6.0__tar.gz

marginaleffects 0.5.1.tar.gz → 0.6.0.tar.gz