PyPI - SearchLibrium - Versions diffs - 0.0.83__tar.gz → 0.0.85__tar.gz - Mend

SearchLibrium 0.0.83 (tar.gz) → 0.0.85 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46)

{searchlibrium-0.0.83 → searchlibrium-0.0.85}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: SearchLibrium
-Version: 0.0.83
+Version: 0.0.85
 Summary: A Python package for econometric models driven by search
 Author: Alexander Paz Prithvi Beeramole, Robert Burdett
 Author-email: Zeke Ahern <z.ahern@qut.edu.au>

{searchlibrium-0.0.83 → searchlibrium-0.0.85}/pyproject.toml RENAMED Viewed

@@ -59,7 +59,7 @@ Homepage = "https://github.com/zahern/HypothesisX"
 realpython = "SearchLibrium.__main__:main"
 [tool.bumpver]
-current_version = "0.0.83"
+current_version = "0.0.85"
 version_pattern = "MAJOR.MINOR.PATCH"
 commit_message = "[skip ci] Bump version {old_version} -> {new_version}"
 commit = true

{searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/__init__.py RENAMED Viewed

@@ -55,7 +55,13 @@ def new_features():
            """)
 def get_version_from_pkg_info():
-    """Reads the version from the PKG-INFO file."""
+    """Reads the installed package version via importlib.metadata."""
+    try:
+        from importlib.metadata import version as _pkg_version
+        return _pkg_version("SearchLibrium")
+    except Exception:
+        pass
+    # Fallback: read from egg-info PKG-INFO (editable installs)
     pkg_info_path = os.path.join(os.path.dirname(__file__), "../SearchLibrium.egg-info/PKG-INFO")
     try:
         with open(pkg_info_path, "r") as f:
@@ -63,7 +69,8 @@ def get_version_from_pkg_info():
                 if line.startswith("Version:"):
                     return line.split(":")[1].strip()
     except FileNotFoundError:
-        return "0.0.32"
+        pass
+    return "unknown"
 __version__ = get_version_from_pkg_info()
@@ -86,7 +93,10 @@ try:
     from .rrm import RandomRegret
     from .mixedrrm import MixedRandomRegret
     from .ordered_logit import OrderedLogit, OrderedLogitLong
+    from .selection_models import BinaryProbit, HeckmanTwoStep
     from .latent_class import LatentClassMixedLogit
+    from .mdcev import MDCEVFitResult, MDCEVModel
+    from .multinomial_probit import MultinomialProbit
     from .RandomP import RandomParameters
     from .constraints_builder import ConstraintBuilder, create_constraints
     from .search import Parameters
@@ -102,21 +112,28 @@ except ImportError as e:
     from rrm import RandomRegret
     from mixedrrm import MixedRandomRegret
     from ordered_logit import OrderedLogit, OrderedLogitLong
+    from selection_models import BinaryProbit, HeckmanTwoStep
     from latent_class import LatentClassMixedLogit
+    from mdcev import MDCEVFitResult, MDCEVModel
+    from multinomial_probit import MultinomialProbit
     from RandomP import RandomParameters
     from constraints_builder import ConstraintBuilder, create_constraints
     from search import Parameters
     from call_meta import call_siman, call_harmony, call_search, estimate_ctrl
 try:
     from .main import print_ascii_art_logo
-except:
-    from main import print_ascii_art_logo
+except Exception:
+    try:
+        from main import print_ascii_art_logo
+    except Exception:
+        print_ascii_art_logo = None
-try:
-    print_ascii_art_logo()
-except ImportError:
-    print("Error importing print_ascii_art_logo from main module. Continuing without logo.")
+if print_ascii_art_logo is not None:
+    try:
+        print_ascii_art_logo()
+    except Exception:
+        print("SearchLibrium logo skipped; optional display dependencies are missing.")
 #print('loaded all')
 print('Welcome to SearchLibrium')

searchlibrium-0.0.85/src/SearchLibrium/mdcev.py ADDED Viewed

@@ -0,0 +1,344 @@
+"""MDCEV budget-allocation prototype for SearchLibrium.
+This module implements a compact translated-utility MDCEV-style allocator for
+continuous budget splits such as daily time-use or discretionary activity
+budgets. The implementation is forecasting-oriented: it provides a stable
+fitting heuristic from observed allocations together with an analytical
+budget-allocation solver based on the translated utility first-order
+conditions.
+The class is intended as a practical bridge between the current scalar budget
+models and a fuller MDCEV pipeline. It includes both a stable heuristic fit
+and a likelihood-based quasi-MLE refinement.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Iterable, Optional
+import numpy as np
+import pandas as pd
+from scipy.optimize import minimize
+def _as_2d_float(array_like) -> np.ndarray:
+    arr = np.asarray(array_like, dtype=float)
+    if arr.ndim == 1:
+        arr = arr.reshape(1, -1)
+    if arr.ndim != 2:
+        raise ValueError("Expected a 2D array of allocations")
+    return np.nan_to_num(arr, nan=0.0, posinf=0.0, neginf=0.0)
+@dataclass
+class MDCEVFitResult:
+    labels: list[str]
+    baseline_utility: np.ndarray
+    alpha: np.ndarray
+    gamma: np.ndarray
+    participation_rate: np.ndarray
+    mean_allocation: np.ndarray
+    mean_budget: float
+class MDCEVModel:
+    """Translated-utility MDCEV-style allocator.
+    Parameters are learned from observed budget shares using stable moment-based
+    heuristics, then predictions are produced by solving the translated-utility
+    KKT system with a bisection search on the shadow price.
+    """
+    def __init__(
+        self,
+        outside_good: Optional[int] = 0,
+        alpha_floor: float = 0.05,
+        alpha_cap: float = 0.95,
+        gamma_floor: float = 1e-3,
+        tol: float = 1e-9,
+    ):
+        self.outside_good = outside_good
+        self.alpha_floor = alpha_floor
+        self.alpha_cap = alpha_cap
+        self.gamma_floor = gamma_floor
+        self.tol = tol
+        self.labels_: list[str] | None = None
+        self.baseline_utility_: np.ndarray | None = None
+        self.alpha_: np.ndarray | None = None
+        self.gamma_: np.ndarray | None = None
+        self.fit_result_: MDCEVFitResult | None = None
+    def fit(self, allocations, labels: Optional[Iterable[str]] = None):
+        """Estimate baseline utility and satiation terms from observed allocations.
+        Parameters
+        ----------
+        allocations:
+            Matrix of observed budgets split across alternatives. Rows are
+            observations and columns are alternatives.
+        labels:
+            Optional alternative labels.
+        """
+        y = _as_2d_float(allocations)
+        n_obs, n_alt = y.shape
+        budgets = y.sum(axis=1)
+        if np.any(budgets < self.tol):
+            raise ValueError("Each observation must have a positive total budget")
+        labels_list = list(labels) if labels is not None else [f"alt_{i}" for i in range(n_alt)]
+        if len(labels_list) != n_alt:
+            raise ValueError("labels length must match number of alternatives")
+        positive = y > self.tol
+        participation = positive.mean(axis=0)
+        mean_allocation = y.mean(axis=0)
+        share = y.sum(axis=0) / np.clip(y.sum(), self.tol, None)
+        if self.outside_good is not None and 0 <= self.outside_good < n_alt:
+            ref_share = max(float(share[self.outside_good]), self.tol)
+            baseline = np.log(np.clip(share, self.tol, None)) - np.log(ref_share)
+            baseline[self.outside_good] = 0.0
+        else:
+            baseline = np.log(np.clip(share, self.tol, None))
+            baseline = baseline - baseline.mean()
+        gamma = np.full(n_alt, self.gamma_floor, dtype=float)
+        alpha = np.full(n_alt, 0.5, dtype=float)
+        for idx in range(n_alt):
+            pos_vals = y[positive[:, idx], idx]
+            if pos_vals.size == 0:
+                gamma[idx] = max(np.median(budgets) * 0.05, self.gamma_floor)
+                alpha[idx] = self.alpha_floor
+                baseline[idx] = min(baseline[idx], -8.0)
+                continue
+            median_pos = float(np.median(pos_vals))
+            mean_pos = float(np.mean(pos_vals))
+            std_pos = float(np.std(pos_vals))
+            cv_pos = std_pos / max(mean_pos, self.tol)
+            gamma[idx] = max(median_pos * max(1.0 - participation[idx], 0.1), self.gamma_floor)
+            raw_alpha = 0.2 + 0.6 * participation[idx] / (1.0 + cv_pos)
+            alpha[idx] = float(np.clip(raw_alpha, self.alpha_floor, self.alpha_cap))
+        if self.outside_good is not None and 0 <= self.outside_good < n_alt:
+            gamma[self.outside_good] = self.gamma_floor
+            alpha[self.outside_good] = max(alpha[self.outside_good], 0.8)
+        self.labels_ = labels_list
+        self.baseline_utility_ = baseline
+        self.alpha_ = alpha
+        self.gamma_ = gamma
+        self.fit_result_ = MDCEVFitResult(
+            labels=labels_list,
+            baseline_utility=baseline.copy(),
+            alpha=alpha.copy(),
+            gamma=gamma.copy(),
+            participation_rate=participation.copy(),
+            mean_allocation=mean_allocation.copy(),
+            mean_budget=float(np.mean(budgets)),
+        )
+        return self
+    def fit_mle(
+        self,
+        allocations,
+        labels: Optional[Iterable[str]] = None,
+        maxiter: int = 400,
+        l2_penalty: float = 1e-4,
+    ):
+        """Likelihood-based parameter refinement.
+        The objective is a Gaussian log-likelihood on log allocations around
+        translated-utility MDCEV deterministic predictions. This is a practical
+        quasi-MLE refinement that preserves the MDCEV budget constraint while
+        improving fit over pure moments.
+        """
+        self.fit(allocations, labels=labels)
+        y = _as_2d_float(allocations)
+        budgets = y.sum(axis=1)
+        n_alt = y.shape[1]
+        free_base_idx = [i for i in range(n_alt) if i != self.outside_good]
+        def _pack(base, alpha, gamma, sigma):
+            b = np.asarray(base, dtype=float)
+            a = np.asarray(alpha, dtype=float)
+            g = np.asarray(gamma, dtype=float)
+            p = []
+            p.extend(b[free_base_idx].tolist())
+            p.extend(np.log(np.clip((a - self.alpha_floor) / np.clip(self.alpha_cap - a, self.tol, None), self.tol, None)).tolist())
+            p.extend(np.log(np.clip(g, self.gamma_floor, None)).tolist())
+            p.append(np.log(max(float(sigma), 1e-3)))
+            return np.asarray(p, dtype=float)
+        def _unpack(theta):
+            theta = np.asarray(theta, dtype=float)
+            o = 0
+            base = self.baseline_utility_.copy()
+            for idx in free_base_idx:
+                base[idx] = theta[o]
+                o += 1
+            if self.outside_good is not None and 0 <= self.outside_good < n_alt:
+                base[self.outside_good] = 0.0
+            alpha_raw = theta[o:o + n_alt]
+            o += n_alt
+            alpha_sig = 1.0 / (1.0 + np.exp(-alpha_raw))
+            alpha = self.alpha_floor + (self.alpha_cap - self.alpha_floor) * alpha_sig
+            gamma_raw = theta[o:o + n_alt]
+            o += n_alt
+            gamma = np.maximum(np.exp(gamma_raw), self.gamma_floor)
+            sigma = max(np.exp(theta[o]), 1e-3)
+            return base, alpha, gamma, sigma
+        def _neg_loglike(theta):
+            base, alpha, gamma, sigma = _unpack(theta)
+            old_b, old_a, old_g = self.baseline_utility_, self.alpha_, self.gamma_
+            self.baseline_utility_, self.alpha_, self.gamma_ = base, alpha, gamma
+            try:
+                mu = np.zeros_like(y)
+                for i, b in enumerate(budgets):
+                    mu[i] = self._solve_budget(float(b), base)
+            finally:
+                self.baseline_utility_, self.alpha_, self.gamma_ = old_b, old_a, old_g
+            log_y = np.log(np.clip(y, self.tol, None))
+            log_mu = np.log(np.clip(mu, self.tol, None))
+            resid = log_y - log_mu
+            ll = -0.5 * resid.size * np.log(2.0 * np.pi * sigma * sigma)
+            ll -= 0.5 * np.sum((resid / sigma) ** 2)
+            ll -= l2_penalty * np.sum(theta * theta)
+            return -float(ll)
+        theta0 = _pack(self.baseline_utility_, self.alpha_, self.gamma_, sigma=0.5)
+        res = minimize(
+            _neg_loglike,
+            theta0,
+            method="L-BFGS-B",
+            options={"maxiter": int(maxiter), "ftol": 1e-9},
+        )
+        base, alpha, gamma, sigma = _unpack(res.x)
+        self.baseline_utility_ = base
+        self.alpha_ = alpha
+        self.gamma_ = gamma
+        self.noise_sigma_ = float(sigma)
+        self.mle_success_ = bool(res.success)
+        self.mle_message_ = str(res.message)
+        return self
+    def summary(self) -> pd.DataFrame:
+        if self.fit_result_ is None:
+            raise RuntimeError("Model must be fit before calling summary()")
+        result = self.fit_result_
+        return pd.DataFrame(
+            {
+                "alternative": result.labels,
+                "baseline_utility": result.baseline_utility,
+                "alpha": result.alpha,
+                "gamma": result.gamma,
+                "participation_rate": result.participation_rate,
+                "mean_allocation": result.mean_allocation,
+            }
+        )
+    def predict(self, budgets, utility_shift=None) -> np.ndarray:
+        """Predict deterministic budget allocations for one or more budgets.
+        Parameters
+        ----------
+        budgets:
+            Scalar or vector of total budgets.
+        utility_shift:
+            Optional additive utility adjustment. Can be shape ``(J,)`` or
+            ``(N, J)``.
+        """
+        self._check_fitted()
+        budgets_arr = np.asarray(budgets, dtype=float).reshape(-1)
+        shifts = self._prepare_utility_shift(utility_shift, len(budgets_arr))
+        predictions = np.zeros((len(budgets_arr), len(self.baseline_utility_)), dtype=float)
+        for row_idx, budget in enumerate(budgets_arr):
+            predictions[row_idx] = self._solve_budget(budget, self.baseline_utility_ + shifts[row_idx])
+        return predictions
+    def simulate(self, budgets, utility_shift=None, n_draws: int = 100, random_state: Optional[int] = None) -> np.ndarray:
+        """Simulate stochastic budget allocations with Gumbel utility shocks."""
+        self._check_fitted()
+        budgets_arr = np.asarray(budgets, dtype=float).reshape(-1)
+        shifts = self._prepare_utility_shift(utility_shift, len(budgets_arr))
+        rng = np.random.default_rng(random_state)
+        sims = np.zeros((n_draws, len(budgets_arr), len(self.baseline_utility_)), dtype=float)
+        for draw_idx in range(n_draws):
+            shocks = rng.gumbel(loc=0.0, scale=1.0, size=shifts.shape)
+            for row_idx, budget in enumerate(budgets_arr):
+                sims[draw_idx, row_idx] = self._solve_budget(
+                    budget,
+                    self.baseline_utility_ + shifts[row_idx] + shocks[row_idx],
+                )
+        return sims
+    def _prepare_utility_shift(self, utility_shift, n_rows: int) -> np.ndarray:
+        n_alt = len(self.baseline_utility_)
+        if utility_shift is None:
+            return np.zeros((n_rows, n_alt), dtype=float)
+        shift_arr = np.asarray(utility_shift, dtype=float)
+        if shift_arr.ndim == 1:
+            if shift_arr.shape[0] != n_alt:
+                raise ValueError("utility_shift has the wrong number of alternatives")
+            return np.repeat(shift_arr.reshape(1, -1), n_rows, axis=0)
+        if shift_arr.shape != (n_rows, n_alt):
+            raise ValueError("utility_shift must have shape (J,) or (N, J)")
+        return shift_arr
+    def _solve_budget(self, budget: float, utility_index: np.ndarray) -> np.ndarray:
+        if budget <= self.tol:
+            return np.zeros(len(self.baseline_utility_), dtype=float)
+        weights = np.exp(np.clip(utility_index, -40.0, 40.0))
+        def alloc_for_lambda(lam: float) -> np.ndarray:
+            lam = max(lam, self.tol)
+            power = 1.0 / np.clip(1.0 - self.alpha_, self.tol, None)
+            raw = np.power(weights / lam, power) - self.gamma_
+            return np.maximum(raw, 0.0)
+        lo = self.tol
+        hi = max(np.max(weights), 1.0)
+        while alloc_for_lambda(hi).sum() > budget:
+            hi *= 2.0
+        for _ in range(80):
+            mid = 0.5 * (lo + hi)
+            if alloc_for_lambda(mid).sum() > budget:
+                lo = mid
+            else:
+                hi = mid
+        allocation = alloc_for_lambda(hi)
+        total = allocation.sum()
+        if total > self.tol:
+            allocation *= budget / total
+        elif self.outside_good is not None and 0 <= self.outside_good < len(allocation):
+            allocation[self.outside_good] = budget
+        residual = budget - allocation.sum()
+        if self.outside_good is not None and 0 <= self.outside_good < len(allocation) and residual > self.tol:
+            allocation[self.outside_good] += residual
+        return allocation
+    def _check_fitted(self):
+        if self.fit_result_ is None or self.baseline_utility_ is None:
+            raise RuntimeError("Model must be fit before prediction")

searchlibrium-0.0.85/src/SearchLibrium/selection_models.py ADDED Viewed

@@ -0,0 +1,268 @@
+import math
+from dataclasses import dataclass
+import numpy as np
+import pandas as pd
+from scipy.optimize import minimize
+from scipy.stats import norm, t as student_t
+try:
+    import jax
+    import jax.numpy as jnp
+    from jax.scipy.special import ndtr as jax_ndtr
+except ImportError:  # pragma: no cover
+    jax = None
+    jnp = None
+    jax_ndtr = None
+try:
+    from ._choice_model import DiscreteChoiceModel
+except ImportError:
+    from _choice_model import DiscreteChoiceModel
+class BinaryProbit(DiscreteChoiceModel):
+    """Binary probit estimated with JAX autodiff and scipy L-BFGS-B."""
+    def __init__(self, _jax=False):
+        super(BinaryProbit, self).__init__(_jax)
+        self.descr = "Binary Probit"
+        self.result = None
+        self._X_design = None
+    def setup(self, X, y, varnames=None, fit_intercept=True):
+        X = np.asarray(X)
+        y = np.asarray(y).reshape(-1)
+        if varnames is None:
+            varnames = [f"x{i}" for i in range(X.shape[1])]
+        self.X = X
+        self.y = y
+        self.varnames = np.asarray(varnames, dtype="<U64")
+        self.fit_intercept = bool(fit_intercept)
+        self.sample_size = int(X.shape[0])
+        if self.fit_intercept:
+            self._X_design = np.column_stack([np.ones((X.shape[0], 1)), X])
+            self._design_names = np.asarray(["intercept", *self.varnames], dtype="<U64")
+        else:
+            self._X_design = X.copy()
+            self._design_names = self.varnames.copy()
+        return self
+    def _negloglik_jax(self, params, X, y):
+        xb = X @ params
+        p = jnp.clip(jax_ndtr(xb), 1e-10, 1.0 - 1e-10)
+        ll = y * jnp.log(p) + (1.0 - y) * jnp.log(1.0 - p)
+        return -jnp.sum(ll)
+    def fit(self, disp=False, **fit_kwargs):
+        if jax is None or jnp is None or jax_ndtr is None:
+            raise ImportError("JAX is required for BinaryProbit")
+        X = jnp.asarray(self._X_design)
+        y = jnp.asarray(self.y)
+        init = np.zeros(X.shape[1], dtype=float)
+        val_grad = jax.jit(jax.value_and_grad(self._negloglik_jax))
+        def _obj(params_np):
+            val, grad = val_grad(jnp.asarray(params_np), X, y)
+            return float(val), np.asarray(grad, dtype=float)
+        res = minimize(
+            fun=lambda p: _obj(p)[0],
+            x0=init,
+            jac=lambda p: _obj(p)[1],
+            method="L-BFGS-B",
+            options={"disp": bool(disp), "maxiter": int(fit_kwargs.pop("maxiter", 1000))},
+        )
+        self.result = res
+        self.coeff_names = self._design_names.copy()
+        self.coeff_est = np.asarray(res.x, dtype=float)
+        self.loglik = float(-res.fun)
+        self.converged = bool(res.success)
+        self.total_fun_eval = int(getattr(res, "nfev", 0))
+        hess_inv = getattr(res, "hess_inv", None)
+        if hess_inv is not None:
+            if hasattr(hess_inv, "todense"):
+                cov = np.asarray(hess_inv.todense(), dtype=float)
+            else:
+                cov = np.asarray(hess_inv, dtype=float)
+            stderr = np.sqrt(np.clip(np.diag(cov), 1e-12, None))
+        else:
+            stderr = np.full_like(self.coeff_est, np.nan, dtype=float)
+        self.stderr = stderr
+        self.zvalues = self.coeff_est / np.where(stderr > 0, stderr, np.nan)
+        self.pvalues = 2.0 * (1.0 - norm.cdf(np.abs(self.zvalues)))
+        k = len(self.coeff_est)
+        n = max(int(self.sample_size), 1)
+        self.aic = float(2 * k - 2 * self.loglik)
+        self.bic = float(k * np.log(n) - 2 * self.loglik)
+        return res
+    def predict_proba(self, X=None):
+        if self.coeff_est is None:
+            raise RuntimeError("BinaryProbit must be fit before prediction")
+        X_arr = self.X if X is None else np.asarray(X)
+        if self.fit_intercept:
+            X_arr = np.column_stack([np.ones((X_arr.shape[0], 1)), X_arr])
+        xb = X_arr @ self.coeff_est
+        return norm.cdf(xb)
+    def summary_frame(self):
+        if self.coeff_est is None:
+            return pd.DataFrame()
+        return pd.DataFrame({
+            "coef": self.coeff_est,
+            "stderr": self.stderr,
+            "z": self.zvalues,
+            "pvalue": self.pvalues,
+        }, index=self.coeff_names)
+@dataclass
+class _OLSResult:
+    params: pd.Series
+    bse: pd.Series
+    tvalues: pd.Series
+    pvalues: pd.Series
+    llf: float
+class HeckmanTwoStep(DiscreteChoiceModel):
+    """Heckman selection model using JAX probit + closed-form OLS second stage."""
+    def __init__(self, _jax=False):
+        super(HeckmanTwoStep, self).__init__(_jax)
+        self.descr = "Heckman Two-Step"
+        self.selection_result = None
+        self.outcome_result = None
+        self.params_table = pd.DataFrame()
+    def setup(
+        self,
+        selection_X,
+        selection_y,
+        outcome_X,
+        outcome_y,
+        selection_varnames=None,
+        outcome_varnames=None,
+        fit_intercept=True,
+    ):
+        selection_X = np.asarray(selection_X)
+        selection_y = np.asarray(selection_y).reshape(-1)
+        outcome_X = np.asarray(outcome_X)
+        outcome_y = np.asarray(outcome_y).reshape(-1)
+        if selection_varnames is None:
+            selection_varnames = [f"s{i}" for i in range(selection_X.shape[1])]
+        if outcome_varnames is None:
+            outcome_varnames = [f"o{i}" for i in range(outcome_X.shape[1])]
+        self.selection_X = selection_X
+        self.selection_y = selection_y
+        self.outcome_X = outcome_X
+        self.outcome_y = outcome_y
+        self.selection_varnames = np.asarray(selection_varnames, dtype="<U64")
+        self.outcome_varnames = np.asarray(outcome_varnames, dtype="<U64")
+        self.fit_intercept = bool(fit_intercept)
+        self.sample_size = int(selection_X.shape[0])
+        return self
+    def fit(self, disp=False, **fit_kwargs):
+        sel_X = np.asarray(self.selection_X, dtype=float)
+        out_X = np.asarray(self.outcome_X, dtype=float)
+        if self.fit_intercept:
+            sel_X = np.column_stack([np.ones((sel_X.shape[0], 1)), sel_X])
+            out_X = np.column_stack([np.ones((out_X.shape[0], 1)), out_X])
+        probit_model = BinaryProbit(_jax=True)
+        sel_names = (["intercept"] if self.fit_intercept else []) + list(self.selection_varnames)
+        probit_model.setup(sel_X[:, 1:] if self.fit_intercept else sel_X,
+                           self.selection_y,
+                           varnames=sel_names[1:] if self.fit_intercept else sel_names,
+                           fit_intercept=self.fit_intercept)
+        probit_model.fit(disp=disp, **fit_kwargs)
+        xb = sel_X @ probit_model.coeff_est
+        mills = norm.pdf(xb) / np.clip(norm.cdf(xb), 1e-10, None)
+        mask = self.selection_y == 1
+        out_design = np.column_stack([out_X[mask], mills[mask]])
+        out_y = self.outcome_y[mask]
+        xtx = out_design.T @ out_design
+        xtx_inv = np.linalg.pinv(xtx)
+        beta = xtx_inv @ (out_design.T @ out_y)
+        resid = out_y - out_design @ beta
+        dof = max(out_design.shape[0] - out_design.shape[1], 1)
+        sigma2 = float((resid @ resid) / dof)
+        cov = sigma2 * xtx_inv
+        se = np.sqrt(np.clip(np.diag(cov), 1e-12, None))
+        tvals = beta / np.where(se > 0, se, np.nan)
+        pvals = 2.0 * (1.0 - student_t.cdf(np.abs(tvals), df=dof))
+        ll_ols = -0.5 * out_design.shape[0] * (math.log(2.0 * math.pi * sigma2) + 1.0)
+        out_names = (["intercept"] if self.fit_intercept else []) + list(self.outcome_varnames) + ["IMR"]
+        ols = _OLSResult(
+            params=pd.Series(beta, index=out_names),
+            bse=pd.Series(se, index=out_names),
+            tvalues=pd.Series(tvals, index=out_names),
+            pvalues=pd.Series(pvals, index=out_names),
+            llf=float(ll_ols),
+        )
+        self.selection_result = probit_model
+        self.outcome_result = ols
+        self.loglik = float(probit_model.loglik + ll_ols)
+        total_k = len(probit_model.coeff_est) + len(beta)
+        self.aic = float(2 * total_k - 2 * self.loglik)
+        self.bic = float(total_k * np.log(max(self.sample_size, 1)) - 2 * self.loglik)
+        self.converged = bool(probit_model.converged)
+        selection_tbl = pd.DataFrame({
+            "coef": probit_model.coeff_est,
+            "stderr": probit_model.stderr,
+            "z": probit_model.zvalues,
+            "pvalue": probit_model.pvalues,
+        }, index=probit_model.coeff_names)
+        outcome_tbl = pd.DataFrame({
+            "coef": ols.params,
+            "stderr": ols.bse,
+            "z": ols.tvalues,
+            "pvalue": ols.pvalues,
+        })
+        self.params_table = pd.concat(
+            {"selection": selection_tbl, "outcome": outcome_tbl},
+            names=["equation", "term"],
+        )
+        coeff_names = [f"selection::{name}" for name in selection_tbl.index]
+        coeff_names += [f"outcome::{name}" for name in outcome_tbl.index]
+        self.coeff_names = np.asarray(coeff_names, dtype="<U128")
+        self.coeff_est = np.concatenate([selection_tbl["coef"].values, outcome_tbl["coef"].values])
+        self.stderr = np.concatenate([selection_tbl["stderr"].values, outcome_tbl["stderr"].values])
+        self.zvalues = np.concatenate([selection_tbl["z"].values, outcome_tbl["z"].values])
+        self.pvalues = np.concatenate([selection_tbl["pvalue"].values, outcome_tbl["pvalue"].values])
+        return {"probit": probit_model, "ols": ols}
+    def predict_selection_proba(self, X=None):
+        if self.selection_result is None:
+            raise RuntimeError("HeckmanTwoStep must be fit before prediction")
+        X_arr = self.selection_X if X is None else np.asarray(X)
+        return self.selection_result.predict_proba(X_arr)
+    def predict_outcome(self, X=None, selection_probability=None):
+        if self.outcome_result is None:
+            raise RuntimeError("HeckmanTwoStep must be fit before prediction")
+        X_arr = self.outcome_X if X is None else np.asarray(X)
+        if self.fit_intercept:
+            X_arr = np.column_stack([np.ones((X_arr.shape[0], 1)), X_arr])
+        if selection_probability is None:
+            selection_probability = np.clip(self.predict_selection_proba(), 1e-10, 1 - 1e-10)
+        xb = norm.ppf(np.clip(selection_probability, 1e-10, 1 - 1e-10))
+        imr = norm.pdf(xb) / np.clip(norm.cdf(xb), 1e-10, None)
+        X_aug = np.column_stack([X_arr, imr])
+        return X_aug @ self.outcome_result.params.values
+    def summary_frame(self):
+        return self.params_table.copy()

{searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/siman.py RENAMED Viewed

@@ -1102,7 +1102,7 @@ class SA(Search):
         # {
             if overall_best_solution is None or \
                 is_better(self.best_sol.obj(0), overall_best_solution.obj(0), self.param.sign_crit(0)):
-                overall_best_solution = self.best_sol  # Update overall best solution
+                overall_best_solution = self.copy_solution(self.best_sol)  # Update overall best solution (deep copy to prevent overwriting)
             elif overall_best_solution is not None and \
                 is_worse(self.best_sol.obj(0), overall_best_solution.obj(0), self.param.sign_crit(0)):
                 self.update_best(overall_best_solution)  # Revise best solution of current SA solver

{searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/threshold.py RENAMED Viewed

@@ -448,7 +448,7 @@ class TA(Search):
         # {
             if overall_best_solution is None or \
                 is_better(self.best_sol.obj(0), overall_best_solution.obj(0), self.param.sign_crit(0)):
-                overall_best_solution = self.best_sol  # Update overall best solution
+                overall_best_solution = self.copy_solution(self.best_sol)  # Update overall best solution (deep copy to prevent overwriting)
             elif overall_best_solution is not None and \
                 is_worse(self.best_sol.obj(0), overall_best_solution.obj(0), self.param.sign_crit(0)):
                 self.update_best(overall_best_solution)  # Revise best solution of current TA solver

searchlibrium-0.0.85/src/SearchLibrium/version.txt ADDED Viewed

@@ -0,0 +1 @@
+0.0.85

{searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: SearchLibrium
-Version: 0.0.83
+Version: 0.0.85
 Summary: A Python package for econometric models driven by search
 Author: Alexander Paz Prithvi Beeramole, Robert Burdett
 Author-email: Zeke Ahern <z.ahern@qut.edu.au>

{searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium.egg-info/SOURCES.txt RENAMED Viewed

@@ -17,6 +17,7 @@ src/SearchLibrium/harmony.py
 src/SearchLibrium/latent_class.py
 src/SearchLibrium/main.py
 src/SearchLibrium/main_debug.py
+src/SearchLibrium/mdcev.py
 src/SearchLibrium/misc.py
 src/SearchLibrium/mixed_logit.py
 src/SearchLibrium/mixed_nested.py
@@ -28,6 +29,7 @@ src/SearchLibrium/ordered_logit.py
 src/SearchLibrium/ordered_logit_mixed.py
 src/SearchLibrium/rrm.py
 src/SearchLibrium/search.py
+src/SearchLibrium/selection_models.py
 src/SearchLibrium/setup.py
 src/SearchLibrium/siman.py
 src/SearchLibrium/threshold.py