SearchLibrium 0.0.83__tar.gz → 0.0.85__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/PKG-INFO +1 -1
  2. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/pyproject.toml +1 -1
  3. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/__init__.py +26 -9
  4. searchlibrium-0.0.85/src/SearchLibrium/mdcev.py +344 -0
  5. searchlibrium-0.0.85/src/SearchLibrium/selection_models.py +268 -0
  6. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/siman.py +1 -1
  7. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/threshold.py +1 -1
  8. searchlibrium-0.0.85/src/SearchLibrium/version.txt +1 -0
  9. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium.egg-info/PKG-INFO +1 -1
  10. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium.egg-info/SOURCES.txt +2 -0
  11. searchlibrium-0.0.83/src/SearchLibrium/version.txt +0 -1
  12. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/README.md +0 -0
  13. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/setup.cfg +0 -0
  14. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/Halton.py +0 -0
  15. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/MixedLogit.py +0 -0
  16. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/Mode_Activity_Nested.py +0 -0
  17. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/RandomP.py +0 -0
  18. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/SEARCH_SM_MARIO.py +0 -0
  19. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/Two_Level_Nest.py +0 -0
  20. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/__main__.py +0 -0
  21. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/_choice_model.py +0 -0
  22. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/_device.py +0 -0
  23. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/bhhh/minimize.py +0 -0
  24. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/boxcox_functions.py +0 -0
  25. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/call_meta.py +0 -0
  26. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/constraints_builder.py +0 -0
  27. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/harmony.py +0 -0
  28. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/latent_class.py +0 -0
  29. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/main.py +0 -0
  30. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/main_debug.py +0 -0
  31. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/misc.py +0 -0
  32. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/mixed_logit.py +0 -0
  33. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/mixed_nested.py +0 -0
  34. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/mixedrrm.py +0 -0
  35. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/multinomial_logit.py +0 -0
  36. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/multinomial_nested.py +0 -0
  37. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/multinomial_probit.py +0 -0
  38. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/ordered_logit.py +0 -0
  39. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/ordered_logit_mixed.py +0 -0
  40. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/rrm.py +0 -0
  41. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/search.py +0 -0
  42. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium/setup.py +0 -0
  43. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium.egg-info/dependency_links.txt +0 -0
  44. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium.egg-info/entry_points.txt +0 -0
  45. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium.egg-info/requires.txt +0 -0
  46. {searchlibrium-0.0.83 → searchlibrium-0.0.85}/src/SearchLibrium.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: SearchLibrium
3
- Version: 0.0.83
3
+ Version: 0.0.85
4
4
  Summary: A Python package for econometric models driven by search
5
5
  Author: Alexander Paz Prithvi Beeramole, Robert Burdett
6
6
  Author-email: Zeke Ahern <z.ahern@qut.edu.au>
@@ -59,7 +59,7 @@ Homepage = "https://github.com/zahern/HypothesisX"
59
59
  realpython = "SearchLibrium.__main__:main"
60
60
 
61
61
  [tool.bumpver]
62
- current_version = "0.0.83"
62
+ current_version = "0.0.85"
63
63
  version_pattern = "MAJOR.MINOR.PATCH"
64
64
  commit_message = "[skip ci] Bump version {old_version} -> {new_version}"
65
65
  commit = true
@@ -55,7 +55,13 @@ def new_features():
55
55
  """)
56
56
 
57
57
  def get_version_from_pkg_info():
58
- """Reads the version from the PKG-INFO file."""
58
+ """Reads the installed package version via importlib.metadata."""
59
+ try:
60
+ from importlib.metadata import version as _pkg_version
61
+ return _pkg_version("SearchLibrium")
62
+ except Exception:
63
+ pass
64
+ # Fallback: read from egg-info PKG-INFO (editable installs)
59
65
  pkg_info_path = os.path.join(os.path.dirname(__file__), "../SearchLibrium.egg-info/PKG-INFO")
60
66
  try:
61
67
  with open(pkg_info_path, "r") as f:
@@ -63,7 +69,8 @@ def get_version_from_pkg_info():
63
69
  if line.startswith("Version:"):
64
70
  return line.split(":")[1].strip()
65
71
  except FileNotFoundError:
66
- return "0.0.32"
72
+ pass
73
+ return "unknown"
67
74
 
68
75
  __version__ = get_version_from_pkg_info()
69
76
 
@@ -86,7 +93,10 @@ try:
86
93
  from .rrm import RandomRegret
87
94
  from .mixedrrm import MixedRandomRegret
88
95
  from .ordered_logit import OrderedLogit, OrderedLogitLong
96
+ from .selection_models import BinaryProbit, HeckmanTwoStep
89
97
  from .latent_class import LatentClassMixedLogit
98
+ from .mdcev import MDCEVFitResult, MDCEVModel
99
+ from .multinomial_probit import MultinomialProbit
90
100
  from .RandomP import RandomParameters
91
101
  from .constraints_builder import ConstraintBuilder, create_constraints
92
102
  from .search import Parameters
@@ -102,21 +112,28 @@ except ImportError as e:
102
112
  from rrm import RandomRegret
103
113
  from mixedrrm import MixedRandomRegret
104
114
  from ordered_logit import OrderedLogit, OrderedLogitLong
115
+ from selection_models import BinaryProbit, HeckmanTwoStep
105
116
  from latent_class import LatentClassMixedLogit
117
+ from mdcev import MDCEVFitResult, MDCEVModel
118
+ from multinomial_probit import MultinomialProbit
106
119
  from RandomP import RandomParameters
107
120
  from constraints_builder import ConstraintBuilder, create_constraints
108
121
  from search import Parameters
109
122
  from call_meta import call_siman, call_harmony, call_search, estimate_ctrl
110
123
  try:
111
124
  from .main import print_ascii_art_logo
112
- except:
113
- from main import print_ascii_art_logo
114
-
125
+ except Exception:
126
+ try:
127
+ from main import print_ascii_art_logo
128
+ except Exception:
129
+ print_ascii_art_logo = None
115
130
 
116
- try:
117
- print_ascii_art_logo()
118
- except ImportError:
119
- print("Error importing print_ascii_art_logo from main module. Continuing without logo.")
131
+
132
+ if print_ascii_art_logo is not None:
133
+ try:
134
+ print_ascii_art_logo()
135
+ except Exception:
136
+ print("SearchLibrium logo skipped; optional display dependencies are missing.")
120
137
 
121
138
  #print('loaded all')
122
139
  print('Welcome to SearchLibrium')
@@ -0,0 +1,344 @@
1
+ """MDCEV budget-allocation prototype for SearchLibrium.
2
+
3
+ This module implements a compact translated-utility MDCEV-style allocator for
4
+ continuous budget splits such as daily time-use or discretionary activity
5
+ budgets. The implementation is forecasting-oriented: it provides a stable
6
+ fitting heuristic from observed allocations together with an analytical
7
+ budget-allocation solver based on the translated utility first-order
8
+ conditions.
9
+
10
+ The class is intended as a practical bridge between the current scalar budget
11
+ models and a fuller MDCEV pipeline. It includes both a stable heuristic fit
12
+ and a likelihood-based quasi-MLE refinement.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import dataclass
18
+ from typing import Iterable, Optional
19
+
20
+ import numpy as np
21
+ import pandas as pd
22
+ from scipy.optimize import minimize
23
+
24
+
25
+ def _as_2d_float(array_like) -> np.ndarray:
26
+ arr = np.asarray(array_like, dtype=float)
27
+ if arr.ndim == 1:
28
+ arr = arr.reshape(1, -1)
29
+ if arr.ndim != 2:
30
+ raise ValueError("Expected a 2D array of allocations")
31
+ return np.nan_to_num(arr, nan=0.0, posinf=0.0, neginf=0.0)
32
+
33
+
34
@dataclass
class MDCEVFitResult:
    """Per-alternative parameters and sample moments stored by ``MDCEVModel``.

    Field order is part of the interface (the dataclass accepts positional
    construction), so it must not be rearranged.
    """

    # Alternative names, one per allocation column.
    labels: list[str]
    # Baseline (log-scale) utility of each alternative.
    baseline_utility: np.ndarray
    # Satiation exponent of each alternative.
    alpha: np.ndarray
    # Translation parameter of each alternative.
    gamma: np.ndarray
    # Fraction of observations with a strictly positive allocation.
    participation_rate: np.ndarray
    # Column means of the observed allocation matrix.
    mean_allocation: np.ndarray
    # Mean of the per-observation total budgets.
    mean_budget: float
43
+
44
+
45
class MDCEVModel:
    """Translated-utility MDCEV-style allocator.

    Parameters are learned from observed budget shares using stable moment-based
    heuristics, then predictions are produced by solving the translated-utility
    KKT system with a bisection search on the shadow price.

    Parameters
    ----------
    outside_good:
        Column index of the reference alternative whose baseline utility is
        pinned to zero and which absorbs any unallocated budget. ``None`` or
        an index outside ``[0, n_alt)`` disables the outside good.
    alpha_floor, alpha_cap:
        Lower and upper bounds applied to the satiation exponents.
    gamma_floor:
        Lower bound applied to the translation parameters.
    tol:
        Numerical tolerance used for positivity checks and clipping.
    """

    def __init__(
        self,
        outside_good: Optional[int] = 0,
        alpha_floor: float = 0.05,
        alpha_cap: float = 0.95,
        gamma_floor: float = 1e-3,
        tol: float = 1e-9,
    ):
        self.outside_good = outside_good
        self.alpha_floor = alpha_floor
        self.alpha_cap = alpha_cap
        self.gamma_floor = gamma_floor
        self.tol = tol

        self.labels_: list[str] | None = None
        self.baseline_utility_: np.ndarray | None = None
        self.alpha_: np.ndarray | None = None
        self.gamma_: np.ndarray | None = None
        self.fit_result_: MDCEVFitResult | None = None

        # Populated by fit_mle(); defined here so reading them before a
        # likelihood fit yields None instead of an AttributeError.
        self.noise_sigma_: float | None = None
        self.mle_success_: bool | None = None
        self.mle_message_: str | None = None

    def _has_outside_good(self, n_alt: int) -> bool:
        """Return True when ``outside_good`` is a valid column index."""
        return self.outside_good is not None and 0 <= self.outside_good < n_alt

    def fit(self, allocations, labels: Optional[Iterable[str]] = None):
        """Estimate baseline utility and satiation terms from observed allocations.

        Parameters
        ----------
        allocations:
            Matrix of observed budgets split across alternatives. Rows are
            observations and columns are alternatives.
        labels:
            Optional alternative labels.

        Returns
        -------
        MDCEVModel
            ``self``, to allow call chaining.

        Raises
        ------
        ValueError
            If there are no observations or alternatives, if any observation
            has a non-positive total budget, or if ``labels`` has the wrong
            length.
        """
        y = _as_2d_float(allocations)
        n_obs, n_alt = y.shape
        if n_obs == 0 or n_alt == 0:
            # Moments of an empty sample are NaN; fail loudly instead.
            raise ValueError(
                "allocations must contain at least one observation and one alternative"
            )
        budgets = y.sum(axis=1)
        if np.any(budgets < self.tol):
            raise ValueError("Each observation must have a positive total budget")

        labels_list = list(labels) if labels is not None else [f"alt_{i}" for i in range(n_alt)]
        if len(labels_list) != n_alt:
            raise ValueError("labels length must match number of alternatives")

        has_outside = self._has_outside_good(n_alt)

        positive = y > self.tol
        participation = positive.mean(axis=0)
        mean_allocation = y.mean(axis=0)
        share = y.sum(axis=0) / np.clip(y.sum(), self.tol, None)

        # Baseline utilities are log aggregate shares, normalised either
        # against the outside good or to zero mean.
        if has_outside:
            ref_share = max(float(share[self.outside_good]), self.tol)
            baseline = np.log(np.clip(share, self.tol, None)) - np.log(ref_share)
            baseline[self.outside_good] = 0.0
        else:
            baseline = np.log(np.clip(share, self.tol, None))
            baseline = baseline - baseline.mean()

        gamma = np.full(n_alt, self.gamma_floor, dtype=float)
        alpha = np.full(n_alt, 0.5, dtype=float)

        for idx in range(n_alt):
            pos_vals = y[positive[:, idx], idx]
            if pos_vals.size == 0:
                # Never-chosen alternative: make it unattractive.
                gamma[idx] = max(np.median(budgets) * 0.05, self.gamma_floor)
                alpha[idx] = self.alpha_floor
                baseline[idx] = min(baseline[idx], -8.0)
                continue

            median_pos = float(np.median(pos_vals))
            mean_pos = float(np.mean(pos_vals))
            std_pos = float(np.std(pos_vals))
            cv_pos = std_pos / max(mean_pos, self.tol)

            gamma[idx] = max(median_pos * max(1.0 - participation[idx], 0.1), self.gamma_floor)
            raw_alpha = 0.2 + 0.6 * participation[idx] / (1.0 + cv_pos)
            alpha[idx] = float(np.clip(raw_alpha, self.alpha_floor, self.alpha_cap))

        if has_outside:
            gamma[self.outside_good] = self.gamma_floor
            alpha[self.outside_good] = max(alpha[self.outside_good], 0.8)

        self.labels_ = labels_list
        self.baseline_utility_ = baseline
        self.alpha_ = alpha
        self.gamma_ = gamma
        self.fit_result_ = MDCEVFitResult(
            labels=labels_list,
            baseline_utility=baseline.copy(),
            alpha=alpha.copy(),
            gamma=gamma.copy(),
            participation_rate=participation.copy(),
            mean_allocation=mean_allocation.copy(),
            mean_budget=float(np.mean(budgets)),
        )
        return self

    def fit_mle(
        self,
        allocations,
        labels: Optional[Iterable[str]] = None,
        maxiter: int = 400,
        l2_penalty: float = 1e-4,
    ):
        """Likelihood-based parameter refinement.

        The objective is a Gaussian log-likelihood on log allocations around
        translated-utility MDCEV deterministic predictions. This is a practical
        quasi-MLE refinement that preserves the MDCEV budget constraint while
        improving fit over pure moments.

        The heuristic :meth:`fit` runs first to provide starting values. After
        optimisation the refined parameters are written to both the model
        attributes and ``fit_result_``, so :meth:`summary` reflects them.

        Parameters
        ----------
        allocations, labels:
            As for :meth:`fit`.
        maxiter:
            Maximum number of L-BFGS-B iterations.
        l2_penalty:
            Ridge penalty applied to the unconstrained parameter vector.

        Returns
        -------
        MDCEVModel
            ``self``. ``mle_success_`` and ``mle_message_`` hold the optimiser
            status and ``noise_sigma_`` the estimated residual scale.
        """
        self.fit(allocations, labels=labels)

        y = _as_2d_float(allocations)
        budgets = y.sum(axis=1)
        n_alt = y.shape[1]

        # The outside good's baseline utility is fixed, hence not optimised.
        free_base_idx = [i for i in range(n_alt) if i != self.outside_good]

        def _pack(base, alpha, gamma, sigma):
            """Map constrained parameters to the unconstrained vector."""
            b = np.asarray(base, dtype=float)
            a = np.asarray(alpha, dtype=float)
            g = np.asarray(gamma, dtype=float)

            p = []
            p.extend(b[free_base_idx].tolist())
            # Logit of alpha rescaled to (alpha_floor, alpha_cap).
            p.extend(np.log(np.clip((a - self.alpha_floor) / np.clip(self.alpha_cap - a, self.tol, None), self.tol, None)).tolist())
            p.extend(np.log(np.clip(g, self.gamma_floor, None)).tolist())
            p.append(np.log(max(float(sigma), 1e-3)))
            return np.asarray(p, dtype=float)

        def _unpack(theta):
            """Inverse of ``_pack``: recover (base, alpha, gamma, sigma)."""
            theta = np.asarray(theta, dtype=float)
            o = 0

            base = self.baseline_utility_.copy()
            for idx in free_base_idx:
                base[idx] = theta[o]
                o += 1
            if self._has_outside_good(n_alt):
                base[self.outside_good] = 0.0

            alpha_raw = theta[o:o + n_alt]
            o += n_alt
            alpha_sig = 1.0 / (1.0 + np.exp(-alpha_raw))
            alpha = self.alpha_floor + (self.alpha_cap - self.alpha_floor) * alpha_sig

            gamma_raw = theta[o:o + n_alt]
            o += n_alt
            gamma = np.maximum(np.exp(gamma_raw), self.gamma_floor)

            sigma = max(np.exp(theta[o]), 1e-3)
            return base, alpha, gamma, sigma

        def _neg_loglike(theta):
            base, alpha, gamma, sigma = _unpack(theta)

            # _solve_budget reads alpha_/gamma_ from self, so swap the trial
            # values in and always restore the previous state afterwards.
            old_b, old_a, old_g = self.baseline_utility_, self.alpha_, self.gamma_
            self.baseline_utility_, self.alpha_, self.gamma_ = base, alpha, gamma
            try:
                mu = np.zeros_like(y)
                for i, b in enumerate(budgets):
                    mu[i] = self._solve_budget(float(b), base)
            finally:
                self.baseline_utility_, self.alpha_, self.gamma_ = old_b, old_a, old_g

            log_y = np.log(np.clip(y, self.tol, None))
            log_mu = np.log(np.clip(mu, self.tol, None))
            resid = log_y - log_mu
            ll = -0.5 * resid.size * np.log(2.0 * np.pi * sigma * sigma)
            ll -= 0.5 * np.sum((resid / sigma) ** 2)
            ll -= l2_penalty * np.sum(theta * theta)
            return -float(ll)

        theta0 = _pack(self.baseline_utility_, self.alpha_, self.gamma_, sigma=0.5)
        res = minimize(
            _neg_loglike,
            theta0,
            method="L-BFGS-B",
            options={"maxiter": int(maxiter), "ftol": 1e-9},
        )

        base, alpha, gamma, sigma = _unpack(res.x)
        self.baseline_utility_ = base
        self.alpha_ = alpha
        self.gamma_ = gamma
        self.noise_sigma_ = float(sigma)
        self.mle_success_ = bool(res.success)
        self.mle_message_ = str(res.message)

        # Keep the stored result (and therefore summary()) in sync with the
        # refined parameters; the sample moments are unchanged.
        heuristic = self.fit_result_
        self.fit_result_ = MDCEVFitResult(
            labels=heuristic.labels,
            baseline_utility=base.copy(),
            alpha=alpha.copy(),
            gamma=gamma.copy(),
            participation_rate=heuristic.participation_rate,
            mean_allocation=heuristic.mean_allocation,
            mean_budget=heuristic.mean_budget,
        )
        return self

    def summary(self) -> pd.DataFrame:
        """Return one row per alternative with parameters and sample moments.

        Raises
        ------
        RuntimeError
            If the model has not been fit.
        """
        if self.fit_result_ is None:
            raise RuntimeError("Model must be fit before calling summary()")
        result = self.fit_result_
        return pd.DataFrame(
            {
                "alternative": result.labels,
                "baseline_utility": result.baseline_utility,
                "alpha": result.alpha,
                "gamma": result.gamma,
                "participation_rate": result.participation_rate,
                "mean_allocation": result.mean_allocation,
            }
        )

    def predict(self, budgets, utility_shift=None) -> np.ndarray:
        """Predict deterministic budget allocations for one or more budgets.

        Parameters
        ----------
        budgets:
            Scalar or vector of total budgets.
        utility_shift:
            Optional additive utility adjustment. Can be shape ``(J,)`` or
            ``(N, J)``.

        Returns
        -------
        np.ndarray
            Array of shape ``(N, J)`` with one allocation row per budget.
        """
        self._check_fitted()
        budgets_arr = np.asarray(budgets, dtype=float).reshape(-1)
        shifts = self._prepare_utility_shift(utility_shift, len(budgets_arr))

        predictions = np.zeros((len(budgets_arr), len(self.baseline_utility_)), dtype=float)
        for row_idx, budget in enumerate(budgets_arr):
            predictions[row_idx] = self._solve_budget(budget, self.baseline_utility_ + shifts[row_idx])
        return predictions

    def simulate(self, budgets, utility_shift=None, n_draws: int = 100, random_state: Optional[int] = None) -> np.ndarray:
        """Simulate stochastic budget allocations with Gumbel utility shocks.

        Returns
        -------
        np.ndarray
            Array of shape ``(n_draws, N, J)``.
        """
        self._check_fitted()
        budgets_arr = np.asarray(budgets, dtype=float).reshape(-1)
        shifts = self._prepare_utility_shift(utility_shift, len(budgets_arr))
        rng = np.random.default_rng(random_state)

        sims = np.zeros((n_draws, len(budgets_arr), len(self.baseline_utility_)), dtype=float)
        for draw_idx in range(n_draws):
            shocks = rng.gumbel(loc=0.0, scale=1.0, size=shifts.shape)
            for row_idx, budget in enumerate(budgets_arr):
                sims[draw_idx, row_idx] = self._solve_budget(
                    budget,
                    self.baseline_utility_ + shifts[row_idx] + shocks[row_idx],
                )
        return sims

    def _prepare_utility_shift(self, utility_shift, n_rows: int) -> np.ndarray:
        """Broadcast ``utility_shift`` to an ``(n_rows, J)`` array.

        Raises
        ------
        ValueError
            If the shift is neither ``(J,)`` nor ``(n_rows, J)``.
        """
        n_alt = len(self.baseline_utility_)
        if utility_shift is None:
            return np.zeros((n_rows, n_alt), dtype=float)

        shift_arr = np.asarray(utility_shift, dtype=float)
        if shift_arr.ndim == 1:
            if shift_arr.shape[0] != n_alt:
                raise ValueError("utility_shift has the wrong number of alternatives")
            return np.repeat(shift_arr.reshape(1, -1), n_rows, axis=0)
        if shift_arr.shape != (n_rows, n_alt):
            raise ValueError("utility_shift must have shape (J,) or (N, J)")
        return shift_arr

    def _solve_budget(self, budget: float, utility_index: np.ndarray) -> np.ndarray:
        """Allocate ``budget`` across alternatives for one utility vector.

        Bisects on the shadow price ``lam`` until the implied allocation
        ``max((w / lam) ** (1 / (1 - alpha)) - gamma, 0)`` sums to the budget,
        then rescales so the budget is met exactly.
        """
        n_alt = len(self.baseline_utility_)
        if budget <= self.tol:
            return np.zeros(n_alt, dtype=float)

        tol = self.tol
        gamma = self.gamma_
        weights = np.exp(np.clip(utility_index, -40.0, 40.0))
        # Loop-invariant exponent, hoisted out of the bisection.
        power = 1.0 / np.clip(1.0 - self.alpha_, tol, None)

        def alloc_for_lambda(lam: float) -> np.ndarray:
            lam = max(lam, tol)
            raw = np.power(weights / lam, power) - gamma
            return np.maximum(raw, 0.0)

        # Very small trial prices can overflow to +inf; that simply reads as
        # "allocation exceeds budget", so the warning is noise.
        with np.errstate(over="ignore"):
            lo = tol
            hi = max(np.max(weights), 1.0)
            while alloc_for_lambda(hi).sum() > budget:
                hi *= 2.0

            # Allocation is non-increasing in lam: overspending means the
            # price is too low.
            for _ in range(80):
                mid = 0.5 * (lo + hi)
                if alloc_for_lambda(mid).sum() > budget:
                    lo = mid
                else:
                    hi = mid

            allocation = alloc_for_lambda(hi)

        has_outside = self._has_outside_good(len(allocation))
        total = allocation.sum()
        if total > tol:
            allocation *= budget / total
        elif has_outside:
            allocation[self.outside_good] = budget
        else:
            # No alternative attracts spending and there is no outside good to
            # absorb it: split by utility weight so the budget is still spent.
            allocation = budget * weights / weights.sum()

        residual = budget - allocation.sum()
        if has_outside and residual > tol:
            allocation[self.outside_good] += residual
        return allocation

    def _check_fitted(self):
        """Raise ``RuntimeError`` unless the model has been fit."""
        if self.fit_result_ is None or self.baseline_utility_ is None:
            raise RuntimeError("Model must be fit before prediction")
@@ -0,0 +1,268 @@
1
+ import math
2
+ from dataclasses import dataclass
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+ from scipy.optimize import minimize
7
+ from scipy.stats import norm, t as student_t
8
+
9
+ try:
10
+ import jax
11
+ import jax.numpy as jnp
12
+ from jax.scipy.special import ndtr as jax_ndtr
13
+ except ImportError: # pragma: no cover
14
+ jax = None
15
+ jnp = None
16
+ jax_ndtr = None
17
+
18
+ try:
19
+ from ._choice_model import DiscreteChoiceModel
20
+ except ImportError:
21
+ from _choice_model import DiscreteChoiceModel
22
+
23
+
24
class BinaryProbit(DiscreteChoiceModel):
    """Binary probit estimated with JAX autodiff and scipy L-BFGS-B."""

    def __init__(self, _jax=False):
        super().__init__(_jax)
        self.descr = "Binary Probit"
        self.result = None
        self._X_design = None

    def setup(self, X, y, varnames=None, fit_intercept=True):
        """Store the data and build the design matrix.

        Parameters
        ----------
        X:
            Regressor matrix with one row per observation. A 1D input is
            treated as a single regressor column.
        y:
            Binary outcome, flattened to 1D.
        varnames:
            Optional regressor names; defaults to ``x0, x1, ...``.
        fit_intercept:
            When true, a leading column of ones named ``"intercept"`` is added.

        Returns
        -------
        BinaryProbit
            ``self``.

        Raises
        ------
        ValueError
            If ``X`` is not 1D/2D or ``y`` does not have one entry per row.
        """
        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        if X.ndim != 2:
            raise ValueError("X must be a 2D array of regressors")
        y = np.asarray(y).reshape(-1)
        if y.shape[0] != X.shape[0]:
            raise ValueError("y must have one entry per row of X")
        if varnames is None:
            varnames = [f"x{i}" for i in range(X.shape[1])]
        self.X = X
        self.y = y
        self.varnames = np.asarray(varnames, dtype="<U64")
        self.fit_intercept = bool(fit_intercept)
        self.sample_size = int(X.shape[0])
        if self.fit_intercept:
            self._X_design = np.column_stack([np.ones((X.shape[0], 1)), X])
            self._design_names = np.asarray(["intercept", *self.varnames], dtype="<U64")
        else:
            self._X_design = X.copy()
            self._design_names = self.varnames.copy()
        return self

    def _negloglik_jax(self, params, X, y):
        """Negative Bernoulli log-likelihood with a standard-normal CDF link."""
        xb = X @ params
        # Clip probabilities away from 0/1 so the logs stay finite.
        p = jnp.clip(jax_ndtr(xb), 1e-10, 1.0 - 1e-10)
        ll = y * jnp.log(p) + (1.0 - y) * jnp.log(1.0 - p)
        return -jnp.sum(ll)

    def _covariance(self, res, X, y):
        """Return the coefficient covariance matrix, or ``None`` if unavailable.

        Prefers the inverse of the autodiff Hessian of the negative
        log-likelihood at the optimum. The optimiser's ``hess_inv`` is only a
        limited-memory approximation and is used as a fallback.
        """
        try:
            hess = np.asarray(
                jax.hessian(self._negloglik_jax)(jnp.asarray(res.x), X, y),
                dtype=float,
            )
            if np.all(np.isfinite(hess)):
                return np.linalg.pinv(hess)
        except (TypeError, ValueError, FloatingPointError, np.linalg.LinAlgError):
            pass  # fall through to the optimiser's approximation

        hess_inv = getattr(res, "hess_inv", None)
        if hess_inv is None:
            return None
        if hasattr(hess_inv, "todense"):
            return np.asarray(hess_inv.todense(), dtype=float)
        return np.asarray(hess_inv, dtype=float)

    def fit(self, disp=False, **fit_kwargs):
        """Maximise the probit likelihood and populate the result attributes.

        Parameters
        ----------
        disp:
            Forwarded to the optimiser's ``disp`` option.
        **fit_kwargs:
            Only ``maxiter`` (default 1000) is read.

        Returns
        -------
        scipy.optimize.OptimizeResult
            The raw optimiser result (also stored on ``self.result``).

        Raises
        ------
        ImportError
            If JAX is not installed.
        RuntimeError
            If :meth:`setup` has not been called.
        """
        if jax is None or jnp is None or jax_ndtr is None:
            raise ImportError("JAX is required for BinaryProbit")
        if self._X_design is None:
            raise RuntimeError("BinaryProbit.setup() must be called before fit()")

        X = jnp.asarray(self._X_design)
        y = jnp.asarray(self.y)
        init = np.zeros(X.shape[1], dtype=float)

        val_grad = jax.jit(jax.value_and_grad(self._negloglik_jax))

        def _obj(params_np):
            val, grad = val_grad(jnp.asarray(params_np), X, y)
            return float(val), np.asarray(grad, dtype=float)

        # jac=True tells scipy that _obj returns (value, gradient), so each
        # optimiser step evaluates the JAX function once instead of twice.
        res = minimize(
            fun=_obj,
            x0=init,
            jac=True,
            method="L-BFGS-B",
            options={"disp": bool(disp), "maxiter": int(fit_kwargs.pop("maxiter", 1000))},
        )
        self.result = res
        self.coeff_names = self._design_names.copy()
        self.coeff_est = np.asarray(res.x, dtype=float)
        self.loglik = float(-res.fun)
        self.converged = bool(res.success)
        self.total_fun_eval = int(getattr(res, "nfev", 0))

        cov = self._covariance(res, X, y)
        if cov is not None:
            stderr = np.sqrt(np.clip(np.diag(cov), 1e-12, None))
        else:
            stderr = np.full_like(self.coeff_est, np.nan, dtype=float)

        self.stderr = stderr
        self.zvalues = self.coeff_est / np.where(stderr > 0, stderr, np.nan)
        # Survival function keeps precision in the tails where 1 - cdf
        # rounds to zero.
        self.pvalues = 2.0 * norm.sf(np.abs(self.zvalues))
        k = len(self.coeff_est)
        n = max(int(self.sample_size), 1)
        self.aic = float(2 * k - 2 * self.loglik)
        self.bic = float(k * np.log(n) - 2 * self.loglik)
        return res

    def predict_proba(self, X=None):
        """Return P(y = 1) for ``X`` (or the training data when ``X`` is None).

        Raises
        ------
        RuntimeError
            If the model has not been fit.
        """
        # getattr: this class does not itself initialise coeff_est before fit().
        coeff = getattr(self, "coeff_est", None)
        if coeff is None:
            raise RuntimeError("BinaryProbit must be fit before prediction")
        X_arr = self.X if X is None else np.asarray(X)
        if self.fit_intercept:
            X_arr = np.column_stack([np.ones((X_arr.shape[0], 1)), X_arr])
        xb = X_arr @ coeff
        return norm.cdf(xb)

    def summary_frame(self):
        """Return coefficients, standard errors, z and p-values as a DataFrame.

        An empty DataFrame is returned when the model has not been fit.
        """
        coeff = getattr(self, "coeff_est", None)
        if coeff is None:
            return pd.DataFrame()
        return pd.DataFrame({
            "coef": coeff,
            "stderr": self.stderr,
            "z": self.zvalues,
            "pvalue": self.pvalues,
        }, index=self.coeff_names)
122
+
123
+
124
+ @dataclass
125
+ class _OLSResult:
126
+ params: pd.Series
127
+ bse: pd.Series
128
+ tvalues: pd.Series
129
+ pvalues: pd.Series
130
+ llf: float
131
+
132
+
133
class HeckmanTwoStep(DiscreteChoiceModel):
    """Heckman selection model using JAX probit + closed-form OLS second stage.

    Stage one fits a :class:`BinaryProbit` for selection. Stage two regresses
    the outcome on its regressors plus the inverse Mills ratio (``"IMR"``),
    using only the rows where ``selection_y == 1``.

    NOTE(review): the second-stage standard errors are plain OLS
    ``sigma^2 (X'X)^-1``; no adjustment for the estimated inverse Mills ratio
    is applied.
    """

    def __init__(self, _jax=False):
        super().__init__(_jax)
        self.descr = "Heckman Two-Step"
        self.selection_result = None
        self.outcome_result = None
        self.params_table = pd.DataFrame()

    def setup(
        self,
        selection_X,
        selection_y,
        outcome_X,
        outcome_y,
        selection_varnames=None,
        outcome_varnames=None,
        fit_intercept=True,
    ):
        """Store the selection and outcome data.

        All four arrays must describe the same observations, in the same
        order. 1D regressor inputs are treated as a single column.

        Returns
        -------
        HeckmanTwoStep
            ``self``.

        Raises
        ------
        ValueError
            If the inputs do not share the same number of rows.
        """
        selection_X = np.asarray(selection_X)
        if selection_X.ndim == 1:
            selection_X = selection_X.reshape(-1, 1)
        selection_y = np.asarray(selection_y).reshape(-1)
        outcome_X = np.asarray(outcome_X)
        if outcome_X.ndim == 1:
            outcome_X = outcome_X.reshape(-1, 1)
        outcome_y = np.asarray(outcome_y).reshape(-1)

        n_rows = selection_X.shape[0]
        if not (selection_y.shape[0] == outcome_X.shape[0] == outcome_y.shape[0] == n_rows):
            # fit() masks the outcome arrays with selection_y, so every input
            # needs one row per observation.
            raise ValueError(
                "selection_X, selection_y, outcome_X and outcome_y must have the same number of rows"
            )

        if selection_varnames is None:
            selection_varnames = [f"s{i}" for i in range(selection_X.shape[1])]
        if outcome_varnames is None:
            outcome_varnames = [f"o{i}" for i in range(outcome_X.shape[1])]
        self.selection_X = selection_X
        self.selection_y = selection_y
        self.outcome_X = outcome_X
        self.outcome_y = outcome_y
        self.selection_varnames = np.asarray(selection_varnames, dtype="<U64")
        self.outcome_varnames = np.asarray(outcome_varnames, dtype="<U64")
        self.fit_intercept = bool(fit_intercept)
        self.sample_size = int(n_rows)
        return self

    def fit(self, disp=False, **fit_kwargs):
        """Run both estimation stages.

        Parameters
        ----------
        disp, **fit_kwargs:
            Forwarded to :meth:`BinaryProbit.fit`.

        Returns
        -------
        dict
            ``{"probit": BinaryProbit, "ols": _OLSResult}``.

        Raises
        ------
        ValueError
            If no observation has ``selection_y == 1``.
        """
        sel_raw = np.asarray(self.selection_X, dtype=float)
        out_X = np.asarray(self.outcome_X, dtype=float)

        mask = np.asarray(self.selection_y) == 1
        if not mask.any():
            raise ValueError(
                "HeckmanTwoStep requires at least one selected observation (selection_y == 1)"
            )

        if self.fit_intercept:
            sel_design = np.column_stack([np.ones((sel_raw.shape[0], 1)), sel_raw])
            out_X = np.column_stack([np.ones((out_X.shape[0], 1)), out_X])
        else:
            sel_design = sel_raw

        # Stage 1: selection probit. BinaryProbit adds its own intercept, so
        # it receives the raw regressors.
        probit_model = BinaryProbit(_jax=True)
        probit_model.setup(
            sel_raw,
            self.selection_y,
            varnames=list(self.selection_varnames),
            fit_intercept=self.fit_intercept,
        )
        probit_model.fit(disp=disp, **fit_kwargs)

        xb = sel_design @ probit_model.coeff_est
        mills = norm.pdf(xb) / np.clip(norm.cdf(xb), 1e-10, None)

        # Stage 2: OLS on the selected rows, augmented with the Mills ratio.
        out_design = np.column_stack([out_X[mask], mills[mask]])
        out_y = np.asarray(self.outcome_y, dtype=float)[mask]

        xtx = out_design.T @ out_design
        xtx_inv = np.linalg.pinv(xtx)
        beta = xtx_inv @ (out_design.T @ out_y)
        resid = out_y - out_design @ beta
        dof = max(out_design.shape[0] - out_design.shape[1], 1)
        sigma2 = float((resid @ resid) / dof)
        cov = sigma2 * xtx_inv
        se = np.sqrt(np.clip(np.diag(cov), 1e-12, None))
        tvals = beta / np.where(se > 0, se, np.nan)
        pvals = 2.0 * student_t.sf(np.abs(tvals), df=dof)
        # A perfect fit gives sigma2 == 0; floor it so log() stays defined.
        sigma2_for_log = max(sigma2, float(np.finfo(float).tiny))
        ll_ols = -0.5 * out_design.shape[0] * (math.log(2.0 * math.pi * sigma2_for_log) + 1.0)

        out_names = (["intercept"] if self.fit_intercept else []) + list(self.outcome_varnames) + ["IMR"]
        ols = _OLSResult(
            params=pd.Series(beta, index=out_names),
            bse=pd.Series(se, index=out_names),
            tvalues=pd.Series(tvals, index=out_names),
            pvalues=pd.Series(pvals, index=out_names),
            llf=float(ll_ols),
        )

        self.selection_result = probit_model
        self.outcome_result = ols
        self.loglik = float(probit_model.loglik + ll_ols)
        total_k = len(probit_model.coeff_est) + len(beta)
        self.aic = float(2 * total_k - 2 * self.loglik)
        self.bic = float(total_k * np.log(max(self.sample_size, 1)) - 2 * self.loglik)
        self.converged = bool(probit_model.converged)

        selection_tbl = pd.DataFrame({
            "coef": probit_model.coeff_est,
            "stderr": probit_model.stderr,
            "z": probit_model.zvalues,
            "pvalue": probit_model.pvalues,
        }, index=probit_model.coeff_names)
        outcome_tbl = pd.DataFrame({
            "coef": ols.params,
            "stderr": ols.bse,
            "z": ols.tvalues,
            "pvalue": ols.pvalues,
        })
        self.params_table = pd.concat(
            {"selection": selection_tbl, "outcome": outcome_tbl},
            names=["equation", "term"],
        )

        coeff_names = [f"selection::{name}" for name in selection_tbl.index]
        coeff_names += [f"outcome::{name}" for name in outcome_tbl.index]
        self.coeff_names = np.asarray(coeff_names, dtype="<U128")
        self.coeff_est = np.concatenate([selection_tbl["coef"].values, outcome_tbl["coef"].values])
        self.stderr = np.concatenate([selection_tbl["stderr"].values, outcome_tbl["stderr"].values])
        self.zvalues = np.concatenate([selection_tbl["z"].values, outcome_tbl["z"].values])
        self.pvalues = np.concatenate([selection_tbl["pvalue"].values, outcome_tbl["pvalue"].values])
        return {"probit": probit_model, "ols": ols}

    def predict_selection_proba(self, X=None):
        """Return selection probabilities for ``X`` (training data if None).

        Raises
        ------
        RuntimeError
            If the model has not been fit.
        """
        if self.selection_result is None:
            raise RuntimeError("HeckmanTwoStep must be fit before prediction")
        X_arr = self.selection_X if X is None else np.asarray(X)
        return self.selection_result.predict_proba(X_arr)

    def predict_outcome(self, X=None, selection_probability=None):
        """Predict the outcome, including the inverse-Mills-ratio term.

        Parameters
        ----------
        X:
            Outcome regressors; defaults to the training ``outcome_X``.
        selection_probability:
            Selection probability per row of ``X`` (a single value is
            broadcast). When omitted, the probabilities of the training
            selection data are used, which only lines up with ``X`` when it
            has the same number of rows as the training sample.

        Raises
        ------
        RuntimeError
            If the model has not been fit.
        ValueError
            If the number of probabilities does not match the rows of ``X``.
        """
        if self.outcome_result is None:
            raise RuntimeError("HeckmanTwoStep must be fit before prediction")
        X_arr = self.outcome_X if X is None else np.asarray(X)
        n_rows = X_arr.shape[0]
        if self.fit_intercept:
            X_arr = np.column_stack([np.ones((n_rows, 1)), X_arr])
        if selection_probability is None:
            selection_probability = self.predict_selection_proba()

        prob = np.asarray(selection_probability, dtype=float).reshape(-1)
        if prob.shape[0] == 1 and n_rows != 1:
            prob = np.full(n_rows, prob[0])
        if prob.shape[0] != n_rows:
            raise ValueError(
                "selection_probability must provide one value per row of X; "
                "pass it explicitly when predicting for data other than the training sample"
            )

        xb = norm.ppf(np.clip(prob, 1e-10, 1 - 1e-10))
        imr = norm.pdf(xb) / np.clip(norm.cdf(xb), 1e-10, None)
        X_aug = np.column_stack([X_arr, imr])
        return X_aug @ self.outcome_result.params.values

    def summary_frame(self):
        """Return a copy of the combined selection/outcome parameter table."""
        return self.params_table.copy()
@@ -1102,7 +1102,7 @@ class SA(Search):
1102
1102
  # {
1103
1103
  if overall_best_solution is None or \
1104
1104
  is_better(self.best_sol.obj(0), overall_best_solution.obj(0), self.param.sign_crit(0)):
1105
- overall_best_solution = self.best_sol # Update overall best solution
1105
+ overall_best_solution = self.copy_solution(self.best_sol) # Update overall best solution (deep copy to prevent overwriting)
1106
1106
  elif overall_best_solution is not None and \
1107
1107
  is_worse(self.best_sol.obj(0), overall_best_solution.obj(0), self.param.sign_crit(0)):
1108
1108
  self.update_best(overall_best_solution) # Revise best solution of current SA solver
@@ -448,7 +448,7 @@ class TA(Search):
448
448
  # {
449
449
  if overall_best_solution is None or \
450
450
  is_better(self.best_sol.obj(0), overall_best_solution.obj(0), self.param.sign_crit(0)):
451
- overall_best_solution = self.best_sol # Update overall best solution
451
+ overall_best_solution = self.copy_solution(self.best_sol) # Update overall best solution (deep copy to prevent overwriting)
452
452
  elif overall_best_solution is not None and \
453
453
  is_worse(self.best_sol.obj(0), overall_best_solution.obj(0), self.param.sign_crit(0)):
454
454
  self.update_best(overall_best_solution) # Revise best solution of current TA solver
@@ -0,0 +1 @@
1
+ 0.0.85
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: SearchLibrium
3
- Version: 0.0.83
3
+ Version: 0.0.85
4
4
  Summary: A Python package for econometric models driven by search
5
5
  Author: Alexander Paz Prithvi Beeramole, Robert Burdett
6
6
  Author-email: Zeke Ahern <z.ahern@qut.edu.au>
@@ -17,6 +17,7 @@ src/SearchLibrium/harmony.py
17
17
  src/SearchLibrium/latent_class.py
18
18
  src/SearchLibrium/main.py
19
19
  src/SearchLibrium/main_debug.py
20
+ src/SearchLibrium/mdcev.py
20
21
  src/SearchLibrium/misc.py
21
22
  src/SearchLibrium/mixed_logit.py
22
23
  src/SearchLibrium/mixed_nested.py
@@ -28,6 +29,7 @@ src/SearchLibrium/ordered_logit.py
28
29
  src/SearchLibrium/ordered_logit_mixed.py
29
30
  src/SearchLibrium/rrm.py
30
31
  src/SearchLibrium/search.py
32
+ src/SearchLibrium/selection_models.py
31
33
  src/SearchLibrium/setup.py
32
34
  src/SearchLibrium/siman.py
33
35
  src/SearchLibrium/threshold.py
@@ -1 +0,0 @@
1
- 0.0.83
File without changes
File without changes