itlog 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. itlog-0.1.0/LICENSE +21 -0
  2. itlog-0.1.0/PKG-INFO +441 -0
  3. itlog-0.1.0/README.md +409 -0
  4. itlog-0.1.0/itlog/__init__.py +56 -0
  5. itlog-0.1.0/itlog/backends/__init__.py +5 -0
  6. itlog-0.1.0/itlog/backends/jax_backend.py +67 -0
  7. itlog-0.1.0/itlog/backends/likelihoods.py +220 -0
  8. itlog-0.1.0/itlog/backends/numpy_kernels.py +488 -0
  9. itlog-0.1.0/itlog/benchmarks/__init__.py +0 -0
  10. itlog-0.1.0/itlog/benchmarks/electricity.py +31 -0
  11. itlog-0.1.0/itlog/benchmarks/runner.py +96 -0
  12. itlog-0.1.0/itlog/benchmarks/swissmetro.py +118 -0
  13. itlog-0.1.0/itlog/core/__init__.py +11 -0
  14. itlog-0.1.0/itlog/core/objective.py +36 -0
  15. itlog-0.1.0/itlog/core/optimizer.py +98 -0
  16. itlog-0.1.0/itlog/data.py +362 -0
  17. itlog-0.1.0/itlog/distributions.py +76 -0
  18. itlog-0.1.0/itlog/draws.py +60 -0
  19. itlog-0.1.0/itlog/expr/__init__.py +27 -0
  20. itlog-0.1.0/itlog/expr/compile.py +164 -0
  21. itlog-0.1.0/itlog/expr/nodes.py +122 -0
  22. itlog-0.1.0/itlog/latex.py +131 -0
  23. itlog-0.1.0/itlog/metrics.py +35 -0
  24. itlog-0.1.0/itlog/models/__init__.py +24 -0
  25. itlog-0.1.0/itlog/models/base.py +140 -0
  26. itlog-0.1.0/itlog/models/cross_nested.py +108 -0
  27. itlog-0.1.0/itlog/models/latent_class.py +98 -0
  28. itlog-0.1.0/itlog/models/membership.py +17 -0
  29. itlog-0.1.0/itlog/models/mixed.py +175 -0
  30. itlog-0.1.0/itlog/models/mnl.py +59 -0
  31. itlog-0.1.0/itlog/models/nest.py +18 -0
  32. itlog-0.1.0/itlog/models/nested.py +128 -0
  33. itlog-0.1.0/itlog/models/ordered.py +106 -0
  34. itlog-0.1.0/itlog/results.py +90 -0
  35. itlog-0.1.0/itlog/suite.py +187 -0
  36. itlog-0.1.0/itlog.egg-info/PKG-INFO +441 -0
  37. itlog-0.1.0/itlog.egg-info/SOURCES.txt +47 -0
  38. itlog-0.1.0/itlog.egg-info/dependency_links.txt +1 -0
  39. itlog-0.1.0/itlog.egg-info/requires.txt +9 -0
  40. itlog-0.1.0/itlog.egg-info/top_level.txt +1 -0
  41. itlog-0.1.0/pyproject.toml +81 -0
  42. itlog-0.1.0/setup.cfg +4 -0
  43. itlog-0.1.0/tests/test_benchmarks_smoke.py +40 -0
  44. itlog-0.1.0/tests/test_latex.py +56 -0
  45. itlog-0.1.0/tests/test_metrics.py +20 -0
  46. itlog-0.1.0/tests/test_parity_mxl.py +84 -0
  47. itlog-0.1.0/tests/test_parity_swissmetro.py +119 -0
  48. itlog-0.1.0/tests/test_results.py +46 -0
  49. itlog-0.1.0/tests/test_suite.py +104 -0
itlog-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 itlog contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
itlog-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,441 @@
1
+ Metadata-Version: 2.4
2
+ Name: itlog
3
+ Version: 0.1.0
4
+ Summary: Declarative, JAX-backed discrete choice modeling for Python (MNL, Nested, Cross-Nested, Mixed, Latent Class, Ordered)
5
+ Author: itlog contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/cobylim/itlog
8
+ Project-URL: Repository, https://github.com/cobylim/itlog
9
+ Project-URL: Issues, https://github.com/cobylim/itlog/issues
10
+ Keywords: discrete-choice,logit,mixed-logit,nested-logit,latent-class,econometrics,choice-modeling,jax
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering
19
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: jax>=0.4.20
24
+ Requires-Dist: jaxlib>=0.4.20
25
+ Requires-Dist: numpy>=1.24
26
+ Requires-Dist: pandas>=2.0
27
+ Requires-Dist: scipy>=1.11
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=7.4; extra == "dev"
30
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
31
+ Dynamic: license-file
32
+
33
+ # itlog
34
+
35
+ Next-generation Python discrete choice modeling with a type-safe, declarative
36
+ API and a JAX/XLA backend that runs unchanged on CPU or GPU.
37
+
38
+ itlog separates *what your data is* from *what your model is* from *how it is
39
+ estimated*. That separation is the whole design, and it maps onto three steps:
40
+
41
+ | Step | Name | You write | itlog gives you |
42
+ |------|------|-----------|-----------------|
43
+ | **0** | **Declaration** | A `ChoiceDataset` schema class | A validated, canonical long-format tensor representation |
44
+ | **1** | **Formulation** | Utility equations with `Parameter` / `Var` | A model checked against your data *before* any optimization |
45
+ | **2** | **Execution** | `model.fit(dataset, ...)` | Estimates, standard errors, fit metrics, LaTeX |
46
+
47
+ Supported model families (v0.1.0):
48
+
49
+ - **Multinomial Logit (MNL)** — fixed coefficients.
50
+ - **Nested Logit (NL)** — GEV nests with nest-specific scale parameters.
51
+ - **Cross-Nested Logit (CNL)** — allocations across overlapping nests.
52
+ - **Mixed Logit (MXL)** — random coefficients (Normal, LogNormal, Triangular,
53
+ Uniform, TruncatedNormal), optional correlation, Halton or pseudo-random draws.
54
+ - **Latent Class (LC)** — class-specific MNL with softmax membership.
55
+ - **Ordered Logit / Probit** — single-index ordinal models with monotone thresholds.
56
+
57
+ ---
58
+
59
+ ## Quickstart
60
+
61
+ ```bash
62
+ pip install itlog
63
+ ```
64
+
65
+ Describe the data once, write utilities as plain Python expressions, and call
66
+ `fit` — the whole flow is a handful of lines:
67
+
68
+ ```python
69
+ import pandas as pd
70
+ import itlog as it
71
+
72
+ # 1. Declare the data (a small inline schema is all you need to get going)
73
+ class Travel(it.ChoiceDataset):
74
+     choice = it.Field(index=True, mapping={1: "train", 2: "car"})
75
+     travel_time = it.Field(sources={"train": "train_time", "car": "car_time"})
76
+     travel_cost = it.Field(sources={"train": "train_cost", "car": "car_cost"})
77
+
78
+ df = pd.DataFrame({
79
+ "choice": [1, 2, 1, 2],
80
+ "train_time": [40, 41, 35, 50], "car_time": [30, 25, 40, 35],
81
+ "train_cost": [3.0, 3.2, 2.8, 3.5], "car_cost": [5.0, 4.0, 6.0, 4.5],
82
+ })
83
+ data = Travel.from_pandas(df)
84
+
85
+ # 2. Formulate utilities
86
+ b_time, b_cost, asc_train = it.Parameter("b_time"), it.Parameter("b_cost"), it.Parameter("asc_train")
87
+ V = b_time * it.Var("travel_time") + b_cost * it.Var("travel_cost")
88
+ model = it.MultinomialLogit(utilities={"train": asc_train + V, "car": V})
89
+
90
+ # 3. Estimate
91
+ result = model.fit(data)
92
+ print(result.summary())
93
+ ```
94
+
95
+ Swap `MultinomialLogit` for `NestedLogit`, `MixedLogit`, `LatentClass`,
96
+ `OrderedLogit`, or any other family — the three-step flow below is identical.
97
+
98
+ ---
99
+
100
+ ## The three steps
101
+
102
+ ### Step 0 — Declaration *(the data step)*
103
+
104
+ > **The full declarative machinery is optional.** For a quick fit you can keep the schema
105
+ > tiny — just point `Field`s at your columns, as in the [Quickstart](#quickstart),
106
+ > and you are done. The full machinery below (row filters, derived variables,
107
+ > availability masks, panel ids) pays off for **complex, real-world datasets** and
108
+ > for **long-term model development**, where declaring the data once keeps every
109
+ > downstream model consistent and reproducible. If you just need something quick,
110
+ > you do not need any of it.
111
+
112
+ Real choice data is messy: it arrives **wide** (one row per choice situation,
113
+ alternative-specific attributes spread across columns) or **long** (one row per
114
+ alternative), with availability flags, alternative-specific variables, free
115
+ tickets, panel/repeated-choice structure, and filtering rules buried in the raw
116
+ file. Step 0 is where you tame that *once*, declaratively, instead of threading
117
+ reshape logic through the rest of your analysis.
118
+
119
+ You declare the dataset as a **class**. Each `Field` describes how a raw column
120
+ (or set of columns) maps into itlog's canonical long-format tensors. itlog
121
+ handles the wide→long reshape, builds the availability matrix, indexes the
122
+ choice and panel structure, and validates everything on ingestion.
123
+
124
+ **Why a declarative schema (use cases):**
125
+
126
+ - **Wide datasets with alternative-specific columns** — `sources={...}` collapses
127
+ `TRAIN_TT`, `SM_TT`, `CAR_TT` into a single `travel_time` variable without a
128
+ manual `melt`/`pivot`.
129
+ - **Complex, real-world data** — row filtering (`filter_rows`), derived variables,
130
+ availability masks, and free-ticket / scaling logic live in *one declared
131
+ place* and are reused across every model you fit on that data.
132
+ - **Panel / repeated-choice data** — a single `panel_id` field tells itlog which
133
+ rows share random draws in Mixed Logit; the simulator groups them for you.
134
+ - **Reproducibility & validation** — the same schema yields the same canonical
135
+ tensors every time, and the model is validated against the schema before
136
+ estimation, so typos and shape mismatches fail fast with a clear message.
137
+
138
+ Wide format:
139
+
140
+ ```python
141
+ import pandas as pd
142
+ import itlog as it
143
+
144
+ class SwissmetroWide(it.ChoiceDataset):
145
+     choice = it.Field(index=True, mapping={1: "train", 2: "sm", 3: "car"})
146
+     travel_time = it.Field(sources={"train": "TRAIN_TT", "sm": "SM_TT", "car": "CAR_TT"})
147
+     travel_cost = it.Field(sources={"train": "TRAIN_CO", "sm": "SM_CO", "car": "CAR_CO"})
148
+     availability = it.Field(
149
+         availability=True,
150
+         sources={"train": "TRAIN_AV", "sm": "SM_AV", "car": "CAR_AV"},
151
+     )
152
+
153
+     @classmethod
154
+     def filter_rows(cls, df):
155
+         return df[(df["PURPOSE"].isin([1, 3])) & (df["CHOICE"] != 0)].copy()
156
+
157
+ dataset = SwissmetroWide.from_pandas(pd.read_csv("swissmetro.dat", sep="\t"))
158
+ ```
159
+
160
+ Long format with a panel id (for Mixed Logit):
161
+
162
+ ```python
163
+ class ElectricityLong(it.ChoiceDataset):
164
+     observation_id = it.Field(session_id=True, source="chid")
164
+     alternative = it.Field(alternative_id=True, source="alt")
165
+     choice = it.Field(is_choice_indicator=True, source="choice")
166
+     panel_id = it.Field(panel_id=True, source="id")  # rows sharing draws
167
+     pf = it.Field(source="pf")
168
+     cl = it.Field(source="cl")
170
+
171
+ dataset = ElectricityLong.from_csv("electricity_long.csv")
172
+ ```
173
+
174
+ See [`examples/declarative_schema.py`](examples/declarative_schema.py) for a
175
+ runnable tour that inspects the canonical tensors produced by each schema.
176
+
177
+ ### Step 1 — Formulation *(the model step)*
178
+
179
+ Utilities are written as ordinary Python expressions using `Parameter` and
180
+ `Var`, with operator overloading building the symbolic utility for each
181
+ alternative. Coefficients can be shared across alternatives (conditional logit)
182
+ or be alternative-specific. The model is validated against the dataset's
183
+ feature names and alternatives **before** any optimization runs.
184
+
185
+ ```python
186
+ b_time = it.Parameter("b_time")
187
+ b_cost = it.Parameter("b_cost")
188
+ asc_sm = it.Parameter("asc_sm")
189
+ asc_car = it.Parameter("asc_car")
190
+
191
+ model = it.MultinomialLogit(utilities={
192
+ "train": b_time * it.Var("travel_time") + b_cost * it.Var("travel_cost"),
193
+ "sm": b_time * it.Var("travel_time") + b_cost * it.Var("travel_cost") + asc_sm,
194
+ "car": b_time * it.Var("travel_time") + b_cost * it.Var("travel_cost") + asc_car,
195
+ })
196
+ ```
197
+
198
+ For Mixed Logit, declare a coefficient as random by giving it a distribution;
199
+ itlog estimates its mean and standard deviation by simulated maximum likelihood:
200
+
201
+ ```python
202
+ RANDOM = ["pf", "cl", "loc", "wk", "tod", "seas"]
203
+ params = {name: it.Parameter(name, distribution="Normal") for name in RANDOM}
204
+ utility = sum(params[name] * it.Var(name) for name in RANDOM)
205
+ model = it.MixedLogit(utilities={alt: utility for alt in (1, 2, 3, 4)})
206
+ ```
207
+
208
+ **Nested Logit** — nest membership and nest scales:
209
+
210
+ ```python
211
+ model = it.NestedLogit(
212
+ utilities=V,
213
+ nests={
214
+ "public": it.Nest(["train", "sm"], scale=it.Parameter("mu_public")),
215
+ "private": it.Nest(["car"]),
216
+ },
217
+ )
218
+ ```
219
+
220
+ **Cross-Nested Logit** — allocation weights per alternative×nest:
221
+
222
+ ```python
223
+ model = it.CrossNestedLogit(
224
+ utilities=V,
225
+ nests={"public": it.Nest(scale=it.Parameter("mu_pub")), "fast": it.Nest(scale=it.Parameter("mu_fast"))},
226
+ allocations={"train": {"public": it.Parameter("a_train_pub"), "fast": 1.0}, "sm": {"public": 1.0}, "car": {"fast": 1.0}},
227
+ )
228
+ ```
229
+
230
+ **Latent Class** — per-class utilities and membership logits:
231
+
232
+ ```python
233
+ model = it.LatentClass(
234
+ classes=[utilities_class1, utilities_class2],
235
+ membership=it.ClassMembership({"class2": it.Parameter("g0") + it.Parameter("g_inc") * it.Var("income")}),
236
+ )
237
+ ```
238
+
239
+ **Ordered Logit** — ordinal outcome on the dataset schema:
240
+
241
+ ```python
242
+ model = it.OrderedLogit(
243
+ utility=it.Parameter("b_age") * it.Var("age") + it.Parameter("b_inc") * it.Var("income"),
244
+ outcome="rating",
245
+ n_categories=5,
246
+ )
247
+ ```
248
+
249
+ ### Step 2 — Execution *(the estimation step)*
250
+
251
+ `model.fit(...)` estimates the parameters and returns a result object with the
252
+ log-likelihood, standard errors, and fit metrics. The same call runs on CPU or
253
+ GPU via the `engine` argument — the model and data declarations are unchanged.
254
+
255
+ ```python
256
+ results = model.fit(dataset, method="BFGS") # CPU (default)
257
+ # results = model.fit(dataset, method="BFGS", engine="gpu") # GPU, if available
258
+
259
+ print(results.summary())
260
+ print(f"AIC={results.aic:.2f}, BIC={results.bic:.2f}")
261
+ ```
262
+
263
+ Mixed Logit takes the number of simulation draws and a seed:
264
+
265
+ ```python
266
+ import numpy as np
267
+ results = model.fit(dataset, method="BFGS", init=np.zeros(12), num_draws=600, seed=123)
268
+ ```
269
+
270
+ Pass `compute_se=False` to skip the standard-error (Hessian) computation when you
271
+ only need point estimates (e.g. inside timing loops).
272
+
273
+ ---
274
+
275
+ ## Estimation engine
276
+
277
+ | `engine` | Backend | Notes |
278
+ |----------|---------|-------|
279
+ | `cpu` (default) | NumPy analytic kernels | Differenced-utility MNL and simulated MXL with closed-form gradients |
280
+ | `gpu` | JAX / XLA JIT | Same model, same data; offloads the hot kernels to the GPU |
281
+
282
+ Both paths run in **double precision** for estimator-grade accuracy. The CPU
283
+ kernels use the xlogit-style differenced-utility formulation,
284
+ `P = 1 / (1 + Σ exp(Vd))`, with analytic gradients; the GPU path uses JAX
285
+ `jit`/`grad` on the same objective. GPU is the intended path for large
286
+ simulated Mixed Logit workloads.
287
+
288
+ ## Model suites
289
+
290
+ Fit and compare multiple specifications on one dataset, then rank them by any
291
+ fit metric:
292
+
293
+ ```python
294
+ suite = it.ModelSuite("swissmetro-mnl")
295
+ suite.add("base", model)
296
+ suite.add("extended", extended_model)
297
+
298
+ report = suite.fit(dataset, method="BFGS", n_jobs="auto")
299
+ print(report.compare()) # log-likelihood, AIC, BIC, rho-squared per model
300
+ print(report.rank(by="bic")) # ranked comparison table
301
+ print(report["base"].to_dataframe())
302
+ ```
303
+
304
+ ## LaTeX export
305
+
306
+ Every model and result can emit publication-ready LaTeX — utility equations,
307
+ parameter tables, and full suite comparisons:
308
+
309
+ ```python
310
+ print(model.to_latex(labels={"b_time": r"\beta_{\text{time}}"})) # aligned utility equations
311
+ print(result.to_latex()) # parameter table with std errors
312
+ print(report.to_latex(include_parameters=True)) # comparison + per-model appendix
313
+ ```
314
+
315
+ ## Library comparison
316
+
317
+ ### Model & feature support
318
+
319
+ | Capability | itlog | pylogit | xlogit | biogeme | apollo |
320
+ |------------|:-----:|:-------:|:------:|:-------:|:------:|
321
+ | MNL / conditional logit | ✓ | ✓ | ✓ | ✓ | ✓ |
322
+ | Nested logit | ✓ | ✓ | — | ✓ | ✓ |
323
+ | Cross-nested logit | ✓ | — | — | ✓ | ✓ |
324
+ | Mixed logit | ✓ | normal only | ✓ | ✓ | ✓ |
325
+ | Mixing distributions (n/ln/t/u/tn) | ✓ | n only | ✓ | ✓ | ✓ |
326
+ | Correlated random coefficients | ✓ | — | — | ✓ | ✓ |
327
+ | Latent class | ✓ | — | — | ✓ | ✓ |
328
+ | Ordered logit / probit | ✓ | — | — | ✓ | ✓ |
329
+ | Asymmetric closed-form models (scobit, clog-log, uneven) | — | ✓ | — | — | — |
330
+ | Halton / quasi-random draws | ✓ | ✓ | ✓ | ✓ | ✓ |
331
+ | Panel / mixing id | ✓ | ✓ | ✓ | ✓ | ✓ |
332
+ | Declarative data schema | ✓ | — | — | — | — |
333
+ | GPU acceleration | ✓ (JAX/XLA) | — | ✓ (CuPy) | — | — |
334
+ | Python-native API | ✓ | ✓ | ✓ | ✓ | R |
335
+
336
+ Notes:
337
+
338
+ - **pylogit** is broader than mixed logit alone: besides MNL, nested, and mixed
339
+ logit (Normal mixing only), it uniquely offers a family of **asymmetric,
340
+ closed-form choice models** (scobit, clog-log, uneven, asymmetric logit) that
341
+ itlog does not implement.
342
+ - **xlogit** is built for **GPU-accelerated mixed logit** (enable by installing
343
+ CuPy); it covers MNL and mixed logit with the full distribution set but does
344
+ not do nested, cross-nested, latent class, ordered, or correlated random
345
+ parameters.
346
+ - itlog roadmap (documented, not yet implemented): ICLV/hybrid, MDCEV,
347
+ multinomial probit, exploded logit, and pylogit-style asymmetric utilities.
348
+
349
+ ### Measured accuracy & speed
350
+
351
+ Live three-way run on an M4 MacBook Air (CPU), identical specifications, 600 MXL
352
+ draws. itlog and xlogit time the point-estimate path (`compute_se=False` /
353
+ `skip_std_errs`); pylogit computes standard errors by default. Full table and
354
+ methodology in [`docs/benchmark-results.md`](docs/benchmark-results.md);
355
+ reproduce with the scripts in [`benchmarks/`](benchmarks/README.md).
356
+
357
+ **Speed** (warm median, seconds — lower is better):
358
+
359
+ | Benchmark | itlog | pylogit | xlogit |
360
+ |-----------|------:|--------:|-------:|
361
+ | Swissmetro 4p MNL | **0.01** | 0.10 | 0.005 |
362
+ | Swissmetro 14p MNL | **0.02** | 0.11 | 0.006 |
363
+ | Electricity MXL (600 draws) | **6.3** | 38.0 | 4.1 |
364
+
365
+ itlog is **~8–10× faster than pylogit** on MNL and **~6× faster** on the MXL,
366
+ and lands within ~1.3–1.5× of xlogit's CPU kernels — before moving to GPU.
367
+
368
+ **Accuracy** (final log-likelihood):
369
+
370
+ | Benchmark | itlog | pylogit | xlogit |
371
+ |-----------|------:|--------:|-------:|
372
+ | Swissmetro 4p MNL | -5331.2520 | -5331.2520 | -5331.2520 |
373
+ | Swissmetro 14p MNL | -5159.2583 | -5159.2583 | -5564.1788† |
374
+ | Electricity MXL (600 draws) | -3887.92 | -3910.32 | -3901.19 |
375
+
376
+ itlog matches pylogit's MNL log-likelihoods to ~1e-5. †xlogit uses a different
377
+ intercept parameterization on the 14p spec. MXL log-likelihoods differ across
378
+ all three because each library generates its simulation draws differently; the
379
+ gap is simulation variance (which shrinks as `num_draws` grows), not estimation error.
380
+
381
+ ## Installation
382
+
383
+ ```bash
384
+ python3.12 -m venv .venv
385
+ source .venv/bin/activate
386
+ pip install -e ".[dev]"
387
+ ```
388
+
389
+ Requires Python >= 3.10. Core dependencies: `jax`, `jaxlib`, `numpy`, `pandas`,
390
+ `scipy`.
391
+
392
+ ## Examples
393
+
394
+ Runnable scripts under `examples/` (run from the repo root, e.g.
395
+ `python examples/swissmetro_mnl_wide.py`):
396
+
397
+ | Script | Step(s) shown |
398
+ |--------|---------------|
399
+ | [`examples/declarative_schema.py`](examples/declarative_schema.py) | **0** — wide & long schemas and the canonical tensors |
400
+ | [`examples/swissmetro_mnl_wide.py`](examples/swissmetro_mnl_wide.py) | 0→2 — wide-format MNL on Swissmetro |
401
+ | [`examples/electricity_mxl_long.py`](examples/electricity_mxl_long.py) | 0→2 — long-format Mixed Logit on Electricity panel data |
402
+ | [`examples/model_suite_comparison.py`](examples/model_suite_comparison.py) | Fit and rank multiple specifications |
403
+ | [`examples/latex_export.py`](examples/latex_export.py) | Export equations and tables to LaTeX |
404
+ | [`examples/gpu_fit.py`](examples/gpu_fit.py) | Fit with `engine="gpu"`, with graceful CPU fallback |
405
+ | [`examples/benchmarks_cpu_gpu.py`](examples/benchmarks_cpu_gpu.py) | CPU/GPU cold vs warm timing |
406
+
407
+ Cross-library benchmark scripts (itlog vs pylogit/xlogit/biogeme) live in
408
+ [`benchmarks/`](benchmarks/README.md).
409
+
410
+ ## Testing
411
+
412
+ ```bash
413
+ pytest -q # full suite with ≥95% coverage gate
414
+ pytest -q -m "not slow" # fast unit tests only
415
+ ```
416
+
417
+ Test layout mirrors the package: `tests/expr/`, `tests/core/`,
418
+ `tests/backends/`, `tests/distributions/`, `tests/models/`.
419
+
420
+ ## Accuracy
421
+
422
+ itlog reproduces the published Swissmetro MNL log-likelihoods exactly to four
423
+ decimals (4-parameter: **-5331.2520**; 14-parameter: **-5159.2583**), and the
424
+ simulated Mixed Logit kernel agrees with an independent reference and with JAX
425
+ autodiff to machine precision. Correctness is pinned by
426
+ [`tests/backends/test_numpy_kernels.py`](tests/backends/test_numpy_kernels.py):
427
+ differenced-utility vs full-softmax equivalence, analytic gradients/Hessian vs
428
+ finite differences, and the NumPy CPU path vs the JAX path. New model families
429
+ (NL, CNL, LC, Ordered) use JAX autodiff kernels validated in
430
+ [`tests/backends/test_likelihoods.py`](tests/backends/test_likelihoods.py) and
431
+ degenerate-equivalence tests under `tests/models/`.
432
+
433
+ ## Documentation
434
+
435
+ - [`docs/feature-map.md`](docs/feature-map.md) — data schemas, model suites, LaTeX export.
436
+ - [`docs/benchmarks.md`](docs/benchmarks.md) — validation methodology and parity discussion.
437
+ - [`docs/benchmark-results.md`](docs/benchmark-results.md) — measured speed and accuracy (M4 CPU).
438
+
439
+ ## License
440
+
441
+ MIT — see [LICENSE](LICENSE).