firthmodels 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- firthmodels-0.1.0/.github/workflows/ci.yml +46 -0
- firthmodels-0.1.0/.github/workflows/publish.yml +101 -0
- firthmodels-0.1.0/.gitignore +209 -0
- firthmodels-0.1.0/.pre-commit-config.yaml +20 -0
- firthmodels-0.1.0/.python-version +1 -0
- firthmodels-0.1.0/CITATION.cff +11 -0
- firthmodels-0.1.0/LICENSE +21 -0
- firthmodels-0.1.0/PKG-INFO +188 -0
- firthmodels-0.1.0/README.md +160 -0
- firthmodels-0.1.0/pyproject.toml +74 -0
- firthmodels-0.1.0/src/firthmodels/__init__.py +5 -0
- firthmodels-0.1.0/src/firthmodels/_solvers.py +96 -0
- firthmodels-0.1.0/src/firthmodels/_utils.py +26 -0
- firthmodels-0.1.0/src/firthmodels/logistic.py +744 -0
- firthmodels-0.1.0/src/firthmodels/py.typed +0 -0
- firthmodels-0.1.0/tests/conftest.py +29 -0
- firthmodels-0.1.0/tests/test_logistic.py +110 -0
- firthmodels-0.1.0/uv.lock +715 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ main ]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [ main ]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v6
|
|
14
|
+
- uses: astral-sh/setup-uv@v7
|
|
15
|
+
- run: uv sync --frozen
|
|
16
|
+
- run: uv run ruff check
|
|
17
|
+
- run: uv run ruff format --check
|
|
18
|
+
|
|
19
|
+
typecheck:
|
|
20
|
+
runs-on: ubuntu-latest
|
|
21
|
+
steps:
|
|
22
|
+
- uses: actions/checkout@v6
|
|
23
|
+
- uses: astral-sh/setup-uv@v7
|
|
24
|
+
- run: uv sync --frozen
|
|
25
|
+
- run: uv run mypy src/
|
|
26
|
+
|
|
27
|
+
test:
|
|
28
|
+
runs-on: ubuntu-latest
|
|
29
|
+
strategy:
|
|
30
|
+
matrix:
|
|
31
|
+
include:
|
|
32
|
+
- python-version: "3.11"
|
|
33
|
+
uv-args: "--resolution lowest-direct"
|
|
34
|
+
- python-version: "3.12"
|
|
35
|
+
uv-args: ""
|
|
36
|
+
- python-version: "3.13"
|
|
37
|
+
uv-args: ""
|
|
38
|
+
- python-version: "3.14"
|
|
39
|
+
uv-args: ""
|
|
40
|
+
steps:
|
|
41
|
+
- uses: actions/checkout@v6
|
|
42
|
+
- uses: astral-sh/setup-uv@v7
|
|
43
|
+
with:
|
|
44
|
+
python-version: ${{ matrix.python-version }}
|
|
45
|
+
- run: uv sync ${{ matrix.uv-args }}
|
|
46
|
+
- run: uv run pytest
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
name: Publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v6
|
|
13
|
+
- uses: astral-sh/setup-uv@v7
|
|
14
|
+
with:
|
|
15
|
+
python-version: "3.12"
|
|
16
|
+
|
|
17
|
+
- name: Verify tag matches package version
|
|
18
|
+
run: |
|
|
19
|
+
TAG_VERSION=${GITHUB_REF_NAME#v}
|
|
20
|
+
PKG_VERSION=$(python -c "import tomllib; print(tomllib.load(open('pyproject.toml', 'rb'))['project']['version'])")
|
|
21
|
+
echo "Git Tag: $TAG_VERSION"
|
|
22
|
+
echo "Pkg Ver: $PKG_VERSION"
|
|
23
|
+
if [ "$TAG_VERSION" != "$PKG_VERSION" ]; then
|
|
24
|
+
echo "::error::Tag version ($TAG_VERSION) doesn't match pyproject.toml ($PKG_VERSION)"
|
|
25
|
+
exit 1
|
|
26
|
+
fi
|
|
27
|
+
|
|
28
|
+
- run: uv build --no-sources
|
|
29
|
+
- uses: actions/upload-artifact@v5
|
|
30
|
+
with:
|
|
31
|
+
name: dist
|
|
32
|
+
path: dist/
|
|
33
|
+
|
|
34
|
+
publish-testpypi:
|
|
35
|
+
needs: build
|
|
36
|
+
runs-on: ubuntu-latest
|
|
37
|
+
environment:
|
|
38
|
+
name: testpypi
|
|
39
|
+
url: https://test.pypi.org/project/firthmodels/
|
|
40
|
+
permissions:
|
|
41
|
+
id-token: write
|
|
42
|
+
steps:
|
|
43
|
+
- uses: actions/download-artifact@v6
|
|
44
|
+
with:
|
|
45
|
+
name: dist
|
|
46
|
+
path: dist/
|
|
47
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
48
|
+
with:
|
|
49
|
+
repository-url: https://test.pypi.org/legacy/
|
|
50
|
+
skip-existing: true
|
|
51
|
+
|
|
52
|
+
test-testpypi:
|
|
53
|
+
needs: publish-testpypi
|
|
54
|
+
runs-on: ubuntu-latest
|
|
55
|
+
steps:
|
|
56
|
+
- uses: actions/checkout@v6
|
|
57
|
+
|
|
58
|
+
- name: Set up Python
|
|
59
|
+
uses: actions/setup-python@v6
|
|
60
|
+
with:
|
|
61
|
+
python-version: "3.11"
|
|
62
|
+
|
|
63
|
+
- name: Install uv
|
|
64
|
+
uses: astral-sh/setup-uv@v7
|
|
65
|
+
|
|
66
|
+
- name: Install from TestPyPI
|
|
67
|
+
run: |
|
|
68
|
+
VERSION=${GITHUB_REF_NAME#v}
|
|
69
|
+
for i in {1..30}; do
|
|
70
|
+
if uv pip install --system \
|
|
71
|
+
--index-url https://test.pypi.org/simple/ \
|
|
72
|
+
--extra-index-url https://pypi.org/simple/ \
|
|
73
|
+
"firthmodels==$VERSION"; then
|
|
74
|
+
exit 0
|
|
75
|
+
fi
|
|
76
|
+
echo "Attempt $i failed, waiting 10s..."
|
|
77
|
+
sleep 10
|
|
78
|
+
done
|
|
79
|
+
echo "Failed to install after 30 attempts"
|
|
80
|
+
exit 1
|
|
81
|
+
|
|
82
|
+
- name: Verify import and run tests
|
|
83
|
+
run: |
|
|
84
|
+
python -c "from firthmodels import FirthLogisticRegression; print('Import OK')"
|
|
85
|
+
uv pip install --system pytest
|
|
86
|
+
pytest tests/
|
|
87
|
+
|
|
88
|
+
publish-pypi:
|
|
89
|
+
needs: test-testpypi
|
|
90
|
+
runs-on: ubuntu-latest
|
|
91
|
+
environment:
|
|
92
|
+
name: pypi
|
|
93
|
+
url: https://pypi.org/project/firthmodels/
|
|
94
|
+
permissions:
|
|
95
|
+
id-token: write
|
|
96
|
+
steps:
|
|
97
|
+
- uses: actions/download-artifact@v6
|
|
98
|
+
with:
|
|
99
|
+
name: dist
|
|
100
|
+
path: dist/
|
|
101
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py.cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
#uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
#poetry.lock
|
|
109
|
+
#poetry.toml
|
|
110
|
+
|
|
111
|
+
# pdm
|
|
112
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
113
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
114
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
115
|
+
#pdm.lock
|
|
116
|
+
#pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# pixi
|
|
121
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
122
|
+
#pixi.lock
|
|
123
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
124
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
125
|
+
.pixi
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# SageMath parsed files
|
|
135
|
+
*.sage.py
|
|
136
|
+
|
|
137
|
+
# Environments
|
|
138
|
+
.env
|
|
139
|
+
.envrc
|
|
140
|
+
.venv
|
|
141
|
+
env/
|
|
142
|
+
venv/
|
|
143
|
+
ENV/
|
|
144
|
+
env.bak/
|
|
145
|
+
venv.bak/
|
|
146
|
+
|
|
147
|
+
# Spyder project settings
|
|
148
|
+
.spyderproject
|
|
149
|
+
.spyproject
|
|
150
|
+
|
|
151
|
+
# Rope project settings
|
|
152
|
+
.ropeproject
|
|
153
|
+
|
|
154
|
+
# mkdocs documentation
|
|
155
|
+
/site
|
|
156
|
+
|
|
157
|
+
# mypy
|
|
158
|
+
.mypy_cache/
|
|
159
|
+
.dmypy.json
|
|
160
|
+
dmypy.json
|
|
161
|
+
|
|
162
|
+
# Pyre type checker
|
|
163
|
+
.pyre/
|
|
164
|
+
|
|
165
|
+
# pytype static type analyzer
|
|
166
|
+
.pytype/
|
|
167
|
+
|
|
168
|
+
# Cython debug symbols
|
|
169
|
+
cython_debug/
|
|
170
|
+
|
|
171
|
+
# PyCharm
|
|
172
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
173
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
174
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
175
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
176
|
+
#.idea/
|
|
177
|
+
|
|
178
|
+
# Abstra
|
|
179
|
+
# Abstra is an AI-powered process automation framework.
|
|
180
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
181
|
+
# Learn more at https://abstra.io/docs
|
|
182
|
+
.abstra/
|
|
183
|
+
|
|
184
|
+
# Visual Studio Code
|
|
185
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
186
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
187
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
188
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
189
|
+
# .vscode/
|
|
190
|
+
|
|
191
|
+
# Ruff stuff:
|
|
192
|
+
.ruff_cache/
|
|
193
|
+
|
|
194
|
+
# PyPI configuration file
|
|
195
|
+
.pypirc
|
|
196
|
+
|
|
197
|
+
# Cursor
|
|
198
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
199
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
|
200
|
+
# refer to https://docs.cursor.com/context/ignore-files
|
|
201
|
+
.cursorignore
|
|
202
|
+
.cursorindexingignore
|
|
203
|
+
|
|
204
|
+
# Marimo
|
|
205
|
+
marimo/_static/
|
|
206
|
+
marimo/_lsp/
|
|
207
|
+
__marimo__/
|
|
208
|
+
|
|
209
|
+
.DS_Store
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
3
|
+
rev: v6.0.0
|
|
4
|
+
hooks:
|
|
5
|
+
- id: check-yaml
|
|
6
|
+
- id: end-of-file-fixer
|
|
7
|
+
- id: trailing-whitespace
|
|
8
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
9
|
+
# Ruff version.
|
|
10
|
+
rev: v0.14.7
|
|
11
|
+
hooks:
|
|
12
|
+
# Run the linter.
|
|
13
|
+
- id: ruff-check
|
|
14
|
+
args: [ --fix ]
|
|
15
|
+
# Run the formatter.
|
|
16
|
+
- id: ruff-format
|
|
17
|
+
- repo: https://github.com/pre-commit/mirrors-mypy
|
|
18
|
+
rev: v1.19.0
|
|
19
|
+
hooks:
|
|
20
|
+
- id: mypy
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
cff-version: 1.2.0
|
|
2
|
+
title: firthmodels
|
|
3
|
+
message: "If you use this software, please cite it as below."
|
|
4
|
+
type: software
|
|
5
|
+
version: 0.1.0
|
|
6
|
+
authors:
|
|
7
|
+
- family-names: Luo
|
|
8
|
+
given-names: Jonathan Z.
|
|
9
|
+
orcid: https://orcid.org/0000-0003-2567-1516
|
|
10
|
+
repository-code: https://github.com/jzluo/firthmodels
|
|
11
|
+
license: MIT
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Jon Luo
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: firthmodels
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Firth-penalized models in Python
|
|
5
|
+
Project-URL: Homepage, https://github.com/jzluo/firthmodels
|
|
6
|
+
Project-URL: Repository, https://github.com/jzluo/firthmodels
|
|
7
|
+
Project-URL: Issues, https://github.com/jzluo/firthmodels/issues
|
|
8
|
+
Author-email: Jon Luo <20971593+jzluo@users.noreply.github.com>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: bias reduction,bias-reduced logistic regression,complete separation,data separation,firth logistic regression,firth penalization,logistic regression,penalized likelihood,quasi-complete separation,rare events,small sample,statistics
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
22
|
+
Classifier: Topic :: Scientific/Engineering
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Requires-Dist: numpy>=1.24
|
|
25
|
+
Requires-Dist: scikit-learn>=1.6
|
|
26
|
+
Requires-Dist: scipy>=1.12
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# firthmodels
|
|
30
|
+
|
|
31
|
+
[![CI](https://github.com/jzluo/firthmodels/actions/workflows/ci.yml/badge.svg)](https://github.com/jzluo/firthmodels/actions/workflows/ci.yml)
|
|
32
|
+

|
|
33
|
+

|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
Firth-penalized logistic regression in Python.
|
|
37
|
+
|
|
38
|
+
## Why Firth penalization?
|
|
39
|
+
|
|
40
|
+
Standard maximum-likelihood logistic regression fails when your data has complete or quasi-complete separation: when a predictor (or combination of predictors) perfectly separates the outcome classes. In these cases, MLE produces infinite coefficient estimates.
|
|
41
|
+
|
|
42
|
+
Firth's method adds a penalty term that:
|
|
43
|
+
- Produces **finite, well-defined estimates** even with separated data
|
|
44
|
+
- **Reduces small-sample bias** in coefficient estimates
|
|
45
|
+
- Works as a drop-in replacement for standard logistic regression
|
|
46
|
+
|
|
47
|
+
Separation and small-sample bias are common in:
|
|
48
|
+
- Case-control studies with rare exposures
|
|
49
|
+
- Small clinical trials
|
|
50
|
+
- Genome-wide or Phenome-wide association studies (GWAS/PheWAS)
|
|
51
|
+
- Any dataset where events are rare relative to predictors
|
|
52
|
+
|
|
53
|
+
## Installation
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install firthmodels
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Requires Python 3.11+ and depends on NumPy, SciPy, and scikit-learn.
|
|
60
|
+
|
|
61
|
+
## Quick start
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
import numpy as np
|
|
65
|
+
from firthmodels import FirthLogisticRegression
|
|
66
|
+
|
|
67
|
+
# Separated data: x=1 perfectly predicts y=1
|
|
68
|
+
X = np.array([[0], [0], [0], [1], [1], [1]])
|
|
69
|
+
y = np.array([0, 0, 0, 1, 1, 1])
|
|
70
|
+
|
|
71
|
+
# Standard logistic regression would fail here
|
|
72
|
+
model = FirthLogisticRegression().fit(X, y)
|
|
73
|
+
|
|
74
|
+
print(model.coef_) # array([3.89181893])
|
|
75
|
+
print(model.intercept_) # -2.725...
|
|
76
|
+
print(model.pvalues_) # Wald p-values
|
|
77
|
+
print(model.bse_) # Standard errors
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Features
|
|
81
|
+
|
|
82
|
+
### scikit-learn compatible
|
|
83
|
+
|
|
84
|
+
`FirthLogisticRegression` follows the scikit-learn estimator API (`fit`, `predict`, `predict_proba`, `get_params`, `set_params`, etc.), and can be used with pipelines, cross-validation, and other sklearn tools:
|
|
85
|
+
|
|
86
|
+
```python
|
|
87
|
+
from sklearn.model_selection import cross_val_score
|
|
88
|
+
from sklearn.pipeline import make_pipeline
|
|
89
|
+
from sklearn.preprocessing import StandardScaler
|
|
90
|
+
|
|
91
|
+
pipe = make_pipeline(StandardScaler(), FirthLogisticRegression())
|
|
92
|
+
scores = cross_val_score(pipe, X, y, cv=5)
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Likelihood ratio tests
|
|
96
|
+
|
|
97
|
+
Compute LRT p-values for individual coefficients. These are more reliable than Wald p-values for small samples.
|
|
98
|
+
|
|
99
|
+
Standard errors are back-corrected from the LRT chi-squared statistic (as in regenie), ensuring that (beta/SE)² = chi². This is useful for meta-analysis where studies are weighted by 1/SE²:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
model.fit(X, y).lrt() # Compute LRT for all features
|
|
103
|
+
|
|
104
|
+
model.lrt_pvalues_ # LRT p-values
|
|
105
|
+
model.lrt_bse_ # Back-corrected standard errors
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Each feature requires a separate constrained model fit, so you can test selectively to avoid unnecessary computation:
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
model.lrt(0) # Single feature by index
|
|
112
|
+
model.lrt([0, 2]) # Multiple features
|
|
113
|
+
model.lrt(['snp', 'age']) # By name (if fitted with DataFrame)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Confidence intervals
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
model.conf_int() # 95% Wald CIs
|
|
120
|
+
model.conf_int(alpha=0.1) # 90% CIs
|
|
121
|
+
model.conf_int(method='pl') # Profile likelihood CIs (more accurate)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Sample weights and offsets
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
model.fit(X, y, sample_weight=weights)
|
|
128
|
+
model.fit(X, y, offset=offset)
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## API reference
|
|
132
|
+
|
|
133
|
+
### Parameters
|
|
134
|
+
|
|
135
|
+
| Parameter | Default | Description |
|
|
136
|
+
|-----------|---------|-------------|
|
|
137
|
+
| `fit_intercept` | `True` | Whether to fit an intercept term |
|
|
138
|
+
| `max_iter` | `25` | Maximum Newton-Raphson iterations |
|
|
139
|
+
| `tol` | `1e-4` | Convergence tolerance |
|
|
140
|
+
| `max_step` | `5.0` | Maximum step size per coefficient |
|
|
141
|
+
| `max_halfstep` | `25` | Maximum step-halvings per iteration |
|
|
142
|
+
|
|
143
|
+
### Attributes (after fitting)
|
|
144
|
+
|
|
145
|
+
| Attribute | Description |
|
|
146
|
+
|-----------|-------------|
|
|
147
|
+
| `coef_` | Coefficient estimates |
|
|
148
|
+
| `intercept_` | Intercept (0.0 if `fit_intercept=False`) |
|
|
149
|
+
| `bse_` | Wald standard errors; includes intercept if `fit_intercept=True` |
|
|
150
|
+
| `pvalues_` | Wald p-values; includes intercept if `fit_intercept=True` |
|
|
151
|
+
| `loglik_` | Penalized log-likelihood |
|
|
152
|
+
| `n_iter_` | Number of iterations |
|
|
153
|
+
| `converged_` | Whether the solver converged |
|
|
154
|
+
| `lrt_pvalues_` | LRT p-values (after calling `lrt()`); includes intercept if `fit_intercept=True` |
|
|
155
|
+
| `lrt_bse_` | Back-corrected SEs (after calling `lrt()`); includes intercept if `fit_intercept=True` |
|
|
156
|
+
|
|
157
|
+
### Methods
|
|
158
|
+
|
|
159
|
+
| Method | Description |
|
|
160
|
+
|--------|-------------|
|
|
161
|
+
| `fit(X, y)` | Fit the model |
|
|
162
|
+
| `predict(X)` | Predict class labels |
|
|
163
|
+
| `predict_proba(X)` | Predict class probabilities |
|
|
164
|
+
| `predict_log_proba(X)` | Predict log class probabilities |
|
|
165
|
+
| `decision_function(X)` | Return linear predictor values |
|
|
166
|
+
| `lrt(features)` | Compute LRT p-values; `features` can be indices or column names. If `None`, tests all features. |
|
|
167
|
+
| `conf_int(alpha, method)` | Confidence intervals; `method='wald'` (default) or `'pl'` for profile likelihood |
|
|
168
|
+
|
|
169
|
+
## Roadmap
|
|
170
|
+
|
|
171
|
+
Current implementation uses a damped Newton–Raphson solver.
|
|
172
|
+
|
|
173
|
+
Planned additions: more solvers (IRLS, L-BFGS, etc.) and more models (Cox proportional hazards).
|
|
174
|
+
|
|
175
|
+
## References
|
|
176
|
+
|
|
177
|
+
Firth D (1993). Bias reduction of maximum likelihood estimates. *Biometrika* 80, 27-38.
|
|
178
|
+
|
|
179
|
+
Heinze G, Schemper M (2002). A solution to the problem of separation in logistic regression. *Statistics in Medicine* 21, 2409-2419.
|
|
180
|
+
|
|
181
|
+
Mbatchou J et al. (2021). Computationally efficient whole-genome regression for
|
|
182
|
+
quantitative and binary traits. *Nature Genetics* 53, 1097-1103.
|
|
183
|
+
|
|
184
|
+
Venzon DJ, Moolgavkar SH (1988). A method for computing profile-likelihood-based confidence intervals. *Applied Statistics* 37(1), 87-94.
|
|
185
|
+
|
|
186
|
+
## License
|
|
187
|
+
|
|
188
|
+
MIT
|