rustystats 0.1.5__cp313-cp313-manylinux_2_34_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rustystats/__init__.py +151 -0
- rustystats/_rustystats.cpython-313-x86_64-linux-gnu.so +0 -0
- rustystats/diagnostics.py +2471 -0
- rustystats/families.py +423 -0
- rustystats/formula.py +1074 -0
- rustystats/glm.py +249 -0
- rustystats/interactions.py +1246 -0
- rustystats/links.py +221 -0
- rustystats/splines.py +367 -0
- rustystats/target_encoding.py +375 -0
- rustystats-0.1.5.dist-info/METADATA +476 -0
- rustystats-0.1.5.dist-info/RECORD +14 -0
- rustystats-0.1.5.dist-info/WHEEL +4 -0
- rustystats-0.1.5.dist-info/licenses/LICENSE +21 -0
rustystats/__init__.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""
|
|
2
|
+
RustyStats: Fast Generalized Linear Models with a Rust Backend
|
|
3
|
+
==============================================================
|
|
4
|
+
|
|
5
|
+
A high-performance GLM library optimized for actuarial applications.
|
|
6
|
+
|
|
7
|
+
Quick Start
|
|
8
|
+
-----------
|
|
9
|
+
>>> import rustystats as rs
|
|
10
|
+
>>> import polars as pl
|
|
11
|
+
>>>
|
|
12
|
+
>>> # Load data
|
|
13
|
+
>>> data = pl.read_parquet("insurance.parquet")
|
|
14
|
+
>>>
|
|
15
|
+
>>> # Fit a Poisson GLM using the formula API
|
|
16
|
+
>>> result = rs.glm(
|
|
17
|
+
... formula="ClaimNb ~ VehPower + VehAge + C(Area) + C(Region)",
|
|
18
|
+
... data=data,
|
|
19
|
+
... family="poisson",
|
|
20
|
+
... offset="Exposure"
|
|
21
|
+
... ).fit()
|
|
22
|
+
>>>
|
|
23
|
+
>>> print(result.summary())
|
|
24
|
+
>>> print(result.coef_table())
|
|
25
|
+
|
|
26
|
+
Available Families
|
|
27
|
+
------------------
|
|
28
|
+
- **gaussian**: Continuous data, constant variance (linear regression)
|
|
29
|
+
- **poisson**: Count data, variance = mean (claim frequency)
|
|
30
|
+
- **binomial**: Binary/proportion data (logistic regression)
|
|
31
|
+
- **gamma**: Positive continuous, variance ∝ mean² (claim severity)
|
|
32
|
+
- **tweedie**: Mixed zeros and positives, variance ∝ μ^p (pure premium)
|
|
33
|
+
- **quasipoisson**: Overdispersed count data
|
|
34
|
+
- **quasibinomial**: Overdispersed binary data
|
|
35
|
+
- **negbinomial**: Overdispersed counts with auto θ estimation
|
|
36
|
+
|
|
37
|
+
Available Link Functions
|
|
38
|
+
------------------------
|
|
39
|
+
- **identity**: η = μ (default for Gaussian)
|
|
40
|
+
- **log**: η = log(μ) (default for Poisson, Gamma)
|
|
41
|
+
- **logit**: η = log(μ/(1-μ)) (default for Binomial)
|
|
42
|
+
|
|
43
|
+
Formula Syntax
|
|
44
|
+
--------------
|
|
45
|
+
- Main effects: ``x1``, ``x2``, ``C(cat)`` (categorical)
|
|
46
|
+
- Interactions: ``x1*x2`` (main + interaction), ``x1:x2`` (interaction only)
|
|
47
|
+
- Splines: ``bs(x, df=5)``, ``ns(x, df=4)``
|
|
48
|
+
- Target encoding: ``TE(brand)`` for high-cardinality categoricals
|
|
49
|
+
|
|
50
|
+
For Actuaries
|
|
51
|
+
-------------
|
|
52
|
+
- **Claim Frequency**: Use Poisson family with log link
|
|
53
|
+
- **Claim Severity**: Use Gamma family with log link
|
|
54
|
+
- **Claim Occurrence**: Use Binomial family with logit link
|
|
55
|
+
- **Pure Premium**: Use Tweedie family with var_power=1.5
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
# Version of the package
|
|
59
|
+
__version__ = "0.1.5"  # must match the distribution version (rustystats-0.1.5 dist-info)
|
|
60
|
+
|
|
61
|
+
# Import the Rust extension module
|
|
62
|
+
# This contains the fast implementations
|
|
63
|
+
from rustystats._rustystats import (
|
|
64
|
+
# Link functions
|
|
65
|
+
IdentityLink,
|
|
66
|
+
LogLink,
|
|
67
|
+
LogitLink,
|
|
68
|
+
# Families
|
|
69
|
+
GaussianFamily,
|
|
70
|
+
PoissonFamily,
|
|
71
|
+
BinomialFamily,
|
|
72
|
+
GammaFamily,
|
|
73
|
+
TweedieFamily,
|
|
74
|
+
# GLM results type
|
|
75
|
+
GLMResults,
|
|
76
|
+
# Spline functions (raw Rust)
|
|
77
|
+
bs_py as _bs_rust,
|
|
78
|
+
ns_py as _ns_rust,
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
# Import Python wrappers
|
|
82
|
+
from rustystats import families
|
|
83
|
+
from rustystats import links
|
|
84
|
+
from rustystats.glm import summary, summary_relativities
|
|
85
|
+
|
|
86
|
+
# Formula-based API (the primary API)
|
|
87
|
+
from rustystats.formula import glm, FormulaGLM, FormulaGLMResults
|
|
88
|
+
|
|
89
|
+
# Spline basis functions (for non-linear continuous effects)
|
|
90
|
+
from rustystats.splines import bs, ns, bs_names, ns_names, SplineTerm
|
|
91
|
+
|
|
92
|
+
# Target encoding (CatBoost-style ordered target statistics)
|
|
93
|
+
from rustystats.target_encoding import (
|
|
94
|
+
target_encode,
|
|
95
|
+
apply_target_encoding,
|
|
96
|
+
TargetEncoder,
|
|
97
|
+
TargetEncodingTerm,
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Model diagnostics
|
|
101
|
+
from rustystats.diagnostics import (
|
|
102
|
+
compute_diagnostics,
|
|
103
|
+
ModelDiagnostics,
|
|
104
|
+
DiagnosticsComputer,
|
|
105
|
+
explore_data,
|
|
106
|
+
DataExploration,
|
|
107
|
+
DataExplorer,
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
# What gets exported when someone does `from rustystats import *`
|
|
111
|
+
__all__ = [
|
|
112
|
+
# Version
|
|
113
|
+
"__version__",
|
|
114
|
+
# Formula-based API (primary interface)
|
|
115
|
+
"glm",
|
|
116
|
+
"FormulaGLM",
|
|
117
|
+
"FormulaGLMResults",
|
|
118
|
+
"GLMResults",
|
|
119
|
+
"summary",
|
|
120
|
+
"summary_relativities",
|
|
121
|
+
# Spline functions
|
|
122
|
+
"bs",
|
|
123
|
+
"ns",
|
|
124
|
+
"bs_names",
|
|
125
|
+
"ns_names",
|
|
126
|
+
"SplineTerm",
|
|
127
|
+
# Target encoding (CatBoost-style)
|
|
128
|
+
"target_encode",
|
|
129
|
+
"apply_target_encoding",
|
|
130
|
+
"TargetEncoder",
|
|
131
|
+
"TargetEncodingTerm",
|
|
132
|
+
# Sub-modules
|
|
133
|
+
"families",
|
|
134
|
+
"links",
|
|
135
|
+
# Model diagnostics
|
|
136
|
+
"compute_diagnostics",
|
|
137
|
+
"ModelDiagnostics",
|
|
138
|
+
"DiagnosticsComputer",
|
|
139
|
+
"explore_data",
|
|
140
|
+
"DataExploration",
|
|
141
|
+
"DataExplorer",
|
|
142
|
+
# Direct access to classes (for convenience)
|
|
143
|
+
"IdentityLink",
|
|
144
|
+
"LogLink",
|
|
145
|
+
"LogitLink",
|
|
146
|
+
"GaussianFamily",
|
|
147
|
+
"PoissonFamily",
|
|
148
|
+
"BinomialFamily",
|
|
149
|
+
"GammaFamily",
|
|
150
|
+
"TweedieFamily",
|
|
151
|
+
]
|
|
Binary file
|