rustystats 0.1.5__cp313-cp313-manylinux_2_34_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rustystats/__init__.py ADDED
@@ -0,0 +1,151 @@
1
+ """
2
+ RustyStats: Fast Generalized Linear Models with a Rust Backend
3
+ ==============================================================
4
+
5
+ A high-performance GLM library optimized for actuarial applications.
6
+
7
+ Quick Start
8
+ -----------
9
+ >>> import rustystats as rs
10
+ >>> import polars as pl
11
+ >>>
12
+ >>> # Load data
13
+ >>> data = pl.read_parquet("insurance.parquet")
14
+ >>>
15
+ >>> # Fit a Poisson GLM using the formula API
16
+ >>> result = rs.glm(
17
+ ... formula="ClaimNb ~ VehPower + VehAge + C(Area) + C(Region)",
18
+ ... data=data,
19
+ ... family="poisson",
20
+ ... offset="Exposure"
21
+ ... ).fit()
22
+ >>>
23
+ >>> print(result.summary())
24
+ >>> print(result.coef_table())
25
+
26
+ Available Families
27
+ ------------------
28
+ - **gaussian**: Continuous data, constant variance (linear regression)
29
+ - **poisson**: Count data, variance = mean (claim frequency)
30
+ - **binomial**: Binary/proportion data (logistic regression)
31
+ - **gamma**: Positive continuous, variance ∝ mean² (claim severity)
32
+ - **tweedie**: Mixed zeros and positives, variance ∝ μ^p with 1 < p < 2 (pure premium)
33
+ - **quasipoisson**: Overdispersed count data
34
+ - **quasibinomial**: Overdispersed binary data
35
+ - **negbinomial**: Overdispersed counts with automatic estimation of θ
36
+
37
+ Available Link Functions
38
+ ------------------------
39
+ - **identity**: η = μ (default for Gaussian)
40
+ - **log**: η = log(μ) (default for Poisson, Gamma)
41
+ - **logit**: η = log(μ/(1-μ)) (default for Binomial)
42
+
43
+ Formula Syntax
44
+ --------------
45
+ - Main effects: ``x1``, ``x2``, ``C(cat)`` (categorical)
46
+ - Interactions: ``x1*x2`` (main + interaction), ``x1:x2`` (interaction only)
47
+ - Splines: ``bs(x, df=5)``, ``ns(x, df=4)``
48
+ - Target encoding: ``TE(brand)`` for high-cardinality categoricals
49
+
50
+ For Actuaries
51
+ -------------
52
+ - **Claim Frequency**: Use Poisson family with log link
53
+ - **Claim Severity**: Use Gamma family with log link
54
+ - **Claim Occurrence**: Use Binomial family with logit link
55
+ - **Pure Premium**: Use Tweedie family with var_power=1.5
56
+ """
57
+
58
+ # Version of the package
59
+ __version__ = "0.1.0"
60
+
61
+ # Import the Rust extension module
62
+ # This contains the fast implementations
63
+ from rustystats._rustystats import (
64
+ # Link functions
65
+ IdentityLink,
66
+ LogLink,
67
+ LogitLink,
68
+ # Families
69
+ GaussianFamily,
70
+ PoissonFamily,
71
+ BinomialFamily,
72
+ GammaFamily,
73
+ TweedieFamily,
74
+ # GLM results type
75
+ GLMResults,
76
+ # Spline functions (raw Rust)
77
+ bs_py as _bs_rust,
78
+ ns_py as _ns_rust,
79
+ )
80
+
81
+ # Import Python wrappers
82
+ from rustystats import families
83
+ from rustystats import links
84
+ from rustystats.glm import summary, summary_relativities
85
+
86
+ # Formula-based API (the primary API)
87
+ from rustystats.formula import glm, FormulaGLM, FormulaGLMResults
88
+
89
+ # Spline basis functions (for non-linear continuous effects)
90
+ from rustystats.splines import bs, ns, bs_names, ns_names, SplineTerm
91
+
92
+ # Target encoding (CatBoost-style ordered target statistics)
93
+ from rustystats.target_encoding import (
94
+ target_encode,
95
+ apply_target_encoding,
96
+ TargetEncoder,
97
+ TargetEncodingTerm,
98
+ )
99
+
100
+ # Model diagnostics
101
+ from rustystats.diagnostics import (
102
+ compute_diagnostics,
103
+ ModelDiagnostics,
104
+ DiagnosticsComputer,
105
+ explore_data,
106
+ DataExploration,
107
+ DataExplorer,
108
+ )
109
+
110
+ # Public API: the names exported by `from rustystats import *` (the raw Rust spline bindings `_bs_rust` / `_ns_rust` are not listed)
111
+ __all__ = [
112
+ # Package version string
113
+ "__version__",
114
+ # Formula-based API (primary interface)
115
+ "glm",
116
+ "FormulaGLM",
117
+ "FormulaGLMResults",
118
+ "GLMResults",
119
+ "summary",
120
+ "summary_relativities",
121
+ # Spline functions (Python wrappers imported from rustystats.splines)
122
+ "bs",
123
+ "ns",
124
+ "bs_names",
125
+ "ns_names",
126
+ "SplineTerm",
127
+ # Target encoding (CatBoost-style)
128
+ "target_encode",
129
+ "apply_target_encoding",
130
+ "TargetEncoder",
131
+ "TargetEncodingTerm",
132
+ # Sub-modules
133
+ "families",
134
+ "links",
135
+ # Model diagnostics
136
+ "compute_diagnostics",
137
+ "ModelDiagnostics",
138
+ "DiagnosticsComputer",
139
+ "explore_data",
140
+ "DataExploration",
141
+ "DataExplorer",
142
+ # Link and family classes re-exported from the Rust extension module (for convenience)
143
+ "IdentityLink",
144
+ "LogLink",
145
+ "LogitLink",
146
+ "GaussianFamily",
147
+ "PoissonFamily",
148
+ "BinomialFamily",
149
+ "GammaFamily",
150
+ "TweedieFamily",
151
+ ]