mlreserving 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlreserving-0.3.0/LICENSE +32 -0
- mlreserving-0.3.0/PKG-INFO +70 -0
- mlreserving-0.3.0/README.md +46 -0
- mlreserving-0.3.0/mlreserving/__init__.py +13 -0
- mlreserving-0.3.0/mlreserving/ml_reserving.py +381 -0
- mlreserving-0.3.0/mlreserving/utils.py +89 -0
- mlreserving-0.3.0/mlreserving.egg-info/PKG-INFO +70 -0
- mlreserving-0.3.0/mlreserving.egg-info/SOURCES.txt +12 -0
- mlreserving-0.3.0/mlreserving.egg-info/dependency_links.txt +1 -0
- mlreserving-0.3.0/mlreserving.egg-info/requires.txt +5 -0
- mlreserving-0.3.0/mlreserving.egg-info/top_level.txt +1 -0
- mlreserving-0.3.0/pyproject.toml +37 -0
- mlreserving-0.3.0/setup.cfg +4 -0
- mlreserving-0.3.0/setup.py +33 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
The Clear BSD License
|
|
2
|
+
|
|
3
|
+
Copyright (c) [2025] [T. Moudiki]
|
|
4
|
+
All rights reserved.
|
|
5
|
+
|
|
6
|
+
Redistribution and use in source and binary forms, with or without
|
|
7
|
+
modification, are permitted (subject to the limitations in the disclaimer
|
|
8
|
+
below) provided that the following conditions are met:
|
|
9
|
+
|
|
10
|
+
* Redistributions of source code must retain the above copyright notice,
|
|
11
|
+
this list of conditions and the following disclaimer.
|
|
12
|
+
|
|
13
|
+
* Redistributions in binary form must reproduce the above copyright
|
|
14
|
+
notice, this list of conditions and the following disclaimer in the
|
|
15
|
+
documentation and/or other materials provided with the distribution.
|
|
16
|
+
|
|
17
|
+
* Neither the name of the copyright holder nor the names of its
|
|
18
|
+
contributors may be used to endorse or promote products derived from this
|
|
19
|
+
software without specific prior written permission.
|
|
20
|
+
|
|
21
|
+
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
|
|
22
|
+
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
|
23
|
+
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
24
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
|
25
|
+
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
|
26
|
+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
27
|
+
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
28
|
+
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
|
29
|
+
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
|
|
30
|
+
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
31
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
32
|
+
POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mlreserving
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Model-agnostic Probabilistic Machine Learning Reserving
|
|
5
|
+
Home-page: https://github.com/Techtonique/mlreserving
|
|
6
|
+
Author: T. Moudiki
|
|
7
|
+
Author-email: "T. Moudiki" <thierry.moudiki@gmail.com>
|
|
8
|
+
Project-URL: Repository, https://github.com/Techtonique/mlreserving
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Requires-Python: >=3.7
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Requires-Dist: nnetsauce>=0.17.0
|
|
16
|
+
Requires-Dist: numpy>=1.20.0
|
|
17
|
+
Requires-Dist: pandas>=1.3.0
|
|
18
|
+
Requires-Dist: scikit-learn>=1.0.0
|
|
19
|
+
Requires-Dist: joblib>=1.1.0
|
|
20
|
+
Dynamic: author
|
|
21
|
+
Dynamic: home-page
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
Dynamic: requires-python
|
|
24
|
+
|
|
25
|
+
# MLReserving
|
|
26
|
+
|
|
27
|
+
A machine learning-based reserving model for (longitudinal data) insurance claims.
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install mlreserving
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Usage
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from mlreserving import MLReserving
|
|
39
|
+
import pandas as pd
|
|
40
|
+
|
|
41
|
+
# Create your triangle data
|
|
42
|
+
# Load the dataset
|
|
43
|
+
url = "https://raw.githubusercontent.com/Techtonique/datasets/refs/heads/main/tabular/triangle/raa.csv"
|
|
44
|
+
data = pd.read_csv(url)
|
|
45
|
+
|
|
46
|
+
# Initialize and fit the model
|
|
47
|
+
model = MLReserving(model=None,  # None selects the default RidgeCV regressor
|
|
48
|
+
level=80, # 80% confidence level
|
|
49
|
+
random_state=42)
|
|
50
|
+
model.fit(data)
|
|
51
|
+
|
|
52
|
+
# Make predictions
|
|
53
|
+
result = model.predict()
|
|
54
|
+
|
|
55
|
+
# Get IBNR, latest, and ultimate values
|
|
56
|
+
ibnr = model.get_ibnr()
|
|
57
|
+
latest = model.get_latest()
|
|
58
|
+
ultimate = model.get_ultimate()
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Features
|
|
62
|
+
|
|
63
|
+
- Machine learning based reserving model
|
|
64
|
+
- Support for prediction intervals
|
|
65
|
+
- Flexible model selection
|
|
66
|
+
- Handles both continuous and categorical features
|
|
67
|
+
|
|
68
|
+
## License
|
|
69
|
+
|
|
70
|
+
BSD 3-Clause Clear License
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# MLReserving
|
|
2
|
+
|
|
3
|
+
A machine learning-based reserving model for (longitudinal data) insurance claims.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install mlreserving
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
from mlreserving import MLReserving
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
# Create your triangle data
|
|
18
|
+
# Load the dataset
|
|
19
|
+
url = "https://raw.githubusercontent.com/Techtonique/datasets/refs/heads/main/tabular/triangle/raa.csv"
|
|
20
|
+
data = pd.read_csv(url)
|
|
21
|
+
|
|
22
|
+
# Initialize and fit the model
|
|
23
|
+
model = MLReserving(model=None,  # None selects the default RidgeCV regressor
|
|
24
|
+
level=80, # 80% confidence level
|
|
25
|
+
random_state=42)
|
|
26
|
+
model.fit(data)
|
|
27
|
+
|
|
28
|
+
# Make predictions
|
|
29
|
+
result = model.predict()
|
|
30
|
+
|
|
31
|
+
# Get IBNR, latest, and ultimate values
|
|
32
|
+
ibnr = model.get_ibnr()
|
|
33
|
+
latest = model.get_latest()
|
|
34
|
+
ultimate = model.get_ultimate()
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Features
|
|
38
|
+
|
|
39
|
+
- Machine learning based reserving model
|
|
40
|
+
- Support for prediction intervals
|
|
41
|
+
- Flexible model selection
|
|
42
|
+
- Handles both continuous and categorical features
|
|
43
|
+
|
|
44
|
+
## License
|
|
45
|
+
|
|
46
|
+
BSD 3-Clause Clear License
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""
Machine learning based (longitudinal) reserving model
"""

# Keep in sync with the version declared in pyproject.toml / setup.py.
__version__ = "0.3.0"

# Public API re-exported at package level.
from .ml_reserving import MLReserving
from .utils import triangle_to_df, df_to_triangle

__all__ = ["MLReserving", "triangle_to_df", "df_to_triangle", "__version__"]
|
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Main implementation of machine learning reserving
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import numpy as np
|
|
7
|
+
from collections import namedtuple
|
|
8
|
+
from copy import deepcopy
|
|
9
|
+
from sklearn.ensemble import RandomForestRegressor
|
|
10
|
+
from sklearn.linear_model import RidgeCV
|
|
11
|
+
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
|
12
|
+
from nnetsauce import PredictionInterval
|
|
13
|
+
from .utils import df_to_triangle, triangle_to_df
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def arcsinh(x):
    """Return the inverse hyperbolic sine of ``x`` after shifting it up by one.

    The shift is undone by ``np.sinh(y) - 1``.
    """
    shifted = x + 1
    return np.arcsinh(shifted)
|
|
19
|
+
|
|
20
|
+
def inv_arcsinh(x):
    """Map ``x`` back to the original scale: hyperbolic sine, then subtract one."""
    unshifted = np.sinh(x)
    return unshifted - 1
|
|
23
|
+
|
|
24
|
+
class MLReserving:
    """
    Machine learning based reserving model

    The triangle is flattened to one row per (origin, development lag) cell,
    the observed incremental values are regressed (on an arcsinh scale) on
    origin / development / calendar features, and the unobserved cells are
    predicted together with prediction intervals.

    Parameters
    ----------
    model : object, optional
        model to use (must implement fit and predict methods), default is RidgeCV
    level : float
        Confidence level for prediction intervals. Default is 95,
        equivalent to a miscoverage error of 5 (%)
    replications : int, optional
        Number of replications for simulated conformal (default is `None`),
        for type_pi = "bootstrap" or "kde"
    conformal_method : str
        conformal prediction method "splitconformal" or "localconformal"
    type_pi : str, optional
        type of prediction interval: `None` (split conformal prediction
        without simulation), "kde" or "bootstrap"
    use_factors : bool, default=False
        Whether to treat origin and development years as categorical variables
    random_state : int, default=42
        Random state for reproducibility. It is stored on the instance but
        NOT forwarded to the wrapped model.
        NOTE(review): confirm whether it should be passed on to
        ``PredictionInterval``.
    """

    # One shared result type instead of rebuilding the namedtuple on every call.
    _Result = namedtuple("DescribeResult", ("mean", "lower", "upper"))

    def __init__(self,
                 model=None,
                 level=95,
                 replications=None,
                 conformal_method="splitconformal",
                 type_pi=None,
                 use_factors=False,
                 random_state=42):
        if model is None:
            model = RidgeCV(alphas=[10**i for i in range(-5, 5)])
        assert conformal_method in ("splitconformal", "localconformal"),\
            "must have conformal_method in ('splitconformal', 'localconformal')"
        self.conformal_method = conformal_method
        self.model = PredictionInterval(model, level=level,
                                        type_pi=type_pi,
                                        type_split="sequential",
                                        method=conformal_method,
                                        replications=replications)
        self.level = level
        self.replications = replications
        self.type_pi = type_pi
        self.use_factors = use_factors
        # Previously accepted and silently dropped; keep it introspectable.
        self.random_state = random_state
        self.origin_col = None
        self.development_col = None
        self.value_col = None
        self.max_dev = None
        self.origin_years = None
        self.cumulated = None
        self.latest_ = None
        self.ultimate_ = None
        self.ultimate_lower_ = None
        self.ultimate_upper_ = None
        self.ibnr_mean_ = None
        self.ibnr_lower_ = None
        self.ibnr_upper_ = None
        self.X_test_ = None
        self.full_data_ = None
        self.full_data_upper_ = None
        self.full_data_lower_ = None
        self.full_data_sims_ = []
        # Pristine incremental grid built by fit(); predict() works on copies
        # of it so that it can be called repeatedly.
        self._incremental_data_ = None
        self.scaler = StandardScaler()
        self.origin_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        self.dev_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

    @staticmethod
    def _to_incremental(df, origin_col, value_col):
        """Return per-origin increments of a cumulative value column.

        ``df`` must already be sorted by origin and development lag. The first
        cell of each origin has no predecessor, so its increment is the
        cumulative value itself.
        """
        increments = df.groupby(origin_col)[value_col].diff()
        return increments.fillna(df[value_col])

    def fit(self, data, origin_col="origin",
            development_col="development",
            value_col="values",
            cumulated=True):
        """
        Fit the model to the triangle data

        Parameters
        ----------
        data : pandas.DataFrame
            Input data with origin, development, and value columns
        origin_col : str, default="origin"
            Name of the origin year column
        development_col : str, default="development"
            Name of the development year column
        value_col : str, default="values"
            Name of the value column
        cumulated : bool, default=True
            If the triangle is cumulated

        Returns
        -------
        self : object
            Returns self
        """
        # Store column names
        self.origin_col = origin_col
        self.development_col = development_col
        self.value_col = value_col
        self.cumulated = cumulated

        df = data.copy()
        # Development lag, 1-based: development year == origin year -> lag 1.
        df["dev"] = df[development_col] - df[origin_col] + 1
        df = df.sort_values([origin_col, "dev"])

        # The model is always fitted on incremental values.
        if self.cumulated:
            df[value_col] = self._to_incremental(df, origin_col, value_col)

        self.max_dev = df["dev"].max()
        self.origin_years = df[origin_col].unique()

        # Full (origin x lag) grid; cells absent from `data` become NaN after
        # the left merge and are the ones to predict.
        full_grid = pd.MultiIndex.from_product(
            [self.origin_years, range(1, self.max_dev + 1)],
            names=[origin_col, "dev"],
        ).to_frame(index=False)
        full_data = pd.merge(
            full_grid,
            df[[origin_col, "dev", value_col]],
            on=[origin_col, "dev"],
            how="left",
        )
        full_data["calendar"] = full_data[origin_col] + full_data["dev"] - 1

        # Latest position per origin = sum of the observed increments, i.e.
        # the latest cumulative amount (NaN cells are skipped by sum).
        self.latest_ = full_data.groupby(origin_col)[value_col].sum(min_count=1)

        if self.use_factors:
            # One-hot encode origin and development years
            origin_encoded = self.origin_encoder.fit_transform(full_data[[origin_col]])
            dev_encoded = self.dev_encoder.fit_transform(full_data[["dev"]])

            # Name the columns from the encoders' own category order so names
            # and columns cannot get out of step.
            origin_feature_names = [
                f"origin_{year}" for year in self.origin_encoder.categories_[0]
            ]
            dev_feature_names = [
                f"dev_{lag}" for lag in self.dev_encoder.categories_[0]
            ]

            full_data = pd.concat([
                full_data,
                pd.DataFrame(origin_encoded, columns=origin_feature_names, index=full_data.index),
                pd.DataFrame(dev_encoded, columns=dev_feature_names, index=full_data.index),
            ], axis=1)

            full_data["log_calendar"] = np.log(full_data["calendar"])
            feature_cols = origin_feature_names + dev_feature_names + ["log_calendar"]
        else:
            # Use log transformations
            full_data["log_origin"] = np.log(full_data[origin_col])
            full_data["log_dev"] = np.log(full_data["dev"])
            full_data["log_calendar"] = np.log(full_data["calendar"])
            feature_cols = ["log_origin", "log_dev", "log_calendar"]

        # Vectorised response transform; NaN cells stay NaN.
        response_col = f"arcsinh_{value_col}"
        full_data[response_col] = arcsinh(full_data[value_col])
        full_data["to_predict"] = full_data[value_col].isna()

        self._incremental_data_ = full_data.copy()
        self.full_data_ = full_data.copy()
        self.full_data_lower_ = full_data.copy()
        self.full_data_upper_ = full_data.copy()

        observed = ~full_data["to_predict"]
        X_train = full_data.loc[observed, feature_cols].values
        X_test = full_data.loc[~observed, feature_cols].values
        y_train = full_data.loc[observed, response_col].values

        # Scale features; skip transforming an empty test set (a fully
        # observed grid has nothing to predict).
        X_train_scaled = self.scaler.fit_transform(X_train)
        self.X_test_ = self.scaler.transform(X_test) if len(X_test) else X_test

        self.model.fit(X_train_scaled, y_train)

        # Results of a previous predict() no longer match this fit.
        self.ultimate_ = self.ultimate_lower_ = self.ultimate_upper_ = None
        self.ibnr_mean_ = self.ibnr_lower_ = self.ibnr_upper_ = None

        return self

    def predict(self):
        """
        Make predictions for the missing values in the triangle

        Can be called repeatedly after a single ``fit``: every call starts
        again from the incremental grid stored by ``fit``.

        Returns
        -------
        DescribeResult
            Named tuple containing mean, lower, and upper triangles

        Raises
        ------
        ValueError
            If the model has not been fitted.
        """
        base = getattr(self, "_incremental_data_", None)
        if base is None:
            raise ValueError("Model must be fitted before calling predict()")

        origin_col, value_col = self.origin_col, self.value_col
        to_predict = base["to_predict"]

        # mean / lower / upper completed grids, all on the incremental scale
        frames = [base.copy() for _ in range(3)]
        if to_predict.any():
            preds = self.model.predict(self.X_test_, return_pi=True)
            for frame, values in zip(frames, (preds.mean, preds.lower, preds.upper)):
                # Transform predictions back to the original scale
                frame.loc[to_predict, value_col] = inv_arcsinh(values)

        # IBNR = sum of predicted increments per origin. Fully developed
        # origins have no predicted cell; report 0 for them instead of
        # dropping them from the result.
        origins = self.latest_.index
        ibnr = [
            frame[to_predict].groupby(origin_col)[value_col].sum()
            .reindex(origins, fill_value=0.0)
            for frame in frames
        ]
        self.ibnr_mean_, self.ibnr_lower_, self.ibnr_upper_ = ibnr

        # Ultimate = observed + predicted increments, for cumulated and
        # incremental inputs alike.
        ultimate = [frame.groupby(origin_col)[value_col].sum() for frame in frames]
        self.ultimate_, self.ultimate_lower_, self.ultimate_upper_ = ultimate

        # If data was originally cumulated, convert the grids back to cumulative
        if self.cumulated:
            for frame in frames:
                frame[value_col] = frame.groupby(origin_col)[value_col].cumsum()

        self.full_data_, self.full_data_lower_, self.full_data_upper_ = frames

        mean_triangle, lower_triangle, upper_triangle = (
            df_to_triangle(
                frame,
                origin_col=origin_col,
                development_col="dev",
                value_col=value_col,
            )
            for frame in frames
        )

        return self._Result(mean_triangle.T,
                            lower_triangle.T,
                            upper_triangle.T)

    def get_ibnr(self):
        """
        Get the IBNR (Incurred But Not Reported) values for each origin year

        Returns
        -------
        DescribeResult
            Named tuple (mean, lower, upper) of pandas.Series indexed by
            origin year

        Raises
        ------
        ValueError
            If ``predict()`` has not been called.
        """
        if self.ibnr_mean_ is None:
            raise ValueError("Model must be fitted and predict() must be called before getting IBNR values")

        return self._Result(self.ibnr_mean_, self.ibnr_lower_, self.ibnr_upper_)

    def get_latest(self):
        """
        Get the latest known values for each origin year

        Returns
        -------
        pandas.Series
            Sum of the observed increments per origin year, indexed by
            origin year

        Raises
        ------
        ValueError
            If the model has not been fitted.
        """
        if self.latest_ is None:
            raise ValueError("Model must be fitted before getting latest values")
        return self.latest_

    def get_ultimate(self):
        """
        Get the ultimate loss estimates for each origin year

        Returns
        -------
        DescribeResult
            Named tuple (mean, lower, upper) of pandas.Series indexed by
            origin year

        Raises
        ------
        ValueError
            If ``predict()`` has not been called.
        """
        if self.ultimate_ is None:
            raise ValueError("Model must be fitted before getting ultimate values")

        return self._Result(self.ultimate_,
                            self.ultimate_lower_,
                            self.ultimate_upper_)

    def get_summary(self):
        """
        Get a summary of reserving results including latest values, ultimate estimates,
        and IBNR values with confidence intervals.

        Returns
        -------
        dict
            Dictionary containing two keys:
            - 'ByOrigin': DataFrame with results by origin year
            - 'Totals': Series with total values

        Raises
        ------
        ValueError
            If ``predict()`` has not been called.
        """
        if self.ultimate_ is None:
            raise ValueError("Model must be fitted before getting summary")

        latest = self.get_latest()
        ultimate = self.get_ultimate()
        ibnr = self.get_ibnr()

        # Create summary by origin
        summary_by_origin = pd.DataFrame({
            'Latest': latest,
            'Mean Ultimate': ultimate.mean,
            'Mean IBNR': ibnr.mean,
            f'IBNR {self.level}%': ibnr.upper,
            f'Ultimate Lo{self.level}': ultimate.lower,
            f'Ultimate Hi{self.level}': ultimate.upper,
        })

        # Calculate totals
        totals = pd.Series({
            'Latest': latest.sum(),
            'Mean Ultimate': ultimate.mean.sum(),
            'Mean IBNR': ibnr.mean.sum(),
            f'Total IBNR {self.level}%': ibnr.upper.sum(),
        })

        return {
            'ByOrigin': summary_by_origin,
            'Totals': totals,
        }
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility functions for mlreserving package
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def df_to_triangle(df, origin_col="origin", development_col="development", value_col="values"):
    """
    Pivot long-format claims data into a triangle.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data with origin, development, and value columns
    origin_col : str, default="origin"
        Name of the origin year column
    development_col : str, default="development"
        Name of the development year column. When it is literally "dev" the
        column is taken to already hold the development lag and is used as is.
    value_col : str, default="values"
        Name of the value column

    Returns
    -------
    pandas.DataFrame
        Triangle with origin years as (sorted) index and development lags
        as columns
    """
    frame = df.copy()  # never touch the caller's frame

    lag_is_given = development_col == "dev"
    if not lag_is_given:
        # 1-based lag: development year equal to origin year -> lag 1
        frame["dev"] = frame[development_col] - frame[origin_col] + 1

    frame["calendar"] = frame[origin_col] + frame["dev"] - 1

    pivoted = frame.pivot(index=origin_col, columns="dev", values=value_col)
    return pivoted.sort_index()
|
|
46
|
+
|
|
47
|
+
def triangle_to_df(triangle, origin_col="origin",
                   development_col="development",
                   value_col="values"):
    """
    Convert a triangle format into a data frame with origin, development, and value columns

    Parameters
    ----------
    triangle : pandas.DataFrame
        Triangle format with origin years as index and development lags
        (1, 2, ...) as columns. The index does not need to be named; it is
        labelled ``origin_col`` on the way out.
    origin_col : str, default="origin"
        Name of the origin year column
    development_col : str, default="development"
        Name of the development year column
    value_col : str, default="values"
        Name of the value column

    Returns
    -------
    pandas.DataFrame
        Data frame with columns origin, development, "dev", "calendar" and
        value, sorted by calendar year. Empty triangle cells are kept as
        NaN rows.
    """
    # Name the index so that reset_index() always yields `origin_col`, even
    # when the triangle was built with an unnamed index.
    df = triangle.rename_axis(origin_col).reset_index()

    # Melt the development columns into rows
    df = pd.melt(
        df,
        id_vars=[origin_col],
        var_name="dev",
        value_name=value_col,
    )

    # Column labels can come back from melt as generic objects; make the lag
    # numeric so the derived year columns get a numeric dtype too.
    df["dev"] = pd.to_numeric(df["dev"])

    # Calculate development year and calendar year
    df[development_col] = df[origin_col] + df["dev"] - 1
    df["calendar"] = df[origin_col] + df["dev"] - 1

    # Reorder columns, then sort by calendar year. Sorting returns a new
    # frame instead of mutating the column subset in place.
    df = df[[origin_col, development_col, "dev", "calendar", value_col]]
    return df.sort_values("calendar")
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mlreserving
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Model-agnostic Probabilistic Machine Learning Reserving
|
|
5
|
+
Home-page: https://github.com/Techtonique/mlreserving
|
|
6
|
+
Author: T. Moudiki
|
|
7
|
+
Author-email: "T. Moudiki" <thierry.moudiki@gmail.com>
|
|
8
|
+
Project-URL: Repository, https://github.com/Techtonique/mlreserving
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Requires-Python: >=3.7
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
License-File: LICENSE
|
|
15
|
+
Requires-Dist: nnetsauce>=0.17.0
|
|
16
|
+
Requires-Dist: numpy>=1.20.0
|
|
17
|
+
Requires-Dist: pandas>=1.3.0
|
|
18
|
+
Requires-Dist: scikit-learn>=1.0.0
|
|
19
|
+
Requires-Dist: joblib>=1.1.0
|
|
20
|
+
Dynamic: author
|
|
21
|
+
Dynamic: home-page
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
Dynamic: requires-python
|
|
24
|
+
|
|
25
|
+
# MLReserving
|
|
26
|
+
|
|
27
|
+
A machine learning-based reserving model for (longitudinal data) insurance claims.
|
|
28
|
+
|
|
29
|
+
## Installation
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install mlreserving
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Usage
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from mlreserving import MLReserving
|
|
39
|
+
import pandas as pd
|
|
40
|
+
|
|
41
|
+
# Create your triangle data
|
|
42
|
+
# Load the dataset
|
|
43
|
+
url = "https://raw.githubusercontent.com/Techtonique/datasets/refs/heads/main/tabular/triangle/raa.csv"
|
|
44
|
+
data = pd.read_csv(url)
|
|
45
|
+
|
|
46
|
+
# Initialize and fit the model
|
|
47
|
+
model = MLReserving(model=None,  # None selects the default RidgeCV regressor
|
|
48
|
+
level=80, # 80% confidence level
|
|
49
|
+
random_state=42)
|
|
50
|
+
model.fit(data)
|
|
51
|
+
|
|
52
|
+
# Make predictions
|
|
53
|
+
result = model.predict()
|
|
54
|
+
|
|
55
|
+
# Get IBNR, latest, and ultimate values
|
|
56
|
+
ibnr = model.get_ibnr()
|
|
57
|
+
latest = model.get_latest()
|
|
58
|
+
ultimate = model.get_ultimate()
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Features
|
|
62
|
+
|
|
63
|
+
- Machine learning based reserving model
|
|
64
|
+
- Support for prediction intervals
|
|
65
|
+
- Flexible model selection
|
|
66
|
+
- Handles both continuous and categorical features
|
|
67
|
+
|
|
68
|
+
## License
|
|
69
|
+
|
|
70
|
+
BSD 3-Clause Clear License
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
setup.py
|
|
5
|
+
mlreserving/__init__.py
|
|
6
|
+
mlreserving/ml_reserving.py
|
|
7
|
+
mlreserving/utils.py
|
|
8
|
+
mlreserving.egg-info/PKG-INFO
|
|
9
|
+
mlreserving.egg-info/SOURCES.txt
|
|
10
|
+
mlreserving.egg-info/dependency_links.txt
|
|
11
|
+
mlreserving.egg-info/requires.txt
|
|
12
|
+
mlreserving.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
mlreserving
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mlreserving"
|
|
3
|
+
version = "0.3.0"
|
|
4
|
+
description = "Model-agnostic Probabilistic Machine Learning Reserving"
|
|
5
|
+
requires-python = ">=3.7"
|
|
6
|
+
authors = [
|
|
7
|
+
{name = "T. Moudiki", email = "thierry.moudiki@gmail.com"},
|
|
8
|
+
]
|
|
9
|
+
dependencies = [
|
|
10
|
+
"nnetsauce>=0.17.0",
|
|
11
|
+
"numpy>=1.20.0",
|
|
12
|
+
"pandas>=1.3.0",
|
|
13
|
+
"scikit-learn>=1.0.0",
|
|
14
|
+
"joblib>=1.1.0"
|
|
15
|
+
]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"License :: OSI Approved :: BSD License",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
]
|
|
21
|
+
readme = "README.md"
|
|
22
|
+
|
|
23
|
+
[project.urls]
|
|
24
|
+
Repository = "https://github.com/Techtonique/mlreserving"
|
|
25
|
+
|
|
26
|
+
[build-system]
|
|
27
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
28
|
+
build-backend = "setuptools.build_meta"
|
|
29
|
+
|
|
30
|
+
[tool.setuptools]
|
|
31
|
+
packages = ["mlreserving"]
|
|
32
|
+
|
|
33
|
+
[tool.setuptools.dynamic]
|
|
34
|
+
version = {attr = "mlreserving.__version__"}
|
|
35
|
+
|
|
36
|
+
[tool.setuptools.package-data]
|
|
37
|
+
mlreserving = ["*.py"]
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Legacy setuptools entry point for the mlreserving package."""
from pathlib import Path

from setuptools import setup, find_packages

# Read the long description with an explicit encoding; read_text() also
# closes the file, which the previous bare open().read() did not.
LONG_DESCRIPTION = Path("README.md").read_text(encoding="utf-8")

setup(
    name="mlreserving",
    version="0.3.0",
    packages=find_packages(),
    # Same runtime requirements as the [project] table in pyproject.toml.
    install_requires=[
        "nnetsauce>=0.17.0",
        "numpy>=1.20.0",
        "pandas>=1.3.0",
        "scikit-learn>=1.0.0",
        "joblib>=1.1.0",
    ],
    extras_require={
        "dev": [
            "pytest>=7.0.0",
            "black>=22.0.0",
            "flake8>=4.0.0",
            "sphinx>=4.0.0",
        ],
    },
    python_requires=">=3.7",
    author="T. Moudiki",
    author_email="thierry.moudiki@gmail.com",
    description="Model-agnostic Probabilistic Machine Learning Reserving",
    long_description=LONG_DESCRIPTION,
    long_description_content_type="text/markdown",
    url="https://github.com/Techtonique/mlreserving",
    classifiers=[
        "Programming Language :: Python :: 3",
        # The shipped LICENSE is the Clear BSD License, not MIT; this matches
        # the classifier used in pyproject.toml.
        "License :: OSI Approved :: BSD License",
        "Operating System :: OS Independent",
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Science/Research",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
    ],
)
|