scikit-survival 0.23.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. scikit_survival-0.23.1.dist-info/COPYING +674 -0
  2. scikit_survival-0.23.1.dist-info/METADATA +888 -0
  3. scikit_survival-0.23.1.dist-info/RECORD +55 -0
  4. scikit_survival-0.23.1.dist-info/WHEEL +5 -0
  5. scikit_survival-0.23.1.dist-info/top_level.txt +1 -0
  6. sksurv/__init__.py +138 -0
  7. sksurv/base.py +103 -0
  8. sksurv/bintrees/__init__.py +15 -0
  9. sksurv/bintrees/_binarytrees.cp313-win_amd64.pyd +0 -0
  10. sksurv/column.py +201 -0
  11. sksurv/compare.py +123 -0
  12. sksurv/datasets/__init__.py +10 -0
  13. sksurv/datasets/base.py +436 -0
  14. sksurv/datasets/data/GBSG2.arff +700 -0
  15. sksurv/datasets/data/actg320.arff +1169 -0
  16. sksurv/datasets/data/breast_cancer_GSE7390-metastasis.arff +283 -0
  17. sksurv/datasets/data/flchain.arff +7887 -0
  18. sksurv/datasets/data/veteran.arff +148 -0
  19. sksurv/datasets/data/whas500.arff +520 -0
  20. sksurv/ensemble/__init__.py +2 -0
  21. sksurv/ensemble/_coxph_loss.cp313-win_amd64.pyd +0 -0
  22. sksurv/ensemble/boosting.py +1610 -0
  23. sksurv/ensemble/forest.py +947 -0
  24. sksurv/ensemble/survival_loss.py +151 -0
  25. sksurv/exceptions.py +18 -0
  26. sksurv/functions.py +114 -0
  27. sksurv/io/__init__.py +2 -0
  28. sksurv/io/arffread.py +58 -0
  29. sksurv/io/arffwrite.py +145 -0
  30. sksurv/kernels/__init__.py +1 -0
  31. sksurv/kernels/_clinical_kernel.cp313-win_amd64.pyd +0 -0
  32. sksurv/kernels/clinical.py +328 -0
  33. sksurv/linear_model/__init__.py +3 -0
  34. sksurv/linear_model/_coxnet.cp313-win_amd64.pyd +0 -0
  35. sksurv/linear_model/aft.py +205 -0
  36. sksurv/linear_model/coxnet.py +543 -0
  37. sksurv/linear_model/coxph.py +618 -0
  38. sksurv/meta/__init__.py +4 -0
  39. sksurv/meta/base.py +35 -0
  40. sksurv/meta/ensemble_selection.py +642 -0
  41. sksurv/meta/stacking.py +349 -0
  42. sksurv/metrics.py +996 -0
  43. sksurv/nonparametric.py +588 -0
  44. sksurv/preprocessing.py +155 -0
  45. sksurv/svm/__init__.py +11 -0
  46. sksurv/svm/_minlip.cp313-win_amd64.pyd +0 -0
  47. sksurv/svm/_prsvm.cp313-win_amd64.pyd +0 -0
  48. sksurv/svm/minlip.py +606 -0
  49. sksurv/svm/naive_survival_svm.py +221 -0
  50. sksurv/svm/survival_svm.py +1228 -0
  51. sksurv/testing.py +108 -0
  52. sksurv/tree/__init__.py +1 -0
  53. sksurv/tree/_criterion.cp313-win_amd64.pyd +0 -0
  54. sksurv/tree/tree.py +703 -0
  55. sksurv/util.py +333 -0
sksurv/util.py ADDED
@@ -0,0 +1,333 @@
+ # This program is free software: you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation, either version 3 of the License, or
+ # (at your option) any later version.
+ #
+ # This program is distributed in the hope that it will be useful,
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ # GNU General Public License for more details.
+ #
+ # You should have received a copy of the GNU General Public License
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
+ import numpy as np
+ import pandas as pd
+ from pandas.api.types import CategoricalDtype
+ from sklearn.utils import check_array, check_consistent_length
+
+ __all__ = ["check_array_survival", "check_y_survival", "safe_concat", "Surv"]
+
+
+ class Surv:
+     """
+     Helper class to construct structured array of event indicator and observed time.
+     """
+
+     @staticmethod
+     def from_arrays(event, time, name_event=None, name_time=None):
+         """Create structured array.
+
+         Parameters
+         ----------
+         event : array-like
+             Event indicator. A boolean array or array with values 0/1.
+         time : array-like
+             Observed time.
+         name_event : str|None
+             Name of event, optional, default: 'event'
+         name_time : str|None
+             Name of observed time, optional, default: 'time'
+
+         Returns
+         -------
+         y : np.array
+             Structured array with two fields.
+         """
+         name_event = name_event or "event"
+         name_time = name_time or "time"
+         if name_time == name_event:
+             raise ValueError("name_time must be different from name_event")
+
+         time = np.asanyarray(time, dtype=float)
+         y = np.empty(time.shape[0], dtype=[(name_event, bool), (name_time, float)])
+         y[name_time] = time
+
+         event = np.asanyarray(event)
+         check_consistent_length(time, event)
+
+         if np.issubdtype(event.dtype, np.bool_):
+             y[name_event] = event
+         else:
+             events = np.unique(event)
+             events.sort()
+             if len(events) != 2:
+                 raise ValueError("event indicator must be binary")
+
+             if np.all(events == np.array([0, 1], dtype=events.dtype)):
+                 y[name_event] = event.astype(bool)
+             else:
+                 raise ValueError("non-boolean event indicator must contain 0 and 1 only")
+
+         return y
+
+     @staticmethod
+     def from_dataframe(event, time, data):
+         """Create structured array from data frame.
+
+         Parameters
+         ----------
+         event : object
+             Identifier of column containing event indicator.
+         time : object
+             Identifier of column containing time.
+         data : pandas.DataFrame
+             Dataset.
+
+         Returns
+         -------
+         y : np.array
+             Structured array with two fields.
+         """
+         if not isinstance(data, pd.DataFrame):
+             raise TypeError(f"expected pandas.DataFrame, but got {type(data)!r}")
+
+         return Surv.from_arrays(
+             data.loc[:, event].values, data.loc[:, time].values, name_event=str(event), name_time=str(time)
+         )
+
+
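A minimal usage sketch (editor's annotation, not part of the packaged file): Surv.from_arrays builds the structured (event, time) array that sksurv estimators expect as y, and Surv.from_dataframe does the same from two columns of a pandas.DataFrame. The column names "status" and "days" below are illustrative only.

    import pandas as pd
    from sksurv.util import Surv

    # boolean event indicator and float times become a two-field structured array
    y = Surv.from_arrays(event=[True, False, True], time=[10.0, 25.0, 7.5])
    print(y.dtype.names)  # ('event', 'time')

    # same result from a DataFrame; the fields are named after the columns
    df = pd.DataFrame({"status": [1, 0, 1], "days": [10.0, 25.0, 7.5]})
    y2 = Surv.from_dataframe("status", "days", df)
    print(y2.dtype.names)  # ('status', 'days')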
+ def check_y_survival(y_or_event, *args, allow_all_censored=False, allow_time_zero=True):
+     """Check that array correctly represents an outcome for survival analysis.
+
+     Parameters
+     ----------
+     y_or_event : structured array with two fields, or boolean array
+         If a structured array, it must contain the binary event indicator
+         as first field, and time of event or time of censoring as
+         second field. Otherwise, it is assumed that a boolean array
+         representing the event indicator is passed.
+
+     *args : list of array-likes
+         Any number of array-like objects representing time information.
+         Elements that are `None` are passed along in the return value.
+
+     allow_all_censored : bool, optional, default: False
+         Whether to allow all events to be censored.
+
+     allow_time_zero : bool, optional, default: True
+         Whether to allow event times to be zero.
+
+     Returns
+     -------
+     event : array, shape=[n_samples,], dtype=bool
+         Binary event indicator.
+
+     time : array, shape=[n_samples,], dtype=float
+         Time of event or censoring.
+     """
+     if len(args) == 0:
+         y = y_or_event
+
+         if not isinstance(y, np.ndarray) or y.dtype.fields is None or len(y.dtype.fields) != 2:
+             raise ValueError(
+                 "y must be a structured array with the first field"
+                 " being a binary class event indicator and the second field"
+                 " the time of the event/censoring"
+             )
+
+         event_field, time_field = y.dtype.names
+         y_event = y[event_field]
+         time_args = (y[time_field],)
+     else:
+         y_event = np.asanyarray(y_or_event)
+         time_args = args
+
+     event = check_array(y_event, ensure_2d=False)
+     if not np.issubdtype(event.dtype, np.bool_):
+         raise ValueError(f"elements of event indicator must be boolean, but found {event.dtype}")
+
+     if not (allow_all_censored or np.any(event)):
+         raise ValueError("all samples are censored")
+
+     return_val = [event]
+     for i, yt in enumerate(time_args):
+         if yt is None:
+             return_val.append(yt)
+             continue
+
+         yt = check_array(yt, ensure_2d=False)
+         if not np.issubdtype(yt.dtype, np.number):
+             raise ValueError(f"time must be numeric, but found {yt.dtype} for argument {i + 2}")
+
+         if allow_time_zero:
+             cond = yt < 0
+             msg = "observed time contains values smaller than zero"
+         else:
+             cond = yt <= 0
+             msg = "observed time contains values smaller than or equal to zero"
+         if np.any(cond):
+             raise ValueError(msg)
+
+         return_val.append(yt)
+
+     return tuple(return_val)
+
+
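An illustrative call sketch (editor's annotation, not part of the packaged file): check_y_survival accepts either a structured array or a boolean event indicator followed by one or more time arrays, and returns validated numpy arrays.

    import numpy as np
    from sksurv.util import Surv, check_y_survival

    y = Surv.from_arrays([True, False, True], [10.0, 25.0, 7.5])
    event, time = check_y_survival(y)  # validated bool and float arrays

    # a boolean indicator plus separate time arrays can be passed directly;
    # None entries are passed through unchanged
    event, start, stop = check_y_survival(
        np.array([True, False]), None, np.array([3.0, 4.0])
    )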
+ def check_array_survival(X, y, **kwargs):
+     """Check that all arrays have consistent first dimensions.
+
+     Parameters
+     ----------
+     X : array-like
+         Data matrix containing feature vectors.
+
+     y : structured array with two fields
+         A structured array containing the binary event indicator
+         as first field, and time of event or time of censoring as
+         second field.
+
+     kwargs : dict
+         Additional arguments passed to :func:`check_y_survival`.
+
+     Returns
+     -------
+     event : array, shape=[n_samples,], dtype=bool
+         Binary event indicator.
+
+     time : array, shape=[n_samples,], dtype=float
+         Time of event or censoring.
+     """
+     event, time = check_y_survival(y, **kwargs)
+     check_consistent_length(X, event, time)
+     return event, time
+
+
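A companion sketch (again illustrative, not taken from the package): check_array_survival performs the same validation and additionally verifies that the feature matrix X and y have the same number of samples.

    import numpy as np
    from sksurv.util import Surv, check_array_survival

    X = np.zeros((3, 2))
    y = Surv.from_arrays([True, False, True], [10.0, 25.0, 7.5])
    event, time = check_array_survival(X, y)  # raises if len(X) != len(y)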
+ def safe_concat(objs, *args, **kwargs):
+     """Alternative to :func:`pandas.concat` that preserves categorical variables.
+
+     Parameters
+     ----------
+     objs : a sequence or mapping of Series, DataFrame, or Panel objects
+         If a dict is passed, the sorted keys will be used as the `keys`
+         argument, unless it is passed, in which case the values will be
+         selected (see below). Any None objects will be dropped silently unless
+         they are all None in which case a ValueError will be raised
+     axis : {0, 1, ...}, default 0
+         The axis to concatenate along
+     join : {'inner', 'outer'}, default 'outer'
+         How to handle indexes on other axis(es)
+     join_axes : list of Index objects
+         Specific indexes to use for the other n - 1 axes instead of performing
+         inner/outer set logic
+     verify_integrity : boolean, default False
+         Check whether the new concatenated axis contains duplicates. This can
+         be very expensive relative to the actual data concatenation
+     keys : sequence, default None
+         If multiple levels passed, should contain tuples. Construct
+         hierarchical index using the passed keys as the outermost level
+     levels : list of sequences, default None
+         Specific levels (unique values) to use for constructing a
+         MultiIndex. Otherwise they will be inferred from the keys
+     names : list, default None
+         Names for the levels in the resulting hierarchical index
+     ignore_index : boolean, default False
+         If True, do not use the index values along the concatenation axis. The
+         resulting axis will be labeled 0, ..., n - 1. This is useful if you are
+         concatenating objects where the concatenation axis does not have
+         meaningful indexing information. Note that the index values on the other
+         axes are still respected in the join.
+     copy : boolean, default True
+         If False, do not copy data unnecessarily
+
+     Notes
+     -----
+     The keys, levels, and names arguments are all optional
+
+     Returns
+     -------
+     concatenated : type of objects
+     """
+     axis = kwargs.pop("axis", 0)
+     categories = {}
+     for df in objs:
+         if isinstance(df, pd.Series):
+             if isinstance(df.dtype, CategoricalDtype):
+                 categories[df.name] = {"categories": df.cat.categories, "ordered": df.cat.ordered}
+         else:
+             dfc = df.select_dtypes(include=["category"])
+             for name, s in dfc.items():
+                 if name in categories:
+                     if axis == 1:
+                         raise ValueError(f"duplicate columns {name}")
+                     if not categories[name]["categories"].equals(s.cat.categories):
+                         raise ValueError(f"categories for column {name} do not match")
+                 else:
+                     categories[name] = {"categories": s.cat.categories, "ordered": s.cat.ordered}
+                 df[name] = df[name].astype(object)
+
+     concatenated = pd.concat(objs, *args, axis=axis, **kwargs)
+
+     for name, params in categories.items():
+         concatenated[name] = pd.Categorical(concatenated[name], **params)
+
+     return concatenated
+
+
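A small sketch of why safe_concat exists (editor's annotation, not part of the packaged file): plain pandas.concat can silently fall back to object dtype when category metadata differs across frames, whereas safe_concat records the categories up front and restores them after concatenation. The column name "grade" is invented for the example.

    import pandas as pd
    from sksurv.util import safe_concat

    a = pd.DataFrame({"grade": pd.Categorical(["low", "high"], categories=["low", "high"])})
    b = pd.DataFrame({"grade": pd.Categorical(["high", "low"], categories=["low", "high"])})

    combined = safe_concat((a, b), ignore_index=True)
    print(combined["grade"].dtype)  # category, original categories preserved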
+ class _PropertyAvailableIfDescriptor:
+     """Implements a conditional property using the descriptor protocol based on the property decorator.
+
+     The corresponding class in scikit-learn (`_AvailableIfDescriptor`) only supports callables.
+     This class adopts the property decorator as described in the descriptor guide in the official Python documentation.
+
+     See also
+     --------
+     https://docs.python.org/3/howto/descriptor.html
+         Descriptor HowTo Guide
+
+     :class:`sklearn.utils.available_if._AvailableIfDescriptor`
+         The original class in scikit-learn.
+     """
+
+     def __init__(self, check, fget, doc=None):
+         self.check = check
+         self.fget = fget
+         if doc is None and fget is not None:
+             doc = fget.__doc__
+         self.__doc__ = doc
+         self._name = ""
+
+     def __set_name__(self, owner, name):
+         self._name = name
+
+     def __get__(self, obj, objtype=None):
+         if obj is None:
+             return self
+
+         attr_err = AttributeError(f"This {obj!r} has no attribute {self._name!r}")
+         if not self.check(obj):
+             raise attr_err
+
+         if self.fget is None:
+             raise AttributeError(f"property '{self._name}' has no getter")
+         return self.fget(obj)
+
+
+ def property_available_if(check):
+     """A property attribute that is available only if check returns a truthy value.
+
+     Only supports getting an attribute value; setting or deleting an attribute value is not supported.
+
+     Parameters
+     ----------
+     check : callable
+         When passed the object of the decorated method, this should return
+         `True` if the property attribute is available, and either return `False`
+         or raise an `AttributeError` if not available.
+
+     Returns
+     -------
+     callable
+         Callable that makes the decorated property available if `check` returns
+         `True`; otherwise the decorated property is unavailable.
+     """
+     return lambda fn: _PropertyAvailableIfDescriptor(check=check, fget=fn)
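To close, a usage sketch for property_available_if (editor's annotation, not part of the packaged file); the Estimator class and its _fitted flag are invented for illustration:

    from sksurv.util import property_available_if

    class Estimator:
        def __init__(self, fitted=False):
            self._fitted = fitted

        @property_available_if(lambda self: self._fitted)
        def coef_(self):
            """Coefficients, only available once the model is fitted."""
            return [1.0, 2.0]

    print(Estimator(fitted=True).coef_)   # [1.0, 2.0]
    # Estimator(fitted=False).coef_ would raise AttributeError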