scikit-survival 0.25.0__cp313-cp313-macosx_11_0_arm64.whl → 0.27.0__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scikit-survival
3
- Version: 0.25.0
3
+ Version: 0.27.0
4
4
  Summary: Survival analysis built on top of scikit-learn
5
5
  Author-email: Sebastian Pölsterl <sebp@k-d-w.org>
6
6
  License-Expression: GPL-3.0-or-later
@@ -19,28 +19,28 @@ Classifier: Programming Language :: C++
19
19
  Classifier: Programming Language :: Cython
20
20
  Classifier: Programming Language :: Python
21
21
  Classifier: Programming Language :: Python :: 3
22
- Classifier: Programming Language :: Python :: 3.10
23
22
  Classifier: Programming Language :: Python :: 3.11
24
23
  Classifier: Programming Language :: Python :: 3.12
25
24
  Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Programming Language :: Python :: 3.14
26
26
  Classifier: Topic :: Software Development
27
27
  Classifier: Topic :: Scientific/Engineering
28
- Requires-Python: >=3.10
28
+ Requires-Python: >=3.11
29
29
  Description-Content-Type: text/x-rst
30
30
  License-File: COPYING
31
31
  Requires-Dist: ecos
32
32
  Requires-Dist: joblib
33
33
  Requires-Dist: numexpr
34
- Requires-Dist: numpy
35
- Requires-Dist: osqp<1.0.0,>=0.6.3
36
- Requires-Dist: pandas>=1.4.0
37
- Requires-Dist: scipy>=1.3.2
38
- Requires-Dist: scikit-learn<1.8,>=1.6.1
34
+ Requires-Dist: numpy>=2.0.0
35
+ Requires-Dist: osqp>=1.0.2
36
+ Requires-Dist: pandas>=2.2.0
37
+ Requires-Dist: scipy>=1.13.0
38
+ Requires-Dist: scikit-learn<1.9,>=1.8.0
39
39
  Dynamic: license-file
40
40
 
41
41
  |License| |Docs| |DOI|
42
42
 
43
- |build-tests| |build-windows| |Codecov| |Codacy|
43
+ |build-tests| |Codecov| |Codacy|
44
44
 
45
45
  ***************
46
46
  scikit-survival
@@ -72,14 +72,14 @@ this unique characteristic of such a dataset into account.
72
72
  Requirements
73
73
  ============
74
74
 
75
- - Python 3.10 or later
75
+ - Python 3.11 or later
76
76
  - ecos
77
77
  - joblib
78
78
  - numexpr
79
- - numpy
79
+ - numpy 2.0.0 or later
80
80
  - osqp
81
- - pandas 1.4.0 or later
82
- - scikit-learn 1.6 or 1.7
81
+ - pandas 2.2.0 or later
82
+ - scikit-learn 1.8
83
83
  - scipy
84
84
  - C/C++ compiler
85
85
 
@@ -178,8 +178,4 @@ Please cite the following paper if you are using **scikit-survival**.
178
178
  :target: https://github.com/sebp/scikit-survival/actions?query=workflow%3Atests+branch%3Amaster
179
179
  :alt: GitHub Actions Tests Status
180
180
 
181
- .. |build-windows| image:: https://ci.appveyor.com/api/projects/status/github/sebp/scikit-survival?branch=master&svg=true
182
- :target: https://ci.appveyor.com/project/sebp/scikit-survival
183
- :alt: Windows Build Status on AppVeyor
184
-
185
181
  .. _survival analysis: https://en.wikipedia.org/wiki/Survival_analysis
@@ -1,35 +1,35 @@
1
- scikit_survival-0.25.0.dist-info/RECORD,,
2
- scikit_survival-0.25.0.dist-info/WHEEL,sha256=oqGJCpG61FZJmvyZ3C_0aCv-2mdfcY9e3fXvyUNmWfM,136
3
- scikit_survival-0.25.0.dist-info/top_level.txt,sha256=fPkcFA-XQGbwnD_ZXOvaOWmSd34Qezr26Mn99nYPvAg,7
4
- scikit_survival-0.25.0.dist-info/METADATA,sha256=gDfqAfi65Ozo4Ak5qArzmaEGnhAVbjoRFBEM8xtI0Ww,7187
5
- scikit_survival-0.25.0.dist-info/licenses/COPYING,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
1
+ scikit_survival-0.27.0.dist-info/RECORD,,
2
+ scikit_survival-0.27.0.dist-info/WHEEL,sha256=6IL0WRXMJwUnm4EmfBfLXRzeRewZIa5V-Dek5EQDpZ4,137
3
+ scikit_survival-0.27.0.dist-info/top_level.txt,sha256=fPkcFA-XQGbwnD_ZXOvaOWmSd34Qezr26Mn99nYPvAg,7
4
+ scikit_survival-0.27.0.dist-info/METADATA,sha256=dPPT8BmqDfq_N5pEWc2GPDgGr16wgXY5cvTt-BcxZjE,6950
5
+ scikit_survival-0.27.0.dist-info/licenses/COPYING,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
6
6
  sksurv/functions.py,sha256=e0jVqnEtyHoI7qjn18gHD2oRTCoOOA3i6p90tDgMWKs,3898
7
- sksurv/metrics.py,sha256=C8vWJEQ1CysbaG4KRnQA7cHOttDZsLGNAaL1DSVgccI,41241
8
- sksurv/nonparametric.py,sha256=XNATA2vYpspXqzflT8ckR3zuOqRwBI50zMcLwvs5JxY,31715
9
- sksurv/util.py,sha256=wbLvsOh5Ta3myMRVmlBazCTcMzV8G_nv1VF4Y1twY-I,15745
7
+ sksurv/metrics.py,sha256=a75hixN4WZm8vlNmn61dksLdRVEQUEKg1htbDrIa7JU,41249
8
+ sksurv/nonparametric.py,sha256=Bhvtr_j3gozbcnMcXJ_55BbfAEua-n4-hzTiVtBYt6M,31748
9
+ sksurv/util.py,sha256=1zX5DcUCw-9oM2uwMPrC5C736g3VsmxgL_w5piTuBew,15809
10
10
  sksurv/__init__.py,sha256=eRitrwFtAUadhvZtcasgO443RRMaPTmJHCph3dWkHSg,5153
11
11
  sksurv/docstrings.py,sha256=PJTe7sts8j6x3Gck_18buulAr2HIMOF6GnWDtrLQtIw,3301
12
- sksurv/preprocessing.py,sha256=rCy0BOvniqfN14XAJqYGu0ihmumB3-gY14UUmO2lf38,6508
12
+ sksurv/preprocessing.py,sha256=proeFKkPiyx4UDCwEnaOo0jLUTqA3jbnSP28R2izbPw,6974
13
13
  sksurv/exceptions.py,sha256=CRun7zrKzcZ9zinni5b2cMaV-pU-pw1UnXpRV2h3z_4,801
14
- sksurv/testing.py,sha256=2oeCsTzEiVRKDRb3iSJLKn03hBO2IrUq-2U5TfvOYK4,4295
15
- sksurv/compare.py,sha256=k610CG3y4OnUkuIhR4hnd_kaLUHNi1qsmL4EBYQ8rLc,4440
14
+ sksurv/testing.py,sha256=qyhAvOz_Q3lQ6D7hMUuqy-CbpjpwkXwmb1l8mlpoCk4,6097
15
+ sksurv/compare.py,sha256=RCfPU-EhM1C_8BMhNV5eb34ZrTZX6ODkB6KxTHQh-6E,4420
16
16
  sksurv/base.py,sha256=JGjekQGBRQdwS6AlI6uuNowT3KOpgBHCzxJGq6dsgew,4373
17
- sksurv/column.py,sha256=D52_WjVEvKPuA-pQdYtbh5hJagCrT8Dg8jaiFfJRHnU,6908
17
+ sksurv/column.py,sha256=ptWBSh2llhSPeGOxxmoAYhlI2KXXRJjJEZO9K9-eZXQ,6869
18
18
  sksurv/tree/tree.py,sha256=uvCcwIGVqx2x39ycIsLtJSKWBhty37uDKr85zQOBR9U,31992
19
- sksurv/tree/_criterion.cpython-313-darwin.so,sha256=LxHEzFNv8X6dBsF0yyi3vIiBnl6_6SkylDgukVgkdxo,227584
19
+ sksurv/tree/_criterion.cpython-313-darwin.so,sha256=bwiRuMxY7vayD_gKOoPHuMDiGT_gvatXn3TQbmnRICk,254176
20
20
  sksurv/tree/__init__.py,sha256=7RUjPZtGrVYiHY4roDXdEDM7RVBSsbY_CXWmyqZk2ts,64
21
21
  sksurv/ensemble/boosting.py,sha256=zLsJdjgPuEunYzPy-xlsmdNAI2U97YnX6aWN3ksFIrM,61572
22
22
  sksurv/ensemble/__init__.py,sha256=7kZAzxFpJGtgLQfhoOqZUyGUubIs_Kw3RgyUsAd1Fq0,191
23
- sksurv/ensemble/_coxph_loss.cpython-313-darwin.so,sha256=1iPNdygjlbSX6D8j8qLmn3zVxRNO0GQtm9eHQt5b-1Q,206464
23
+ sksurv/ensemble/_coxph_loss.cpython-313-darwin.so,sha256=W8sw-AJbny_y1RawziGGEv2cyK_KBP_TUzCPKsQMv3g,225984
24
24
  sksurv/ensemble/survival_loss.py,sha256=mhIbuOqz7t-nuygswZD0d0are2R0EQ3d3yHMRdxOKIk,5942
25
25
  sksurv/ensemble/forest.py,sha256=zAo-Txbqc5GjnbfI5fJCUfUHG2NFdFS6dDQhADrBnuM,35268
26
- sksurv/kernels/_clinical_kernel.cpython-313-darwin.so,sha256=TRp_1pSu1uMuSpnmQnuKNRoaZILa5l1MtyupwCzLtFk,206968
27
- sksurv/kernels/clinical.py,sha256=uqwjrmo0ZHpqZQ7oWw_xWl4A47ZO19WsYJWe6zRzPrY,11439
26
+ sksurv/kernels/_clinical_kernel.cpython-313-darwin.so,sha256=zsFMXU2Lbwss4GONW9ughZXJkDShdA41gNIE8-jVPCc,228168
27
+ sksurv/kernels/clinical.py,sha256=oNIXgBIivmgjM927qNpspfI3HivC_ORqFvdsTb_NHTA,11452
28
28
  sksurv/kernels/__init__.py,sha256=_aZIFutv7kUTlkHJlP52zBDkpUXnKIlPPf3cikuAmCA,77
29
- sksurv/bintrees/_binarytrees.cpython-313-darwin.so,sha256=OEYfuVsBvUjQ65pLqweApss3IliBTeiFg1qvj6ZkxY0,112992
29
+ sksurv/bintrees/_binarytrees.cpython-313-darwin.so,sha256=MDKQGWVi9FWiSIxNMUwZ970iuM8YwcK3wVxaksDbfzQ,118512
30
30
  sksurv/bintrees/__init__.py,sha256=l6Fe4PAMByrABpGzZ5W8KHieEYG-rh-DADo2QiEMLrU,727
31
31
  sksurv/datasets/__init__.py,sha256=EPzJ50wd-cZ6mWuHFPRRRMqgt14WzM32HGxDrlOp9Q4,361
32
- sksurv/datasets/base.py,sha256=q6xtOdE-y5WvevZsDidwE_imFtWozUQaWKbAhpPbw7Q,25611
32
+ sksurv/datasets/base.py,sha256=C9fnZJXLBTw1mvzc7GqGB4LdS6_8pQqPIAzKBo5jDpM,25622
33
33
  sksurv/datasets/data/cgvhd.arff,sha256=0lxUqY74JaMpC_vWJC4RWJy6vTmQwCg1yrUxjX65VX8,5214
34
34
  sksurv/datasets/data/GBSG2.arff,sha256=jBuh302AIWtYaV1rvJ9RKEZkqzcSThAdVt8ImFFkWwQ,26204
35
35
  sksurv/datasets/data/actg320.arff,sha256=8GE2kIU8Nvx7m5Ns-uTJW6Rgtk3xmJzBzMEmtynq5FU,45446
@@ -38,21 +38,21 @@ sksurv/datasets/data/breast_cancer_GSE7390-metastasis.arff,sha256=Iz9MHAay7imf_8
38
38
  sksurv/datasets/data/flchain.arff,sha256=vyYA7EN90ZBx9zva2C3mgXgEV9EUHsNu1VGwAm5uV3M,343058
39
39
  sksurv/datasets/data/whas500.arff,sha256=9kBAyROYh1E3gi7KMGqScgjfaJaAjNl2SvcGVyL6U9Y,27772
40
40
  sksurv/datasets/data/veteran.arff,sha256=cdvJ4jXzzC7RCzolTjn5hcCSNG0chFc27SGxP74mNFY,5260
41
- sksurv/io/arffwrite.py,sha256=fRJJ6h8Q4z5h9PNgzQgjLStYbVw1L38J2Qc3OKXFoWY,5431
41
+ sksurv/io/arffwrite.py,sha256=klmpbAaksrOOPu0Ftl6cVwgEhQNTsv_Okxkjn9J2Wc0,5430
42
42
  sksurv/io/__init__.py,sha256=LacpKG9UKO_RefPXc6umPaGFGPOGzA-FZra_MCRWCxk,92
43
- sksurv/io/arffread.py,sha256=Tz7D7BgsEcsC-7NRJjFziXyOO-jwVoj-QNRMmQkORPM,2638
43
+ sksurv/io/arffread.py,sha256=zc18B_CWVPr5_6Q_8IjohSmHL6rdTwTOsMqygyM945Y,2750
44
44
  sksurv/meta/__init__.py,sha256=VLA0VhLxZhF3z35md5Z4-nhw6BSSCfR6L7YOBGk1w1A,216
45
45
  sksurv/meta/stacking.py,sha256=7dROmB9H-qfwWeCf9ueu9IEEsxDQOTNPK82nmH-EFlg,13164
46
46
  sksurv/meta/ensemble_selection.py,sha256=cy4szNkw6KABLE7QjVkb6nMKV8YEWAunalM8SK0aSu8,26568
47
47
  sksurv/meta/base.py,sha256=mV6653v4txKKHJqcJXVT-J-ARNN9rDfzIq02xoEy93I,1437
48
48
  sksurv/linear_model/coxph.py,sha256=KFzVDP1TrNr9Hv08bCGsacTX0w_aE2jwsgMpCHe3R8A,22189
49
- sksurv/linear_model/_coxnet.cpython-313-darwin.so,sha256=KBXVjuQntRLnBq02jgq6tBrUc2rkOjgJqU6YDCHsIr0,131712
49
+ sksurv/linear_model/_coxnet.cpython-313-darwin.so,sha256=dQNVtKIwLT65ArbcM3xas4iQLHf4Ok3euwB9ighwYMs,134496
50
50
  sksurv/linear_model/__init__.py,sha256=58Lt5Tj3xGqRS4uZfVR5avKQNZubHD6RSknVDyzLTso,152
51
51
  sksurv/linear_model/coxnet.py,sha256=RgIomES97BcaM-RWmxmrP6AE3vkDaBsy4of727VsVfQ,22556
52
52
  sksurv/linear_model/aft.py,sha256=1Vn_V-e5ffQhbIed34MZzZBt4RzvAcLaxI1VTOZrBEY,7558
53
53
  sksurv/svm/naive_survival_svm.py,sha256=hx1C__lOT8hSV0g-YBI5reEgp9v4qQXOnvUlbVlHPwc,9319
54
- sksurv/svm/_minlip.cpython-313-darwin.so,sha256=Wl1ZyIVbl4E2sKlYxygzVU9gXyPTGeZomgnbrzBLh7g,206912
55
- sksurv/svm/_prsvm.cpython-313-darwin.so,sha256=ONCcBf9zFSuSaOmjpnl5cxAt_75s9gJe_GNcOaPv85I,206832
54
+ sksurv/svm/_minlip.cpython-313-darwin.so,sha256=r7WUbkQb3zgFUQZR9_OKzHLiZfo2KZe9jdNXcSyKjVA,228576
55
+ sksurv/svm/_prsvm.cpython-313-darwin.so,sha256=taYwEV4XFgj4p_GOcE_DlRhrBHfTD_CHDuegCVxzR8U,225008
56
56
  sksurv/svm/__init__.py,sha256=7BRFkatw9wbtsY-aes9cnz31VPpIjZ-383LuDmucDsw,328
57
57
  sksurv/svm/survival_svm.py,sha256=JGgUSft8p999DvZ0e617Ui2IEopt8kG3xspAJHt8CbU,44986
58
- sksurv/svm/minlip.py,sha256=Hnx6t2jV1s-p1puebvsHImRCUuv5HpJ0u-5bC4Sh6A0,24771
58
+ sksurv/svm/minlip.py,sha256=B6nfOSxxWdXAB_Ym4AXxileEkyffVpRAnr34Yqvh9C8,24976
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: false
4
4
  Tag: cp313-cp313-macosx_11_0_arm64
5
5
  Generator: delocate 0.13.0
sksurv/column.py CHANGED
@@ -14,7 +14,7 @@ import logging
14
14
 
15
15
  import numpy as np
16
16
  import pandas as pd
17
- from pandas.api.types import CategoricalDtype, is_object_dtype
17
+ from pandas.api.types import CategoricalDtype, is_string_dtype
18
18
 
19
19
  __all__ = ["categorical_to_numeric", "encode_categorical", "standardize"]
20
20
 
@@ -118,12 +118,12 @@ def encode_categorical(table, columns=None, **kwargs):
118
118
  Numeric columns in the input table remain unchanged.
119
119
  """
120
120
  if isinstance(table, pd.Series):
121
- if not isinstance(table.dtype, CategoricalDtype) and not is_object_dtype(table.dtype):
121
+ if not isinstance(table.dtype, CategoricalDtype) and not is_string_dtype(table.dtype):
122
122
  raise TypeError(f"series must be of categorical dtype, but was {table.dtype}")
123
123
  return _encode_categorical_series(table, **kwargs)
124
124
 
125
125
  def _is_categorical_or_object(series):
126
- return isinstance(series.dtype, CategoricalDtype) or is_object_dtype(series.dtype)
126
+ return isinstance(series.dtype, CategoricalDtype) or is_string_dtype(series.dtype)
127
127
 
128
128
  if columns is None:
129
129
  # for columns containing categories
@@ -187,13 +187,12 @@ def categorical_to_numeric(table):
187
187
  def transform(column):
188
188
  if isinstance(column.dtype, CategoricalDtype):
189
189
  return column.cat.codes
190
- if is_object_dtype(column.dtype):
190
+ if is_string_dtype(column.dtype):
191
191
  try:
192
192
  nc = column.astype(np.int64)
193
193
  except ValueError:
194
194
  classes = column.dropna().unique()
195
- classes.sort(kind="mergesort")
196
- nc = column.map(dict(zip(classes, range(classes.shape[0]))))
195
+ nc = column.map(dict(zip(sorted(classes), range(classes.shape[0]))))
197
196
  return nc
198
197
  if column.dtype == bool:
199
198
  return column.astype(np.int64)
sksurv/compare.py CHANGED
@@ -117,7 +117,7 @@ def compare_survival(y, group_indicator, return_stats=False):
117
117
  table["expected"] = expected
118
118
  table["statistic"] = observed - expected
119
119
  table = pd.DataFrame.from_dict(table)
120
- table.index = pd.Index(groups, name="group", dtype=groups.dtype)
120
+ table.index = pd.Index(groups, name="group")
121
121
  return chisq, pval, table, covar
122
122
 
123
123
  return chisq, pval
sksurv/datasets/base.py CHANGED
@@ -36,10 +36,10 @@ def _get_x_y_survival(dataset, col_event, col_time, val_outcome, competing_risks
36
36
  event_type = np.int64 if competing_risks else bool
37
37
  y = np.empty(dtype=[(col_event, event_type), (col_time, np.float64)], shape=dataset.shape[0])
38
38
  if competing_risks:
39
- y[col_event] = dataset[col_event].values
39
+ y[col_event] = dataset[col_event].to_numpy()
40
40
  else:
41
- y[col_event] = (dataset[col_event] == val_outcome).values
42
- y[col_time] = dataset[col_time].values
41
+ y[col_event] = (dataset[col_event] == val_outcome).to_numpy()
42
+ y[col_time] = dataset[col_time].to_numpy()
43
43
 
44
44
  x_frame = dataset.drop([col_event, col_time], axis=1)
45
45
 
@@ -116,7 +116,7 @@ def _loadarff_with_index(filename):
116
116
  if isinstance(dataset["index"].dtype, CategoricalDtype):
117
117
  # concatenating categorical index may raise TypeError
118
118
  # see https://github.com/pandas-dev/pandas/issues/14586
119
- dataset["index"] = dataset["index"].astype(object)
119
+ dataset = dataset.astype({"index": "str"})
120
120
  dataset.set_index("index", inplace=True)
121
121
  return dataset
122
122
 
@@ -512,7 +512,7 @@ def load_bmt():
512
512
  """
513
513
  full_path = _get_data_path("bmt.arff")
514
514
  data = loadarff(full_path)
515
- data["ftime"] = data["ftime"].astype(int)
515
+ data = data.astype({"ftime": int})
516
516
  return get_x_y(data, attr_labels=["status", "ftime"], competing_risks=True)
517
517
 
518
518
 
@@ -603,8 +603,8 @@ def load_cgvhd():
603
603
  """
604
604
  full_path = _get_data_path("cgvhd.arff")
605
605
  data = loadarff(full_path)
606
- data["ftime"] = data[["survtime", "reltime", "cgvhtime"]].min(axis=1)
607
- data["status"] = (
606
+ data.loc[:, "ftime"] = data[["survtime", "reltime", "cgvhtime"]].min(axis=1)
607
+ data.loc[:, "status"] = (
608
608
  ((data["ftime"] == data["cgvhtime"]) & (data["cgvh"] == "1")).astype(int)
609
609
  + 2 * ((data["ftime"] == data["reltime"]) & (data["rcens"] == "1")).astype(int)
610
610
  + 3 * ((data["ftime"] == data["survtime"]) & (data["stat"] == "1")).astype(int)
sksurv/io/arffread.py CHANGED
@@ -12,6 +12,7 @@
12
12
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
13
13
  import numpy as np
14
14
  import pandas as pd
15
+ from pandas.api.types import is_string_dtype
15
16
  from scipy.io.arff import loadarff as scipy_loadarff
16
17
 
17
18
  __all__ = ["loadarff"]
@@ -34,7 +35,8 @@ def _to_pandas(data, meta):
34
35
  data_dict[name] = pd.Categorical(raw, categories=attr_format, ordered=False)
35
36
  else:
36
37
  arr = data[name]
37
- p = pd.Series(arr, dtype=arr.dtype)
38
+ dtype = "str" if is_string_dtype(arr.dtype) else arr.dtype
39
+ p = pd.Series(arr, dtype=dtype)
38
40
  data_dict[name] = p
39
41
 
40
42
  # currently, this step converts all pandas.Categorial columns back to pandas.Series
sksurv/io/arffwrite.py CHANGED
@@ -15,7 +15,7 @@ import re
15
15
 
16
16
  import numpy as np
17
17
  import pandas as pd
18
- from pandas.api.types import CategoricalDtype, is_object_dtype
18
+ from pandas.api.types import CategoricalDtype, is_string_dtype
19
19
 
20
20
  _ILLEGAL_CHARACTER_PAT = re.compile(r"[^-_=\w\d\(\)<>\.]")
21
21
 
@@ -106,7 +106,7 @@ def _write_header(data, fp, relation_name, index):
106
106
  name = attribute_names[column]
107
107
  fp.write(f"@attribute {name}\t")
108
108
 
109
- if isinstance(series.dtype, CategoricalDtype) or is_object_dtype(series):
109
+ if isinstance(series.dtype, CategoricalDtype) or is_string_dtype(series.dtype):
110
110
  _write_attribute_categorical(series, fp)
111
111
  elif np.issubdtype(series.dtype, np.floating):
112
112
  fp.write("real")
@@ -168,11 +168,11 @@ def _write_data(data, fp):
168
168
  fp.write("@data\n")
169
169
 
170
170
  def to_str(x):
171
- if pd.isnull(x):
171
+ if pd.isna(x):
172
172
  return "?"
173
173
  return str(x)
174
174
 
175
- data = data.applymap(to_str)
175
+ data = data.map(to_str)
176
176
  n_rows = data.shape[0]
177
177
  for i in range(n_rows):
178
178
  str_values = list(data.iloc[i, :].apply(_check_str_array))
@@ -41,7 +41,7 @@ def _get_continuous_and_ordinal_array(x):
41
41
  ordinal_columns = pd.Index([v for v in nominal_columns if x[v].cat.ordered])
42
42
  continuous_columns = x.select_dtypes(include=[np.number]).columns
43
43
 
44
- x_num = x.loc[:, continuous_columns].astype(np.float64).values
44
+ x_num = x.loc[:, continuous_columns].to_numpy(dtype=np.float64)
45
45
  if len(ordinal_columns) > 0:
46
46
  x = _ordinal_as_numeric(x, ordinal_columns)
47
47
 
@@ -123,7 +123,7 @@ def clinical_kernel(x, y=None):
123
123
  y_numeric = x_numeric
124
124
 
125
125
  continuous_ordinal_kernel(x_numeric, y_numeric, mat)
126
- _nominal_kernel(x.loc[:, nominal_columns].values, y.loc[:, nominal_columns].values, mat)
126
+ _nominal_kernel(x.loc[:, nominal_columns].to_numpy(), y.loc[:, nominal_columns].to_numpy(), mat)
127
127
  mat /= x.shape[1]
128
128
  return mat
129
129
 
@@ -210,7 +210,7 @@ class ClinicalKernelTransform(BaseEstimator, TransformerMixin):
210
210
  else:
211
211
  raise TypeError(f"unsupported dtype: {dt!r}")
212
212
 
213
- fit_data[:, i] = col.values
213
+ fit_data[:, i] = col.to_numpy()
214
214
 
215
215
  self._numeric_columns = np.asarray(numeric_columns)
216
216
  self._nominal_columns = np.asarray(nominal_columns)
sksurv/metrics.py CHANGED
@@ -510,7 +510,7 @@ def cumulative_dynamic_auc(survival_train, survival_test, estimate, times, tied_
510
510
  # to make sure that the curve starts at (0, 0)
511
511
  tp_no_ties = np.r_[0, tp_no_ties]
512
512
  fp_no_ties = np.r_[0, fp_no_ties]
513
- scores[i] = np.trapz(tp_no_ties, fp_no_ties)
513
+ scores[i] = np.trapezoid(tp_no_ties, fp_no_ties)
514
514
 
515
515
  if n_times == 1:
516
516
  mean_auc = scores[0]
@@ -780,7 +780,7 @@ def integrated_brier_score(survival_train, survival_test, estimate, times):
780
780
  raise ValueError("At least two time points must be given")
781
781
 
782
782
  # Computing the IBS
783
- ibs_value = np.trapz(brier_scores, times) / (times[-1] - times[0])
783
+ ibs_value = np.trapezoid(brier_scores, times) / (times[-1] - times[0])
784
784
 
785
785
  return ibs_value
786
786
 
sksurv/nonparametric.py CHANGED
@@ -321,7 +321,7 @@ def kaplan_meier_estimator(
321
321
  >>> plt.step(time, prob_surv, where="post")
322
322
  [...]
323
323
  >>> plt.fill_between(time, conf_int[0], conf_int[1], alpha=0.25, step="post")
324
- <matplotlib.collections.PolyCollection object at 0x...>
324
+ <matplotlib.collections.FillBetweenPolyCollection object at 0x...>
325
325
  >>> plt.ylim(0, 1)
326
326
  (0.0, 1.0)
327
327
  >>> plt.show() # doctest: +SKIP
@@ -757,12 +757,12 @@ def cumulative_incidence_competing_risks(
757
757
  >>> plt.step(x, y[0], where="post", label="Total risk")
758
758
  [...]
759
759
  >>> plt.fill_between(x, conf_int[0, 0], conf_int[0, 1], alpha=0.25, step="post")
760
- <matplotlib.collections.PolyCollection object at 0x...>
760
+ <matplotlib.collections.FillBetweenPolyCollection object at 0x...>
761
761
  >>> for i in range(1, n_risks + 1):
762
762
  ... plt.step(x, y[i], where="post", label=f"{i}-risk")
763
763
  ... plt.fill_between(x, conf_int[i, 0], conf_int[i, 1], alpha=0.25, step="post")
764
764
  [...]
765
- <matplotlib.collections.PolyCollection object at 0x...>
765
+ <matplotlib.collections.FillBetweenPolyCollection object at 0x...>
766
766
  >>> plt.ylim(0, 1)
767
767
  (0.0, 1.0)
768
768
  >>> plt.legend()
sksurv/preprocessing.py CHANGED
@@ -10,6 +10,8 @@
10
10
  #
11
11
  # You should have received a copy of the GNU General Public License
12
12
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
13
+ import pandas as pd
14
+ from pandas.api.types import CategoricalDtype, is_string_dtype
13
15
  from sklearn.base import BaseEstimator, TransformerMixin
14
16
  from sklearn.utils.validation import _check_feature_names, _check_feature_names_in, _check_n_features, check_is_fitted
15
17
 
@@ -127,12 +129,24 @@ class OneHotEncoder(BaseEstimator, TransformerMixin):
127
129
  """
128
130
  _check_feature_names(self, X, reset=True)
129
131
  _check_n_features(self, X, reset=True)
130
- columns_to_encode = X.select_dtypes(include=["object", "category"]).columns
132
+
133
+ def is_string_or_categorical_dtype(dtype):
134
+ return is_string_dtype(dtype) or isinstance(dtype, CategoricalDtype)
135
+
136
+ columns_to_encode = pd.Index(
137
+ [name for name, dtype in X.dtypes.items() if is_string_or_categorical_dtype(dtype)]
138
+ )
131
139
  x_dummy = self._encode(X, columns_to_encode)
132
140
 
133
141
  self.feature_names_ = columns_to_encode
134
- self.categories_ = {k: X[k].cat.categories for k in columns_to_encode}
135
- self.encoded_columns_ = x_dummy.columns
142
+ cat_cols = {}
143
+ for col_name in columns_to_encode:
144
+ col = X[col_name]
145
+ if not isinstance(col.dtype, CategoricalDtype):
146
+ col = col.astype("category")
147
+ cat_cols[col_name] = col.cat.categories
148
+ self.categories_ = cat_cols
149
+ self.encoded_columns_ = x_dummy.columns.copy()
136
150
  return x_dummy
137
151
 
138
152
  def transform(self, X):
@@ -152,9 +166,7 @@ class OneHotEncoder(BaseEstimator, TransformerMixin):
152
166
  _check_n_features(self, X, reset=False)
153
167
  check_columns_exist(X.columns, self.feature_names_)
154
168
 
155
- Xt = X.copy()
156
- for col, cat in self.categories_.items():
157
- Xt[col] = Xt[col].cat.set_categories(cat)
169
+ Xt = X.astype({col: CategoricalDtype(cat) for col, cat in self.categories_.items()})
158
170
 
159
171
  new_data = self._encode(Xt, self.feature_names_)
160
172
  return new_data.loc[:, self.encoded_columns_]
@@ -180,4 +192,4 @@ class OneHotEncoder(BaseEstimator, TransformerMixin):
180
192
  check_is_fitted(self, "encoded_columns_")
181
193
  input_features = _check_feature_names_in(self, input_features)
182
194
 
183
- return self.encoded_columns_.values.copy()
195
+ return self.encoded_columns_.to_numpy(copy=True)
Binary file
Binary file
sksurv/svm/minlip.py CHANGED
@@ -81,17 +81,22 @@ class OsqpSolver(QPSolver):
81
81
 
82
82
  solver_opts = self._get_options()
83
83
  m = osqp.OSQP()
84
- m.setup(P=sparse.csc_matrix(P), q=q, A=G, u=h, **solver_opts) # noqa: E741
85
- results = m.solve()
84
+ m.setup(P=sparse.csc_matrix(P), q=q, A=G, l=None, u=h, **solver_opts) # noqa: E741
85
+ results = m.solve(raise_error=False)
86
86
 
87
- if results.info.status_val == -2: # max iter reached
87
+ solved_codes = (
88
+ osqp.SolverStatus.OSQP_SOLVED,
89
+ osqp.SolverStatus.OSQP_SOLVED_INACCURATE,
90
+ )
91
+
92
+ if results.info.status_val == osqp.SolverStatus.OSQP_MAX_ITER_REACHED: # max iter reached
88
93
  warnings.warn(
89
94
  (f"OSQP solver did not converge: {results.info.status}"),
90
95
  category=ConvergenceWarning,
91
96
  stacklevel=2,
92
97
  )
93
- elif results.info.status_val not in (1, 2): # pragma: no cover
94
- # non of solved, solved inaccurate
98
+ elif results.info.status_val not in solved_codes: # pragma: no cover
99
+ # none of SOLVED, SOLVED_INACCURATE
95
100
  raise RuntimeError(f"OSQP solver failed: {results.info.status}")
96
101
 
97
102
  n_iter = results.info.iter
@@ -103,7 +108,7 @@ class OsqpSolver(QPSolver):
103
108
  "eps_abs": 1e-5,
104
109
  "eps_rel": 1e-5,
105
110
  "max_iter": self.max_iter or 4000,
106
- "polish": True,
111
+ "polishing": True,
107
112
  "verbose": self.verbose,
108
113
  }
109
114
  return solver_opts
sksurv/testing.py CHANGED
@@ -10,13 +10,17 @@
10
10
  #
11
11
  # You should have received a copy of the GNU General Public License
12
12
  # along with this program. If not, see <http://www.gnu.org/licenses/>.
13
+ from contextlib import nullcontext
13
14
  from importlib import import_module
15
+ from importlib.metadata import PackageNotFoundError, version
14
16
  import inspect
15
17
  from pathlib import Path
16
18
  import pkgutil
17
19
 
18
20
  import numpy as np
19
21
  from numpy.testing import assert_almost_equal, assert_array_equal
22
+ from packaging.version import parse
23
+ import pandas as pd
20
24
  import pytest
21
25
  from sklearn.base import BaseEstimator, TransformerMixin
22
26
 
@@ -106,3 +110,51 @@ class FixtureParameterFactory:
106
110
  values = func()
107
111
  cases.append(pytest.param(*values, id=name))
108
112
  return cases
113
+
114
+ def get_cases_func(self):
115
+ cases = []
116
+ for name, func in inspect.getmembers(self):
117
+ if name.startswith("data_"):
118
+ cases.append(pytest.param(func, id=name))
119
+ return cases
120
+
121
+
122
+ def check_module_minimum_version(module, min_version_str, max_version_str=None):
123
+ """
124
+ Check whether a module of a specified minimum version is available.
125
+
126
+ Parameters
127
+ ----------
128
+ module : str
129
+ Name of the module.
130
+ min_version_str : str
131
+ Minimum version of the module.
132
+ max_version_str : str, optional
133
+ Maximum version of the module (excluding).
134
+
135
+ Returns
136
+ -------
137
+ available : bool
138
+ True if the module is available and its version is >= `version_str`.
139
+ """
140
+ try:
141
+ module_version = parse(version(module))
142
+ required_min_version = parse(min_version_str)
143
+ if max_version_str is None:
144
+ return module_version >= required_min_version
145
+ required_max_version = parse(max_version_str)
146
+ return required_min_version <= module_version < required_max_version
147
+ except PackageNotFoundError: # pragma: no cover
148
+ return False
149
+
150
+
151
+ def get_pandas_infer_string_context():
152
+ if check_module_minimum_version("pandas", "2.3.0", "3.0.0"):
153
+ return (
154
+ pytest.param(pd.option_context("future.infer_string", False), id="infer_string=False"),
155
+ pytest.param(pd.option_context("future.infer_string", True), id="infer_string=True"),
156
+ )
157
+ return (
158
+ pytest.param(nullcontext(), id="pandas default options"),
159
+ pytest.param(nullcontext(), marks=pytest.mark.skip("no pandas 2.3.0")),
160
+ )
sksurv/util.py CHANGED
@@ -142,7 +142,7 @@ class Surv:
142
142
  raise TypeError(f"expected pandas.DataFrame, but got {type(data)!r}")
143
143
 
144
144
  return Surv.from_arrays(
145
- data.loc[:, event].values, data.loc[:, time].values, name_event=str(event), name_time=str(time)
145
+ data.loc[:, event].to_numpy(), data.loc[:, time].to_numpy(), name_event=str(event), name_time=str(time)
146
146
  )
147
147
 
148
148
 
@@ -337,6 +337,7 @@ def safe_concat(objs, *args, **kwargs):
337
337
  categories[df.name] = {"categories": df.cat.categories, "ordered": df.cat.ordered}
338
338
  else:
339
339
  dfc = df.select_dtypes(include=["category"])
340
+ new_dtypes = {}
340
341
  for name, s in dfc.items():
341
342
  if name in categories:
342
343
  if axis == 1:
@@ -345,12 +346,12 @@ def safe_concat(objs, *args, **kwargs):
345
346
  raise ValueError(f"categories for column {name} do not match")
346
347
  else:
347
348
  categories[name] = {"categories": s.cat.categories, "ordered": s.cat.ordered}
348
- df[name] = df[name].astype(object)
349
+ new_dtypes[name] = "str"
350
+ df = df.astype(new_dtypes)
349
351
 
350
352
  concatenated = pd.concat(objs, *args, axis=axis, **kwargs)
351
353
 
352
- for name, params in categories.items():
353
- concatenated[name] = pd.Categorical(concatenated[name], **params)
354
+ concatenated = concatenated.astype({name: pd.CategoricalDtype(**params) for name, params in categories.items()})
354
355
 
355
356
  return concatenated
356
357