pySEQTarget 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pySEQTarget/SEQopts.py +197 -0
- pySEQTarget/SEQoutput.py +163 -0
- pySEQTarget/SEQuential.py +375 -0
- pySEQTarget/__init__.py +5 -0
- pySEQTarget/analysis/__init__.py +8 -0
- pySEQTarget/analysis/_hazard.py +211 -0
- pySEQTarget/analysis/_outcome_fit.py +75 -0
- pySEQTarget/analysis/_risk_estimates.py +136 -0
- pySEQTarget/analysis/_subgroup_fit.py +30 -0
- pySEQTarget/analysis/_survival_pred.py +372 -0
- pySEQTarget/data/__init__.py +19 -0
- pySEQTarget/error/__init__.py +2 -0
- pySEQTarget/error/_datachecker.py +38 -0
- pySEQTarget/error/_param_checker.py +50 -0
- pySEQTarget/expansion/__init__.py +5 -0
- pySEQTarget/expansion/_binder.py +98 -0
- pySEQTarget/expansion/_diagnostics.py +53 -0
- pySEQTarget/expansion/_dynamic.py +73 -0
- pySEQTarget/expansion/_mapper.py +44 -0
- pySEQTarget/expansion/_selection.py +31 -0
- pySEQTarget/helpers/__init__.py +8 -0
- pySEQTarget/helpers/_bootstrap.py +111 -0
- pySEQTarget/helpers/_col_string.py +6 -0
- pySEQTarget/helpers/_format_time.py +6 -0
- pySEQTarget/helpers/_output_files.py +167 -0
- pySEQTarget/helpers/_pad.py +7 -0
- pySEQTarget/helpers/_predict_model.py +9 -0
- pySEQTarget/helpers/_prepare_data.py +19 -0
- pySEQTarget/initialization/__init__.py +5 -0
- pySEQTarget/initialization/_censoring.py +53 -0
- pySEQTarget/initialization/_denominator.py +39 -0
- pySEQTarget/initialization/_numerator.py +37 -0
- pySEQTarget/initialization/_outcome.py +56 -0
- pySEQTarget/plot/__init__.py +1 -0
- pySEQTarget/plot/_survival_plot.py +104 -0
- pySEQTarget/weighting/__init__.py +8 -0
- pySEQTarget/weighting/_weight_bind.py +86 -0
- pySEQTarget/weighting/_weight_data.py +47 -0
- pySEQTarget/weighting/_weight_fit.py +99 -0
- pySEQTarget/weighting/_weight_pred.py +192 -0
- pySEQTarget/weighting/_weight_stats.py +23 -0
- pyseqtarget-0.10.0.dist-info/METADATA +98 -0
- pyseqtarget-0.10.0.dist-info/RECORD +46 -0
- pyseqtarget-0.10.0.dist-info/WHEEL +5 -0
- pyseqtarget-0.10.0.dist-info/licenses/LICENSE +21 -0
- pyseqtarget-0.10.0.dist-info/top_level.txt +1 -0
pySEQTarget/SEQopts.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
import multiprocessing
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import List, Literal, Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class SEQopts:
    """
    Parameter builder for ``pySEQTarget.SEQuential`` analysis

    Values are validated in ``__post_init__``; formula overrides given as
    strings have all whitespace removed there.

    :param bootstrap_nboot: Number of bootstraps to perform (0 disables bootstrapping)
    :type bootstrap_nboot: int
    :param bootstrap_sample: Subsampling proportion of ID-Trials gathered for each bootstrapping iteration
    :type bootstrap_sample: float
    :param bootstrap_CI: If bootstrapped, confidence interval level
    :type bootstrap_CI: float
    :param bootstrap_CI_method: If bootstrapped, confidence interval generation method ['se' or 'percentile']
    :type bootstrap_CI_method: str
    :param cense_colname: Column name for censoring effect (LTFU, etc.)
    :type cense_colname: Optional[str] or None
    :param cense_denominator: Override to specify denominator patsy formula for censoring models
    :type cense_denominator: Optional[str] or None
    :param cense_numerator: Override to specify numerator patsy formula for censoring models
    :type cense_numerator: Optional[str] or None
    :param cense_eligible_colname: Column name to identify which rows are eligible for censoring model fitting
    :type cense_eligible_colname: Optional[str] or None
    :param compevent_colname: Column name specifying a competing event to the outcome
    :type compevent_colname: Optional[str] or None
    :param covariates: Override to specify the outcome patsy formula for outcome model fitting
    :type covariates: Optional[str] or None
    :param denominator: Override to specify the outcome patsy formula for denominator model fitting
    :type denominator: Optional[str] or None
    :param excused: Boolean to allow excused conditions when method is censoring
    :type excused: bool
    :param excused_colnames: Column names (at the same length of treatment_level) specifying excused conditions
    :type excused_colnames: List[str] or []
    :param followup_class: Boolean to force followup values to be treated as classes
    :type followup_class: bool
    :param followup_include: Boolean to force regular followup values into model covariates
    :type followup_include: bool
    :param followup_spline: Boolean to force followup values to be fit to cubic spline
    :type followup_spline: bool
    :param followup_max: Maximum allowed followup in analysis
    :type followup_max: int or None
    :param followup_min: Minimum allowed followup in analysis
    :type followup_min: int
    :param hazard_estimate: Boolean to create hazard estimates
    :type hazard_estimate: bool
    :param indicator_baseline: How to indicate baseline columns in models
    :type indicator_baseline: str
    :param indicator_squared: How to indicate squared columns in models
    :type indicator_squared: str
    :param km_curves: Boolean to create survival, risk, and incidence (if applicable) estimates
    :type km_curves: bool
    :param ncores: Number of cores to use if running in parallel
    :type ncores: int
    :param numerator: Override to specify the outcome patsy formula for numerator models
    :type numerator: Optional[str] or None
    :param parallel: Boolean to run model fitting in parallel
    :type parallel: bool
    :param plot_colors: List of colors for KM plots, if applicable
    :type plot_colors: List[str]
    :param plot_labels: List of length treat_level to specify treatment labeling
    :type plot_labels: List[str]
    :param plot_title: Plot title
    :type plot_title: Optional[str] or None
    :param plot_type: Type of plot to show ["risk", "survival" or "incidence" if compevent is specified]
    :type plot_type: str
    :param seed: RNG seed
    :type seed: Optional[int] or None
    :param selection_first_trial: Boolean to only use first trial for analysis (similar to non-expanded)
    :type selection_first_trial: bool
    :param selection_sample: Subsampling proportion of ID-trials which did not initiate a treatment
    :type selection_sample: float
    :param selection_random: Boolean to randomly downsample ID-trials which did not initiate a treatment
    :type selection_random: bool
    :param subgroup_colname: Column name for subgroups to share the same weighting but different outcome model fits
    :type subgroup_colname: Optional[str] or None
    :param treatment_level: List of eligible treatment levels within treatment_col
    :type treatment_level: List[int]
    :param trial_include: Boolean to force trial values into model covariates
    :type trial_include: bool
    :param visit_colname: Optional column name; its use is not visible in this module (presumably identifies a visit indicator -- TODO confirm against the caller)
    :type visit_colname: Optional[str] or None
    :param weight_eligible_colnames: List of column names of length treatment_level to identify which rows are eligible for weight fitting
    :type weight_eligible_colnames: List[str]
    :param weight_min: Minimum weight
    :type weight_min: float
    :param weight_max: Maximum weight
    :type weight_max: float or None
    :param weight_lag_condition: Boolean to fit weights based on their treatment lag
    :type weight_lag_condition: bool
    :param weight_p99: Boolean to force weight min and max to be 1st and 99th percentile respectively
    :type weight_p99: bool
    :param weight_preexpansion: Boolean to fit weights on preexpanded data
    :type weight_preexpansion: bool
    :param weighted: Boolean to weight analysis
    :type weighted: bool
    """

    # NOTE: field order is part of the public constructor signature
    # (positional arguments), so it must not be rearranged.
    bootstrap_nboot: int = 0
    bootstrap_sample: float = 0.8
    bootstrap_CI: float = 0.95
    bootstrap_CI_method: Literal["se", "percentile"] = "se"
    cense_colname: Optional[str] = None
    cense_denominator: Optional[str] = None
    cense_numerator: Optional[str] = None
    cense_eligible_colname: Optional[str] = None
    compevent_colname: Optional[str] = None
    covariates: Optional[str] = None
    denominator: Optional[str] = None
    excused: bool = False
    excused_colnames: List[str] = field(default_factory=list)
    followup_class: bool = False
    followup_include: bool = True
    followup_max: Optional[int] = None
    followup_min: int = 0
    followup_spline: bool = False
    hazard_estimate: bool = False
    indicator_baseline: str = "_bas"
    indicator_squared: str = "_sq"
    km_curves: bool = False
    # Evaluated once, when the class body is executed at import time.
    ncores: int = multiprocessing.cpu_count()
    numerator: Optional[str] = None
    parallel: bool = False
    plot_colors: List[str] = field(
        default_factory=lambda: ["#F8766D", "#00BFC4", "#555555"]
    )
    plot_labels: List[str] = field(default_factory=list)
    plot_title: Optional[str] = None
    plot_type: Literal["risk", "survival", "incidence"] = "risk"
    seed: Optional[int] = None
    selection_first_trial: bool = False
    selection_sample: float = 0.8
    selection_random: bool = False
    subgroup_colname: Optional[str] = None
    treatment_level: List[int] = field(default_factory=lambda: [0, 1])
    trial_include: bool = True
    visit_colname: Optional[str] = None
    weight_eligible_colnames: List[str] = field(default_factory=list)
    weight_min: float = 0.0
    weight_max: Optional[float] = None
    weight_lag_condition: bool = True
    weight_p99: bool = False
    weight_preexpansion: bool = False
    weighted: bool = False

    def __post_init__(self):
        """
        Validate option values and normalise formula overrides

        :raises TypeError: If any boolean option is not a ``bool``
        :raises ValueError: If a numeric option is out of range or a choice option is not one of its allowed values
        """
        bools = [
            "excused",
            "followup_class",
            "followup_include",
            "followup_spline",
            "hazard_estimate",
            "km_curves",
            "parallel",
            "selection_first_trial",
            "selection_random",
            "trial_include",
            "weight_lag_condition",
            "weight_p99",
            "weight_preexpansion",
            "weighted",
        ]
        for name in bools:
            if not isinstance(getattr(self, name), bool):
                raise TypeError(f"{name} must be a boolean value.")

        # Zero is accepted (and is the default), hence "non-negative".
        if not isinstance(self.bootstrap_nboot, int) or self.bootstrap_nboot < 0:
            raise ValueError("bootstrap_nboot must be a non-negative integer.")

        # Check the type before comparing so that a non-numeric value produces
        # the intended ValueError instead of a TypeError from ``<``.
        if not isinstance(self.ncores, int) or self.ncores < 1:
            raise ValueError("ncores must be a positive integer.")

        if not (0.0 <= self.bootstrap_sample <= 1.0):
            raise ValueError("bootstrap_sample must be between 0 and 1.")
        if not (0.0 < self.bootstrap_CI < 1.0):
            raise ValueError("bootstrap_CI must be between 0 and 1.")
        if not (0.0 <= self.selection_sample <= 1.0):
            raise ValueError("selection_sample must be between 0 and 1.")

        if self.plot_type not in ["risk", "survival", "incidence"]:
            raise ValueError(
                "plot_type must be either 'risk', 'survival', or 'incidence'."
            )

        if self.bootstrap_CI_method not in ["se", "percentile"]:
            raise ValueError("bootstrap_CI_method must be one of 'se' or 'percentile'")

        # Strip every whitespace character from formula overrides; values
        # supplied as lists are left untouched.
        for name in (
            "covariates",
            "numerator",
            "denominator",
            "cense_numerator",
            "cense_denominator",
        ):
            formula = getattr(self, name)
            if formula is not None and not isinstance(formula, list):
                setattr(self, name, "".join(formula.split()))
|
pySEQTarget/SEQoutput.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import tempfile
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import List, Literal, Optional
|
|
5
|
+
|
|
6
|
+
import matplotlib.figure
|
|
7
|
+
import polars as pl
|
|
8
|
+
from statsmodels.base.wrapper import ResultsWrapper
|
|
9
|
+
|
|
10
|
+
from .helpers import _build_md, _build_pdf
|
|
11
|
+
from .SEQopts import SEQopts
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
|
|
15
|
+
class SEQoutput:
|
|
16
|
+
"""
|
|
17
|
+
Collector class for results from ``SEQuential``
|
|
18
|
+
|
|
19
|
+
:param options: Options used in the SEQuential process
|
|
20
|
+
:type options: SEQopts or None
|
|
21
|
+
:param method: Method of analysis ['ITT', 'dose-response', or 'censoring']
|
|
22
|
+
:type method: str
|
|
23
|
+
:param numerator_models: Numerator models, if applicable, from the weighting process
|
|
24
|
+
:type numerator_models: List[ResultsWrapper] or None
|
|
25
|
+
:param denominator_models: Denominator models, if applicable, from the weighting process
|
|
26
|
+
:type denominator_models: List[ResultsWrapper] or None
|
|
27
|
+
:param compevent_models: Competing event models, if applicable
|
|
28
|
+
:type compevent_models: List[ResultsWrapper] or None
|
|
29
|
+
:param weight_statistics: Weight statistics once returned back to the expanded dataset
|
|
30
|
+
:type weight_statistics: dict or None
|
|
31
|
+
:param hazard: Hazard ratio if applicable
|
|
32
|
+
:type hazard: pl.DataFrame or None
|
|
33
|
+
:param km_data: Dataframe of risk, survival, and incidence data if applicable at all followups
|
|
34
|
+
:type km_data: pl.DataFrame or None
|
|
35
|
+
:param km_graph: Figure of survival, risk, or incidence over followup times
|
|
36
|
+
:type km_graph: matplotlib.figure.Figure or None
|
|
37
|
+
:param risk_ratio: Dataframe of risk ratios, compared between treatments and subgroups
|
|
38
|
+
:type risk_ratio: pl.DataFrame or None
|
|
39
|
+
:param risk_difference: Dataframe of risk differences, compared between treatments and subgroups
|
|
40
|
+
:type risk_difference: pl.DataFrame or None
|
|
41
|
+
:param time: Timings for every step of the process completed thus far
|
|
42
|
+
:type time: dict or None
|
|
43
|
+
:param diagnostic_tables: Diagnostic tables for unique and nonunique outcome events and treatment switches
|
|
44
|
+
:type diagnostic_tables: dict or None
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
options: SEQopts = None
|
|
48
|
+
method: str = None
|
|
49
|
+
numerator_models: List[ResultsWrapper] = None
|
|
50
|
+
denominator_models: List[ResultsWrapper] = None
|
|
51
|
+
outcome_models: List[List[ResultsWrapper]] = None
|
|
52
|
+
compevent_models: List[List[ResultsWrapper]] = None
|
|
53
|
+
weight_statistics: pl.DataFrame = None
|
|
54
|
+
hazard: pl.DataFrame = None
|
|
55
|
+
km_data: pl.DataFrame = None
|
|
56
|
+
km_graph: matplotlib.figure.Figure = None
|
|
57
|
+
risk_ratio: pl.DataFrame = None
|
|
58
|
+
risk_difference: pl.DataFrame = None
|
|
59
|
+
time: dict = None
|
|
60
|
+
diagnostic_tables: dict = None
|
|
61
|
+
|
|
62
|
+
def plot(self) -> None:
|
|
63
|
+
"""
|
|
64
|
+
Prints the kaplan-meier graph
|
|
65
|
+
"""
|
|
66
|
+
print(self.km_graph)
|
|
67
|
+
|
|
68
|
+
def summary(
|
|
69
|
+
self, type=Optional[Literal["numerator", "denominator", "outcome", "compevent"]]
|
|
70
|
+
) -> List:
|
|
71
|
+
"""
|
|
72
|
+
Returns a list of model summaries of either the numerator, denominator, outcome, or competing event models
|
|
73
|
+
:param type: Indicator for which model list you would like returned
|
|
74
|
+
:type type: str
|
|
75
|
+
"""
|
|
76
|
+
match type:
|
|
77
|
+
case "numerator":
|
|
78
|
+
models = self.numerator_models
|
|
79
|
+
case "denominator":
|
|
80
|
+
models = self.denominator_models
|
|
81
|
+
case "compevent":
|
|
82
|
+
models = self.compevent_models
|
|
83
|
+
case _:
|
|
84
|
+
models = self.outcome_models
|
|
85
|
+
|
|
86
|
+
return [model.summary() for model in models]
|
|
87
|
+
|
|
88
|
+
def retrieve_data(
|
|
89
|
+
self,
|
|
90
|
+
type=Optional[
|
|
91
|
+
Literal[
|
|
92
|
+
"km_data",
|
|
93
|
+
"hazard",
|
|
94
|
+
"risk_ratio",
|
|
95
|
+
"risk_difference",
|
|
96
|
+
"unique_outcomes",
|
|
97
|
+
"nonunique_outcomes",
|
|
98
|
+
"unique_switches",
|
|
99
|
+
"nonunique_switches",
|
|
100
|
+
]
|
|
101
|
+
],
|
|
102
|
+
) -> pl.DataFrame:
|
|
103
|
+
"""
|
|
104
|
+
Getter for data stored within ``SEQoutput``
|
|
105
|
+
:param type: Data which you would like to access, ['km_data', 'hazard', 'risk_ratio', 'risk_difference', 'unique_outcomes', 'nonunique_outcomes', 'unique_switches', 'nonunique_switches']
|
|
106
|
+
:type type: str
|
|
107
|
+
"""
|
|
108
|
+
match type:
|
|
109
|
+
case "hazard":
|
|
110
|
+
data = self.hazard
|
|
111
|
+
case "risk_ratio":
|
|
112
|
+
data = self.risk_ratio
|
|
113
|
+
case "risk_difference":
|
|
114
|
+
data = self.risk_difference
|
|
115
|
+
case "unique_outcomes":
|
|
116
|
+
data = self.diagnostic_tables["unique_outcomes"]
|
|
117
|
+
case "nonunique_outcomes":
|
|
118
|
+
data = self.diagnostic_tables["nonunique_outcomes"]
|
|
119
|
+
case "unique_switches":
|
|
120
|
+
if self.diagnostic_tables.has_key("unique_switches"):
|
|
121
|
+
data = self.diagnostic_tables["unique_switches"]
|
|
122
|
+
else:
|
|
123
|
+
data = None
|
|
124
|
+
case "nonunique_switches":
|
|
125
|
+
if self.diagnostic_tables.has_key("nonunique_switches"):
|
|
126
|
+
data = self.diagnostic_tables["nonunique_switches"]
|
|
127
|
+
else:
|
|
128
|
+
data = None
|
|
129
|
+
case _:
|
|
130
|
+
data = self.km_data
|
|
131
|
+
if data is None:
|
|
132
|
+
raise ValueError("Data {type} was not created in the SEQuential process")
|
|
133
|
+
return data
|
|
134
|
+
|
|
135
|
+
def to_md(self, filename="SEQuential_results.md") -> None:
|
|
136
|
+
"""Generates a markdown report of the SEQuential analysis results."""
|
|
137
|
+
|
|
138
|
+
img_path = None
|
|
139
|
+
if self.options.km_curves and self.km_graph is not None:
|
|
140
|
+
img_path = Path(filename).with_suffix(".png")
|
|
141
|
+
self.km_graph.savefig(img_path, dpi=300, bbox_inches="tight")
|
|
142
|
+
img_path = img_path.name
|
|
143
|
+
|
|
144
|
+
with open(filename, "w") as f:
|
|
145
|
+
f.write(_build_md(self, img_path))
|
|
146
|
+
|
|
147
|
+
print(f"Results saved to {filename}")
|
|
148
|
+
|
|
149
|
+
def to_pdf(self, filename="SEQuential_results.pdf") -> None:
|
|
150
|
+
"""Generates a PDF report of the SEQuential analysis results."""
|
|
151
|
+
with tempfile.TemporaryDirectory() as tmpdir:
|
|
152
|
+
tmp_md = Path(tmpdir) / "report.md"
|
|
153
|
+
self.to_md(str(tmp_md))
|
|
154
|
+
|
|
155
|
+
with open(tmp_md, "r") as f:
|
|
156
|
+
md_content = f.read()
|
|
157
|
+
|
|
158
|
+
tmp_img = tmp_md.with_suffix(".png")
|
|
159
|
+
img_abs_path = str(tmp_img.absolute()) if tmp_img.exists() else None
|
|
160
|
+
|
|
161
|
+
_build_pdf(md_content, filename, img_abs_path)
|
|
162
|
+
|
|
163
|
+
print(f"Results saved to {filename}")
|