pySEQTarget 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. pySEQTarget/SEQopts.py +197 -0
  2. pySEQTarget/SEQoutput.py +163 -0
  3. pySEQTarget/SEQuential.py +375 -0
  4. pySEQTarget/__init__.py +5 -0
  5. pySEQTarget/analysis/__init__.py +8 -0
  6. pySEQTarget/analysis/_hazard.py +211 -0
  7. pySEQTarget/analysis/_outcome_fit.py +75 -0
  8. pySEQTarget/analysis/_risk_estimates.py +136 -0
  9. pySEQTarget/analysis/_subgroup_fit.py +30 -0
  10. pySEQTarget/analysis/_survival_pred.py +372 -0
  11. pySEQTarget/data/__init__.py +19 -0
  12. pySEQTarget/error/__init__.py +2 -0
  13. pySEQTarget/error/_datachecker.py +38 -0
  14. pySEQTarget/error/_param_checker.py +50 -0
  15. pySEQTarget/expansion/__init__.py +5 -0
  16. pySEQTarget/expansion/_binder.py +98 -0
  17. pySEQTarget/expansion/_diagnostics.py +53 -0
  18. pySEQTarget/expansion/_dynamic.py +73 -0
  19. pySEQTarget/expansion/_mapper.py +44 -0
  20. pySEQTarget/expansion/_selection.py +31 -0
  21. pySEQTarget/helpers/__init__.py +8 -0
  22. pySEQTarget/helpers/_bootstrap.py +111 -0
  23. pySEQTarget/helpers/_col_string.py +6 -0
  24. pySEQTarget/helpers/_format_time.py +6 -0
  25. pySEQTarget/helpers/_output_files.py +167 -0
  26. pySEQTarget/helpers/_pad.py +7 -0
  27. pySEQTarget/helpers/_predict_model.py +9 -0
  28. pySEQTarget/helpers/_prepare_data.py +19 -0
  29. pySEQTarget/initialization/__init__.py +5 -0
  30. pySEQTarget/initialization/_censoring.py +53 -0
  31. pySEQTarget/initialization/_denominator.py +39 -0
  32. pySEQTarget/initialization/_numerator.py +37 -0
  33. pySEQTarget/initialization/_outcome.py +56 -0
  34. pySEQTarget/plot/__init__.py +1 -0
  35. pySEQTarget/plot/_survival_plot.py +104 -0
  36. pySEQTarget/weighting/__init__.py +8 -0
  37. pySEQTarget/weighting/_weight_bind.py +86 -0
  38. pySEQTarget/weighting/_weight_data.py +47 -0
  39. pySEQTarget/weighting/_weight_fit.py +99 -0
  40. pySEQTarget/weighting/_weight_pred.py +192 -0
  41. pySEQTarget/weighting/_weight_stats.py +23 -0
  42. pyseqtarget-0.10.0.dist-info/METADATA +98 -0
  43. pyseqtarget-0.10.0.dist-info/RECORD +46 -0
  44. pyseqtarget-0.10.0.dist-info/WHEEL +5 -0
  45. pyseqtarget-0.10.0.dist-info/licenses/LICENSE +21 -0
  46. pyseqtarget-0.10.0.dist-info/top_level.txt +1 -0
pySEQTarget/SEQopts.py ADDED
@@ -0,0 +1,197 @@
1
+ import multiprocessing
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Literal, Optional
4
+
5
+
6
@dataclass
class SEQopts:
    """
    Parameter builder for ``pySEQTarget.SEQuential`` analysis

    All values are validated in ``__post_init__``; formula overrides given as
    strings are normalised by stripping every whitespace character.

    :param bootstrap_nboot: Number of bootstraps to perform (0 disables bootstrapping)
    :type bootstrap_nboot: int
    :param bootstrap_sample: Subsampling proportion of ID-Trials gathered for each bootstrapping iteration
    :type bootstrap_sample: float
    :param bootstrap_CI: If bootstrapped, confidence interval level
    :type bootstrap_CI: float
    :param bootstrap_CI_method: If bootstrapped, confidence interval generation method ['se' or 'percentile']
    :type bootstrap_CI_method: str
    :param cense_colname: Column name for censoring effect (LTFU, etc.)
    :type cense_colname: Optional[str] or None
    :param cense_denominator: Override to specify denominator patsy formula for censoring models
    :type cense_denominator: Optional[str] or None
    :param cense_numerator: Override to specify numerator patsy formula for censoring models
    :type cense_numerator: Optional[str] or None
    :param cense_eligible_colname: Column name to identify which rows are eligible for censoring model fitting
    :type cense_eligible_colname: Optional[str] or None
    :param compevent_colname: Column name specifying a competing event to the outcome
    :type compevent_colname: Optional[str] or None
    :param covariates: Override to specify the outcome patsy formula for outcome model fitting
    :type covariates: Optional[str] or None
    :param denominator: Override to specify the outcome patsy formula for denominator model fitting
    :type denominator: Optional[str] or None
    :param excused: Boolean to allow excused conditions when method is censoring
    :type excused: bool
    :param excused_colnames: Column names (at the same length of treatment_level) specifying excused conditions
    :type excused_colnames: List[str] or []
    :param followup_class: Boolean to force followup values to be treated as classes
    :type followup_class: bool
    :param followup_include: Boolean to force regular followup values into model covariates
    :type followup_include: bool
    :param followup_max: Maximum allowed followup in analysis
    :type followup_max: int or None
    :param followup_min: Minimum allowed followup in analysis
    :type followup_min: int
    :param followup_spline: Boolean to force followup values to be fit to cubic spline
    :type followup_spline: bool
    :param hazard_estimate: Boolean to create hazard estimates
    :type hazard_estimate: bool
    :param indicator_baseline: How to indicate baseline columns in models
    :type indicator_baseline: str
    :param indicator_squared: How to indicate squared columns in models
    :type indicator_squared: str
    :param km_curves: Boolean to create survival, risk, and incidence (if applicable) estimates
    :type km_curves: bool
    :param ncores: Number of cores to use if running in parallel
    :type ncores: int
    :param numerator: Override to specify the outcome patsy formula for numerator models
    :type numerator: Optional[str] or None
    :param parallel: Boolean to run model fitting in parallel
    :type parallel: bool
    :param plot_colors: List of colors for KM plots, if applicable
    :type plot_colors: List[str]
    :param plot_labels: List of length treat_level to specify treatment labeling
    :type plot_labels: List[str]
    :param plot_title: Plot title
    :type plot_title: Optional[str] or None
    :param plot_type: Type of plot to show ["risk", "survival" or "incidence" if compevent is specified]
    :type plot_type: str
    :param seed: RNG seed
    :type seed: Optional[int] or None
    :param selection_first_trial: Boolean to only use first trial for analysis (similar to non-expanded)
    :type selection_first_trial: bool
    :param selection_sample: Subsampling proportion of ID-trials which did not initiate a treatment
    :type selection_sample: float
    :param selection_random: Boolean to randomly downsample ID-trials which did not initiate a treatment
    :type selection_random: bool
    :param subgroup_colname: Column name for subgroups to share the same weighting but different outcome model fits
    :type subgroup_colname: Optional[str] or None
    :param treatment_level: List of eligible treatment levels within treatment_col
    :type treatment_level: List[int]
    :param trial_include: Boolean to force trial values into model covariates
    :type trial_include: bool
    :param visit_colname: Column name for visits (stored as given; its use is not visible in this class)
    :type visit_colname: Optional[str] or None
    :param weight_eligible_colnames: List of column names of length treatment_level to identify which rows are eligible for weight fitting
    :type weight_eligible_colnames: List[str]
    :param weight_min: Minimum weight
    :type weight_min: float
    :param weight_max: Maximum weight
    :type weight_max: float or None
    :param weight_lag_condition: Boolean to fit weights based on their treatment lag
    :type weight_lag_condition: bool
    :param weight_p99: Boolean to force weight min and max to be 1st and 99th percentile respectively
    :type weight_p99: bool
    :param weight_preexpansion: Boolean to fit weights on preexpanded data
    :type weight_preexpansion: bool
    :param weighted: Boolean to weight analysis
    :type weighted: bool
    :raises TypeError: If any boolean option is not a ``bool``
    :raises ValueError: If a numeric or categorical option is outside its allowed domain
    """

    # NOTE: field order is part of the public (positional) constructor
    # signature and must not be rearranged.
    bootstrap_nboot: int = 0
    bootstrap_sample: float = 0.8
    bootstrap_CI: float = 0.95
    bootstrap_CI_method: Literal["se", "percentile"] = "se"
    cense_colname: Optional[str] = None
    cense_denominator: Optional[str] = None
    cense_numerator: Optional[str] = None
    cense_eligible_colname: Optional[str] = None
    compevent_colname: Optional[str] = None
    covariates: Optional[str] = None
    denominator: Optional[str] = None
    excused: bool = False
    excused_colnames: List[str] = field(default_factory=list)
    followup_class: bool = False
    followup_include: bool = True
    followup_max: Optional[int] = None
    followup_min: int = 0
    followup_spline: bool = False
    hazard_estimate: bool = False
    indicator_baseline: str = "_bas"
    indicator_squared: str = "_sq"
    km_curves: bool = False
    # Evaluated once, when the class body is executed (i.e. at import time).
    ncores: int = multiprocessing.cpu_count()
    numerator: Optional[str] = None
    parallel: bool = False
    plot_colors: List[str] = field(
        default_factory=lambda: ["#F8766D", "#00BFC4", "#555555"]
    )
    plot_labels: List[str] = field(default_factory=list)
    plot_title: Optional[str] = None
    plot_type: Literal["risk", "survival", "incidence"] = "risk"
    seed: Optional[int] = None
    selection_first_trial: bool = False
    selection_sample: float = 0.8
    selection_random: bool = False
    subgroup_colname: Optional[str] = None
    treatment_level: List[int] = field(default_factory=lambda: [0, 1])
    trial_include: bool = True
    visit_colname: Optional[str] = None
    weight_eligible_colnames: List[str] = field(default_factory=list)
    weight_min: float = 0.0
    weight_max: Optional[float] = None
    weight_lag_condition: bool = True
    weight_p99: bool = False
    weight_preexpansion: bool = False
    weighted: bool = False

    def __post_init__(self):
        """Validate option values and normalise formula strings."""
        bool_options = (
            "excused",
            "followup_class",
            "followup_include",
            "followup_spline",
            "hazard_estimate",
            "km_curves",
            "parallel",
            "selection_first_trial",
            "selection_random",
            "trial_include",
            "weight_lag_condition",
            "weight_p99",
            "weight_preexpansion",
            "weighted",
        )
        for name in bool_options:
            if not isinstance(getattr(self, name), bool):
                raise TypeError(f"{name} must be a boolean value.")

        # Zero is allowed (and is the default): it means "no bootstrapping".
        if not isinstance(self.bootstrap_nboot, int) or self.bootstrap_nboot < 0:
            raise ValueError("bootstrap_nboot must be a non-negative integer.")

        # Check the type first so a non-numeric value yields the intended
        # ValueError rather than a TypeError from the `<` comparison.
        if not isinstance(self.ncores, int) or self.ncores < 1:
            raise ValueError("ncores must be a positive integer.")

        if not (0.0 <= self.bootstrap_sample <= 1.0):
            raise ValueError("bootstrap_sample must be between 0 and 1.")
        if not (0.0 < self.bootstrap_CI < 1.0):
            raise ValueError("bootstrap_CI must be between 0 and 1.")
        if not (0.0 <= self.selection_sample <= 1.0):
            raise ValueError("selection_sample must be between 0 and 1.")

        if self.plot_type not in ("risk", "survival", "incidence"):
            raise ValueError(
                "plot_type must be either 'risk', 'survival', or 'incidence'."
            )

        if self.bootstrap_CI_method not in ("se", "percentile"):
            raise ValueError("bootstrap_CI_method must be one of 'se' or 'percentile'")

        # Strip all whitespace from formula overrides; list values (and None)
        # are left untouched.
        for name in (
            "covariates",
            "numerator",
            "denominator",
            "cense_numerator",
            "cense_denominator",
        ):
            formula = getattr(self, name)
            if formula is not None and not isinstance(formula, list):
                setattr(self, name, "".join(formula.split()))
@@ -0,0 +1,163 @@
1
+ import tempfile
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+ from typing import List, Literal, Optional
5
+
6
+ import matplotlib.figure
7
+ import polars as pl
8
+ from statsmodels.base.wrapper import ResultsWrapper
9
+
10
+ from .helpers import _build_md, _build_pdf
11
+ from .SEQopts import SEQopts
12
+
13
+
14
+ @dataclass
15
+ class SEQoutput:
16
+ """
17
+ Collector class for results from ``SEQuential``
18
+
19
+ :param options: Options used in the SEQuential process
20
+ :type options: SEQopts or None
21
+ :param method: Method of analysis ['ITT', 'dose-response', or 'censoring']
22
+ :type method: str
23
+ :param numerator_models: Numerator models, if applicable, from the weighting process
24
+ :type numerator_models: List[ResultsWrapper] or None
25
+ :param denominator_models: Denominator models, if applicable, from the weighting process
26
+ :type denominator_models: List[ResultsWrapper] or None
27
+ :param compevent_models: Competing event models, if applicable
28
+ :type compevent_models: List[ResultsWrapper] or None
29
+ :param weight_statistics: Weight statistics once returned back to the expanded dataset
30
+ :type weight_statistics: dict or None
31
+ :param hazard: Hazard ratio if applicable
32
+ :type hazard: pl.DataFrame or None
33
+ :param km_data: Dataframe of risk, survival, and incidence data if applicable at all followups
34
+ :type km_data: pl.DataFrame or None
35
+ :param km_graph: Figure of survival, risk, or incidence over followup times
36
+ :type km_graph: matplotlib.figure.Figure or None
37
+ :param risk_ratio: Dataframe of risk ratios, compared between treatments and subgroups
38
+ :type risk_ratio: pl.DataFrame or None
39
+ :param risk_difference: Dataframe of risk differences, compared between treatments and subgroups
40
+ :type risk_difference: pl.DataFrame or None
41
+ :param time: Timings for every step of the process completed thus far
42
+ :type time: dict or None
43
+ :param diagnostic_tables: Diagnostic tables for unique and nonunique outcome events and treatment switches
44
+ :type diagnostic_tables: dict or None
45
+ """
46
+
47
+ options: SEQopts = None
48
+ method: str = None
49
+ numerator_models: List[ResultsWrapper] = None
50
+ denominator_models: List[ResultsWrapper] = None
51
+ outcome_models: List[List[ResultsWrapper]] = None
52
+ compevent_models: List[List[ResultsWrapper]] = None
53
+ weight_statistics: pl.DataFrame = None
54
+ hazard: pl.DataFrame = None
55
+ km_data: pl.DataFrame = None
56
+ km_graph: matplotlib.figure.Figure = None
57
+ risk_ratio: pl.DataFrame = None
58
+ risk_difference: pl.DataFrame = None
59
+ time: dict = None
60
+ diagnostic_tables: dict = None
61
+
62
+ def plot(self) -> None:
63
+ """
64
+ Prints the kaplan-meier graph
65
+ """
66
+ print(self.km_graph)
67
+
68
+ def summary(
69
+ self, type=Optional[Literal["numerator", "denominator", "outcome", "compevent"]]
70
+ ) -> List:
71
+ """
72
+ Returns a list of model summaries of either the numerator, denominator, outcome, or competing event models
73
+ :param type: Indicator for which model list you would like returned
74
+ :type type: str
75
+ """
76
+ match type:
77
+ case "numerator":
78
+ models = self.numerator_models
79
+ case "denominator":
80
+ models = self.denominator_models
81
+ case "compevent":
82
+ models = self.compevent_models
83
+ case _:
84
+ models = self.outcome_models
85
+
86
+ return [model.summary() for model in models]
87
+
88
+ def retrieve_data(
89
+ self,
90
+ type=Optional[
91
+ Literal[
92
+ "km_data",
93
+ "hazard",
94
+ "risk_ratio",
95
+ "risk_difference",
96
+ "unique_outcomes",
97
+ "nonunique_outcomes",
98
+ "unique_switches",
99
+ "nonunique_switches",
100
+ ]
101
+ ],
102
+ ) -> pl.DataFrame:
103
+ """
104
+ Getter for data stored within ``SEQoutput``
105
+ :param type: Data which you would like to access, ['km_data', 'hazard', 'risk_ratio', 'risk_difference', 'unique_outcomes', 'nonunique_outcomes', 'unique_switches', 'nonunique_switches']
106
+ :type type: str
107
+ """
108
+ match type:
109
+ case "hazard":
110
+ data = self.hazard
111
+ case "risk_ratio":
112
+ data = self.risk_ratio
113
+ case "risk_difference":
114
+ data = self.risk_difference
115
+ case "unique_outcomes":
116
+ data = self.diagnostic_tables["unique_outcomes"]
117
+ case "nonunique_outcomes":
118
+ data = self.diagnostic_tables["nonunique_outcomes"]
119
+ case "unique_switches":
120
+ if self.diagnostic_tables.has_key("unique_switches"):
121
+ data = self.diagnostic_tables["unique_switches"]
122
+ else:
123
+ data = None
124
+ case "nonunique_switches":
125
+ if self.diagnostic_tables.has_key("nonunique_switches"):
126
+ data = self.diagnostic_tables["nonunique_switches"]
127
+ else:
128
+ data = None
129
+ case _:
130
+ data = self.km_data
131
+ if data is None:
132
+ raise ValueError("Data {type} was not created in the SEQuential process")
133
+ return data
134
+
135
+ def to_md(self, filename="SEQuential_results.md") -> None:
136
+ """Generates a markdown report of the SEQuential analysis results."""
137
+
138
+ img_path = None
139
+ if self.options.km_curves and self.km_graph is not None:
140
+ img_path = Path(filename).with_suffix(".png")
141
+ self.km_graph.savefig(img_path, dpi=300, bbox_inches="tight")
142
+ img_path = img_path.name
143
+
144
+ with open(filename, "w") as f:
145
+ f.write(_build_md(self, img_path))
146
+
147
+ print(f"Results saved to {filename}")
148
+
149
+ def to_pdf(self, filename="SEQuential_results.pdf") -> None:
150
+ """Generates a PDF report of the SEQuential analysis results."""
151
+ with tempfile.TemporaryDirectory() as tmpdir:
152
+ tmp_md = Path(tmpdir) / "report.md"
153
+ self.to_md(str(tmp_md))
154
+
155
+ with open(tmp_md, "r") as f:
156
+ md_content = f.read()
157
+
158
+ tmp_img = tmp_md.with_suffix(".png")
159
+ img_abs_path = str(tmp_img.absolute()) if tmp_img.exists() else None
160
+
161
+ _build_pdf(md_content, filename, img_abs_path)
162
+
163
+ print(f"Results saved to {filename}")