pySEQTarget 0.9.0__tar.gz → 0.10.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/PKG-INFO +16 -8
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/README.md +13 -7
- pyseqtarget-0.10.1/pySEQTarget/SEQopts.py +197 -0
- pyseqtarget-0.10.1/pySEQTarget/SEQoutput.py +163 -0
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pySEQTarget/SEQuential.py +77 -17
- pyseqtarget-0.10.1/pySEQTarget/analysis/__init__.py +9 -0
- pyseqtarget-0.10.1/pySEQTarget/analysis/_hazard.py +211 -0
- pyseqtarget-0.10.1/pySEQTarget/analysis/_outcome_fit.py +75 -0
- pyseqtarget-0.10.1/pySEQTarget/analysis/_risk_estimates.py +136 -0
- pyseqtarget-0.10.1/pySEQTarget/analysis/_subgroup_fit.py +30 -0
- pyseqtarget-0.10.1/pySEQTarget/analysis/_survival_pred.py +380 -0
- pyseqtarget-0.10.1/pySEQTarget/error/__init__.py +2 -0
- pyseqtarget-0.10.1/pySEQTarget/error/_data_checker.py +38 -0
- pyseqtarget-0.10.1/pySEQTarget/error/_param_checker.py +50 -0
- pyseqtarget-0.10.1/pySEQTarget/expansion/__init__.py +5 -0
- pyseqtarget-0.10.1/pySEQTarget/expansion/_binder.py +98 -0
- pyseqtarget-0.10.1/pySEQTarget/expansion/_diagnostics.py +53 -0
- pyseqtarget-0.10.1/pySEQTarget/expansion/_dynamic.py +73 -0
- pyseqtarget-0.10.1/pySEQTarget/expansion/_mapper.py +44 -0
- pyseqtarget-0.10.1/pySEQTarget/expansion/_selection.py +44 -0
- pyseqtarget-0.10.1/pySEQTarget/helpers/__init__.py +8 -0
- pyseqtarget-0.10.1/pySEQTarget/helpers/_bootstrap.py +112 -0
- pyseqtarget-0.10.1/pySEQTarget/helpers/_col_string.py +6 -0
- pyseqtarget-0.10.1/pySEQTarget/helpers/_format_time.py +6 -0
- pyseqtarget-0.10.1/pySEQTarget/helpers/_output_files.py +167 -0
- pyseqtarget-0.10.1/pySEQTarget/helpers/_pad.py +7 -0
- pyseqtarget-0.10.1/pySEQTarget/helpers/_predict_model.py +9 -0
- pyseqtarget-0.10.1/pySEQTarget/helpers/_prepare_data.py +19 -0
- pyseqtarget-0.10.1/pySEQTarget/initialization/__init__.py +5 -0
- pyseqtarget-0.10.1/pySEQTarget/initialization/_censoring.py +53 -0
- pyseqtarget-0.10.1/pySEQTarget/initialization/_denominator.py +39 -0
- pyseqtarget-0.10.1/pySEQTarget/initialization/_numerator.py +37 -0
- pyseqtarget-0.10.1/pySEQTarget/initialization/_outcome.py +56 -0
- pyseqtarget-0.10.1/pySEQTarget/plot/__init__.py +1 -0
- pyseqtarget-0.10.1/pySEQTarget/plot/_survival_plot.py +104 -0
- pyseqtarget-0.10.1/pySEQTarget/weighting/__init__.py +8 -0
- pyseqtarget-0.10.1/pySEQTarget/weighting/_weight_bind.py +86 -0
- pyseqtarget-0.10.1/pySEQTarget/weighting/_weight_data.py +47 -0
- pyseqtarget-0.10.1/pySEQTarget/weighting/_weight_fit.py +99 -0
- pyseqtarget-0.10.1/pySEQTarget/weighting/_weight_pred.py +192 -0
- pyseqtarget-0.10.1/pySEQTarget/weighting/_weight_stats.py +23 -0
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pySEQTarget.egg-info/PKG-INFO +16 -8
- pyseqtarget-0.10.1/pySEQTarget.egg-info/SOURCES.txt +56 -0
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pyproject.toml +25 -5
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/tests/test_coefficients.py +109 -71
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/tests/test_parallel.py +15 -12
- pyseqtarget-0.9.0/pySEQTarget/SEQopts.py +0 -105
- pyseqtarget-0.9.0/pySEQTarget/SEQoutput.py +0 -86
- pyseqtarget-0.9.0/pySEQTarget.egg-info/SOURCES.txt +0 -20
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/LICENSE +0 -0
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pySEQTarget/__init__.py +0 -0
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pySEQTarget/data/__init__.py +0 -0
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pySEQTarget.egg-info/dependency_links.txt +0 -0
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pySEQTarget.egg-info/requires.txt +0 -0
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pySEQTarget.egg-info/top_level.txt +0 -0
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/setup.cfg +0 -0
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/tests/test_accessor.py +0 -0
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/tests/test_covariates.py +0 -0
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/tests/test_followup_options.py +0 -0
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/tests/test_hazard.py +0 -0
- {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/tests/test_survival.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pySEQTarget
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.10.1
|
|
4
4
|
Summary: Sequentially Nested Target Trial Emulation
|
|
5
5
|
Author-email: Ryan O'Dea <ryan.odea@psi.ch>, Alejandro Szmulewicz <aszmulewicz@hsph.harvard.edu>, Tom Palmer <tom.palmer@bristol.ac.uk>, Miguel Hernan <mhernan@hsph.harvard.edu>
|
|
6
6
|
Maintainer-email: Ryan O'Dea <ryan.odea@psi.ch>
|
|
@@ -21,6 +21,8 @@ Classifier: Programming Language :: Python :: 3
|
|
|
21
21
|
Classifier: Programming Language :: Python :: 3.10
|
|
22
22
|
Classifier: Programming Language :: Python :: 3.11
|
|
23
23
|
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
24
26
|
Requires-Python: >=3.10
|
|
25
27
|
Description-Content-Type: text/markdown
|
|
26
28
|
License-File: LICENSE
|
|
@@ -34,11 +36,16 @@ Requires-Dist: lifelines
|
|
|
34
36
|
Dynamic: license-file
|
|
35
37
|
|
|
36
38
|
# pySEQTarget - Sequentially Nested Target Trial Emulation
|
|
39
|
+
[](https://pypi.org/project/pySEQTarget)
|
|
40
|
+
[](https://pepy.tech/project/pySEQTarget)
|
|
41
|
+
[](https://codecov.io/gh/CausalInference/pySEQTarget)[](https://opensource.org/licenses/MIT)
|
|
42
|
+

|
|
43
|
+
[](https://pySEQTarget.readthedocs.io)
|
|
37
44
|
|
|
38
45
|
Implementation of sequential trial emulation for the analysis of
|
|
39
|
-
observational databases. The
|
|
46
|
+
observational databases. The `SEQTaRget` software accommodates
|
|
40
47
|
time-varying treatments and confounders, as well as binary and failure
|
|
41
|
-
time outcomes.
|
|
48
|
+
time outcomes. `SEQTaRget` allows comparison of both static and dynamic
|
|
42
49
|
strategies, can be used to estimate observational analogs of
|
|
43
50
|
intention-to-treat and per-protocol effects, and can adjust for
|
|
44
51
|
potential selection bias.
|
|
@@ -61,8 +68,9 @@ From the user side, this amounts to creating a dataclass, `SEQopts`, and then fe
|
|
|
61
68
|
```python
|
|
62
69
|
import polars as pl
|
|
63
70
|
from pySEQTarget import SEQuential, SEQopts
|
|
71
|
+
from pySEQTarget.data import load_data
|
|
64
72
|
|
|
65
|
-
data =
|
|
73
|
+
data = load_data("SEQdata")
|
|
66
74
|
options = SEQopts(km_curves = True)
|
|
67
75
|
|
|
68
76
|
# Initiate the class
|
|
@@ -70,22 +78,22 @@ model = SEQuential(data,
|
|
|
70
78
|
id_col = "ID",
|
|
71
79
|
time_col = "time",
|
|
72
80
|
eligible_col = "eligible",
|
|
81
|
+
treatment_col = "tx_init",
|
|
82
|
+
outcome_col = "outcome",
|
|
73
83
|
time_varying_cols = ["N", "L", "P"],
|
|
74
84
|
fixed_cols = ["sex"],
|
|
75
85
|
method = "ITT",
|
|
76
|
-
|
|
86
|
+
parameters = options)
|
|
77
87
|
model.expand() # Construct the nested structure
|
|
78
88
|
model.bootstrap(bootstrap_nboot = 20) # Run 20 bootstrap samples
|
|
79
89
|
model.fit() # Fit the model
|
|
80
90
|
model.survival() # Create survival curves
|
|
81
91
|
model.plot() # Create and show a plot of the survival curves
|
|
82
92
|
model.collect() # Collection of important information
|
|
83
|
-
|
|
84
93
|
```
|
|
85
94
|
|
|
86
95
|
## Assumptions
|
|
87
96
|
There are several key assumptions in this package -
|
|
88
97
|
1. User provided `time_col` begins at 0 per unique `id_col`, we also assume this column contains only integers and continues by 1 for every time step, e.g. (0, 1, 2, 3, 4, ...) is allowed and (0, 1, 2, 2.5, ...) or (0, 1, 4, 5) are not
|
|
89
98
|
1. Provided `time_col` entries may be out of order at intake as a sort is enforced at expansion.
|
|
90
|
-
2. `eligible_col
|
|
91
|
-
|
|
99
|
+
2. `eligible_col` and elements of `excused_colnames` are flag variables that, once equal to 1, remain 1 for all later values of `time_col`.
|
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
# pySEQTarget - Sequentially Nested Target Trial Emulation
|
|
2
|
+
[](https://pypi.org/project/pySEQTarget)
|
|
3
|
+
[](https://pepy.tech/project/pySEQTarget)
|
|
4
|
+
[](https://codecov.io/gh/CausalInference/pySEQTarget)[](https://opensource.org/licenses/MIT)
|
|
5
|
+

|
|
6
|
+
[](https://pySEQTarget.readthedocs.io)
|
|
2
7
|
|
|
3
8
|
Implementation of sequential trial emulation for the analysis of
|
|
4
|
-
observational databases. The
|
|
9
|
+
observational databases. The `SEQTaRget` software accommodates
|
|
5
10
|
time-varying treatments and confounders, as well as binary and failure
|
|
6
|
-
time outcomes.
|
|
11
|
+
time outcomes. `SEQTaRget` allows comparison of both static and dynamic
|
|
7
12
|
strategies, can be used to estimate observational analogs of
|
|
8
13
|
intention-to-treat and per-protocol effects, and can adjust for
|
|
9
14
|
potential selection bias.
|
|
@@ -26,8 +31,9 @@ From the user side, this amounts to creating a dataclass, `SEQopts`, and then fe
|
|
|
26
31
|
```python
|
|
27
32
|
import polars as pl
|
|
28
33
|
from pySEQTarget import SEQuential, SEQopts
|
|
34
|
+
from pySEQTarget.data import load_data
|
|
29
35
|
|
|
30
|
-
data =
|
|
36
|
+
data = load_data("SEQdata")
|
|
31
37
|
options = SEQopts(km_curves = True)
|
|
32
38
|
|
|
33
39
|
# Initiate the class
|
|
@@ -35,22 +41,22 @@ model = SEQuential(data,
|
|
|
35
41
|
id_col = "ID",
|
|
36
42
|
time_col = "time",
|
|
37
43
|
eligible_col = "eligible",
|
|
44
|
+
treatment_col = "tx_init",
|
|
45
|
+
outcome_col = "outcome",
|
|
38
46
|
time_varying_cols = ["N", "L", "P"],
|
|
39
47
|
fixed_cols = ["sex"],
|
|
40
48
|
method = "ITT",
|
|
41
|
-
|
|
49
|
+
parameters = options)
|
|
42
50
|
model.expand() # Construct the nested structure
|
|
43
51
|
model.bootstrap(bootstrap_nboot = 20) # Run 20 bootstrap samples
|
|
44
52
|
model.fit() # Fit the model
|
|
45
53
|
model.survival() # Create survival curves
|
|
46
54
|
model.plot() # Create and show a plot of the survival curves
|
|
47
55
|
model.collect() # Collection of important information
|
|
48
|
-
|
|
49
56
|
```
|
|
50
57
|
|
|
51
58
|
## Assumptions
|
|
52
59
|
There are several key assumptions in this package -
|
|
53
60
|
1. User provided `time_col` begins at 0 per unique `id_col`, we also assume this column contains only integers and continues by 1 for every time step, e.g. (0, 1, 2, 3, 4, ...) is allowed and (0, 1, 2, 2.5, ...) or (0, 1, 4, 5) are not
|
|
54
61
|
1. Provided `time_col` entries may be out of order at intake as a sort is enforced at expansion.
|
|
55
|
-
2. `eligible_col
|
|
56
|
-
|
|
62
|
+
2. `eligible_col` and elements of `excused_colnames` are flag variables that, once equal to 1, remain 1 for all later values of `time_col`.
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
import multiprocessing
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import List, Literal, Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class SEQopts:
    """
    Parameter builder for ``pySEQTarget.SEQuential`` analysis

    All values are validated in ``__post_init__``; formula overrides given as
    strings have every whitespace character removed.

    :param bootstrap_nboot: Number of bootstraps to perform (0 disables bootstrapping)
    :type bootstrap_nboot: int
    :param bootstrap_sample: Subsampling proportion of ID-Trials gathered for each bootstrapping iteration
    :type bootstrap_sample: float
    :param bootstrap_CI: If bootstrapped, confidence interval level
    :type bootstrap_CI: float
    :param bootstrap_CI_method: If bootstrapped, confidence interval generation method ['se' or 'percentile']
    :type bootstrap_CI_method: str
    :param cense_colname: Column name for censoring effect (LTFU, etc.)
    :type cense_colname: Optional[str] or None
    :param cense_denominator: Override to specify denominator patsy formula for censoring models
    :type cense_denominator: Optional[str] or None
    :param cense_numerator: Override to specify numerator patsy formula for censoring models
    :type cense_numerator: Optional[str] or None
    :param cense_eligible_colname: Column name to identify which rows are eligible for censoring model fitting
    :type cense_eligible_colname: Optional[str] or None
    :param compevent_colname: Column name specifying a competing event to the outcome
    :type compevent_colname: Optional[str] or None
    :param covariates: Override to specify the outcome patsy formula for outcome model fitting
    :type covariates: Optional[str] or None
    :param denominator: Override to specify the outcome patsy formula for denominator model fitting
    :type denominator: Optional[str] or None
    :param excused: Boolean to allow excused conditions when method is censoring
    :type excused: bool
    :param excused_colnames: Column names (at the same length of treatment_level) specifying excused conditions
    :type excused_colnames: List[str] or []
    :param followup_class: Boolean to force followup values to be treated as classes
    :type followup_class: bool
    :param followup_include: Boolean to force regular followup values into model covariates
    :type followup_include: bool
    :param followup_spline: Boolean to force followup values to be fit to cubic spline
    :type followup_spline: bool
    :param followup_max: Maximum allowed followup in analysis
    :type followup_max: int or None
    :param followup_min: Minimum allowed followup in analysis
    :type followup_min: int
    :param hazard_estimate: Boolean to create hazard estimates
    :type hazard_estimate: bool
    :param indicator_baseline: How to indicate baseline columns in models
    :type indicator_baseline: str
    :param indicator_squared: How to indicate squared columns in models
    :type indicator_squared: str
    :param km_curves: Boolean to create survival, risk, and incidence (if applicable) estimates
    :type km_curves: bool
    :param ncores: Number of cores to use if running in parallel
    :type ncores: int
    :param numerator: Override to specify the outcome patsy formula for numerator models
    :type numerator: Optional[str] or None
    :param parallel: Boolean to run model fitting in parallel
    :type parallel: bool
    :param plot_colors: List of colors for KM plots, if applicable
    :type plot_colors: List[str]
    :param plot_labels: List of length treat_level to specify treatment labeling
    :type plot_labels: List[str]
    :param plot_title: Plot title
    :type plot_title: Optional[str] or None
    :param plot_type: Type of plot to show ["risk", "survival" or "incidence" if compevent is specified]
    :type plot_type: str
    :param seed: RNG seed
    :type seed: Optional[int] or None
    :param selection_first_trial: Boolean to only use first trial for analysis (similar to non-expanded)
    :type selection_first_trial: bool
    :param selection_sample: Subsampling proportion of ID-trials which did not initiate a treatment
    :type selection_sample: float
    :param selection_random: Boolean to randomly downsample ID-trials which did not initiate a treatment
    :type selection_random: bool
    :param subgroup_colname: Column name for subgroups to share the same weighting but different outcome model fits
    :type subgroup_colname: Optional[str] or None
    :param treatment_level: List of eligible treatment levels within treatment_col
    :type treatment_level: List[int]
    :param trial_include: Boolean to force trial values into model covariates
    :type trial_include: bool
    :param visit_colname: Column name of a visit indicator (presumably used to fit visit weights; confirm against the weighting module)
    :type visit_colname: Optional[str] or None
    :param weight_eligible_colnames: List of column names of length treatment_level to identify which rows are eligible for weight fitting
    :type weight_eligible_colnames: List[str]
    :param weight_min: Minimum weight
    :type weight_min: float
    :param weight_max: Maximum weight
    :type weight_max: float or None
    :param weight_lag_condition: Boolean to fit weights based on their treatment lag
    :type weight_lag_condition: bool
    :param weight_p99: Boolean to force weight min and max to be 1st and 99th percentile respectively
    :type weight_p99: bool
    :param weight_preexpansion: Boolean to fit weights on preexpanded data
    :type weight_preexpansion: bool
    :param weighted: Boolean to weight analysis
    :type weighted: bool
    """

    bootstrap_nboot: int = 0
    bootstrap_sample: float = 0.8
    bootstrap_CI: float = 0.95
    bootstrap_CI_method: Literal["se", "percentile"] = "se"
    cense_colname: Optional[str] = None
    cense_denominator: Optional[str] = None
    cense_numerator: Optional[str] = None
    cense_eligible_colname: Optional[str] = None
    compevent_colname: Optional[str] = None
    covariates: Optional[str] = None
    denominator: Optional[str] = None
    excused: bool = False
    excused_colnames: List[str] = field(default_factory=list)
    followup_class: bool = False
    followup_include: bool = True
    followup_max: Optional[int] = None
    followup_min: int = 0
    followup_spline: bool = False
    hazard_estimate: bool = False
    indicator_baseline: str = "_bas"
    indicator_squared: str = "_sq"
    km_curves: bool = False
    # Evaluated once, when the class is defined (i.e. at import time).
    ncores: int = multiprocessing.cpu_count()
    numerator: Optional[str] = None
    parallel: bool = False
    plot_colors: List[str] = field(
        default_factory=lambda: ["#F8766D", "#00BFC4", "#555555"]
    )
    plot_labels: List[str] = field(default_factory=list)
    plot_title: Optional[str] = None
    plot_type: Literal["risk", "survival", "incidence"] = "risk"
    seed: Optional[int] = None
    selection_first_trial: bool = False
    selection_sample: float = 0.8
    selection_random: bool = False
    subgroup_colname: Optional[str] = None
    treatment_level: List[int] = field(default_factory=lambda: [0, 1])
    trial_include: bool = True
    visit_colname: Optional[str] = None
    weight_eligible_colnames: List[str] = field(default_factory=list)
    weight_min: float = 0.0
    weight_max: Optional[float] = None
    weight_lag_condition: bool = True
    weight_p99: bool = False
    weight_preexpansion: bool = False
    weighted: bool = False

    @staticmethod
    def _is_int(value) -> bool:
        """Return True for genuine integers; ``bool`` is deliberately excluded."""
        return isinstance(value, int) and not isinstance(value, bool)

    def __post_init__(self):
        """
        Validate the supplied options and normalise formula strings

        :raises TypeError: If a boolean option is not a ``bool``
        :raises ValueError: If a count, proportion, or categorical option is out of range
        """
        bool_options = (
            "excused",
            "followup_class",
            "followup_include",
            "followup_spline",
            "hazard_estimate",
            "km_curves",
            "parallel",
            "selection_first_trial",
            "selection_random",
            "trial_include",
            "weight_lag_condition",
            "weight_p99",
            "weight_preexpansion",
            "weighted",
        )
        for name in bool_options:
            if not isinstance(getattr(self, name), bool):
                raise TypeError(f"{name} must be a boolean value.")

        # Zero is valid (it is the default and means "no bootstrapping").
        if not self._is_int(self.bootstrap_nboot) or self.bootstrap_nboot < 0:
            raise ValueError("bootstrap_nboot must be a non-negative integer.")

        # Type is checked before the comparison so that non-numeric input
        # produces this ValueError rather than a TypeError from ``<``.
        if not self._is_int(self.ncores) or self.ncores < 1:
            raise ValueError("ncores must be a positive integer.")

        if not (0.0 <= self.bootstrap_sample <= 1.0):
            raise ValueError("bootstrap_sample must be between 0 and 1.")
        if not (0.0 < self.bootstrap_CI < 1.0):
            raise ValueError("bootstrap_CI must be between 0 and 1.")
        if not (0.0 <= self.selection_sample <= 1.0):
            raise ValueError("selection_sample must be between 0 and 1.")

        if self.plot_type not in ["risk", "survival", "incidence"]:
            raise ValueError(
                "plot_type must be either 'risk', 'survival', or 'incidence'."
            )

        if self.bootstrap_CI_method not in ["se", "percentile"]:
            raise ValueError("bootstrap_CI_method must be one of 'se' or 'percentile'")

        # Strip all whitespace from user-supplied formula overrides; list
        # values are left untouched.
        for name in (
            "covariates",
            "numerator",
            "denominator",
            "cense_numerator",
            "cense_denominator",
        ):
            formula = getattr(self, name)
            if formula is not None and not isinstance(formula, list):
                setattr(self, name, "".join(formula.split()))
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import tempfile
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import List, Literal, Optional
|
|
5
|
+
|
|
6
|
+
import matplotlib.figure
|
|
7
|
+
import polars as pl
|
|
8
|
+
from statsmodels.base.wrapper import ResultsWrapper
|
|
9
|
+
|
|
10
|
+
from .helpers import _build_md, _build_pdf
|
|
11
|
+
from .SEQopts import SEQopts
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class SEQoutput:
    """
    Collector class for results from ``SEQuential``

    :param options: Options used in the SEQuential process
    :type options: SEQopts or None
    :param method: Method of analysis ['ITT', 'dose-response', or 'censoring']
    :type method: str
    :param numerator_models: Numerator models, if applicable, from the weighting process
    :type numerator_models: List[ResultsWrapper] or None
    :param denominator_models: Denominator models, if applicable, from the weighting process
    :type denominator_models: List[ResultsWrapper] or None
    :param outcome_models: Outcome models
    :type outcome_models: List[List[ResultsWrapper]] or None
    :param compevent_models: Competing event models, if applicable
    :type compevent_models: List[List[ResultsWrapper]] or None
    :param weight_statistics: Weight statistics once returned back to the expanded dataset
    :type weight_statistics: dict or None
    :param hazard: Hazard ratio if applicable
    :type hazard: pl.DataFrame or None
    :param km_data: Dataframe of risk, survival, and incidence data if applicable at all followups
    :type km_data: pl.DataFrame or None
    :param km_graph: Figure of survival, risk, or incidence over followup times
    :type km_graph: matplotlib.figure.Figure or None
    :param risk_ratio: Dataframe of risk ratios, compared between treatments and subgroups
    :type risk_ratio: pl.DataFrame or None
    :param risk_difference: Dataframe of risk differences, compared between treatments and subgroups
    :type risk_difference: pl.DataFrame or None
    :param time: Timings for every step of the process completed thus far
    :type time: dict or None
    :param diagnostic_tables: Diagnostic tables for unique and nonunique outcome events and treatment switches
    :type diagnostic_tables: dict or None
    """

    options: Optional[SEQopts] = None
    method: Optional[str] = None
    numerator_models: Optional[List[ResultsWrapper]] = None
    denominator_models: Optional[List[ResultsWrapper]] = None
    outcome_models: Optional[List[List[ResultsWrapper]]] = None
    compevent_models: Optional[List[List[ResultsWrapper]]] = None
    # NOTE(review): the class docstring describes this as a dict while the
    # annotation says DataFrame; confirm which one the producer returns.
    weight_statistics: Optional[pl.DataFrame] = None
    hazard: Optional[pl.DataFrame] = None
    km_data: Optional[pl.DataFrame] = None
    km_graph: Optional[matplotlib.figure.Figure] = None
    risk_ratio: Optional[pl.DataFrame] = None
    risk_difference: Optional[pl.DataFrame] = None
    time: Optional[dict] = None
    diagnostic_tables: Optional[dict] = None

    def plot(self) -> None:
        """
        Prints the stored kaplan-meier figure object (``km_graph``)
        """
        print(self.km_graph)

    def summary(
        self,
        type: Optional[
            Literal["numerator", "denominator", "outcome", "compevent"]
        ] = None,
    ) -> List:
        """
        Returns a list of model summaries of either the numerator, denominator, outcome, or competing event models

        :param type: Indicator for which model list you would like returned; any value other than 'numerator', 'denominator' or 'compevent' (including the default) selects the outcome models
        :type type: str
        :raises ValueError: If the requested models were not created
        """
        if type in ("numerator", "denominator", "compevent"):
            label = type
        else:
            label = "outcome"
        models = getattr(self, f"{label}_models")
        if models is None:
            raise ValueError(
                f"Models {label} were not created in the SEQuential process"
            )

        def _summarise(entry):
            # Outcome/competing event models are annotated as nested lists;
            # summarise each inner model while keeping the nesting.
            if isinstance(entry, (list, tuple)):
                return [_summarise(inner) for inner in entry]
            return entry.summary()

        return [_summarise(model) for model in models]

    def retrieve_data(
        self,
        type: Optional[
            Literal[
                "km_data",
                "hazard",
                "risk_ratio",
                "risk_difference",
                "unique_outcomes",
                "nonunique_outcomes",
                "unique_switches",
                "nonunique_switches",
            ]
        ] = None,
    ) -> pl.DataFrame:
        """
        Getter for data stored within ``SEQoutput``

        :param type: Data which you would like to access, ['km_data', 'hazard', 'risk_ratio', 'risk_difference', 'unique_outcomes', 'nonunique_outcomes', 'unique_switches', 'nonunique_switches']; any other value (including the default) selects 'km_data'
        :type type: str
        :raises ValueError: If the requested data was not created
        """
        if type in ("hazard", "risk_ratio", "risk_difference"):
            label = type
            data = getattr(self, type)
        elif type in (
            "unique_outcomes",
            "nonunique_outcomes",
            "unique_switches",
            "nonunique_switches",
        ):
            label = type
            # ``dict.has_key`` does not exist in Python 3; ``get`` also covers
            # the case where no diagnostic tables were produced at all.
            data = (self.diagnostic_tables or {}).get(type)
        else:
            label = "km_data"
            data = self.km_data

        if data is None:
            raise ValueError(f"Data {label} was not created in the SEQuential process")
        return data

    def _write_md(self, filename) -> None:
        """Write the markdown report (and the plot image, if any) without printing."""
        img_path = None
        if (
            self.options is not None
            and self.options.km_curves
            and self.km_graph is not None
        ):
            # The image is stored next to the markdown file and referenced by
            # its bare file name inside the report.
            img_path = Path(filename).with_suffix(".png")
            self.km_graph.savefig(img_path, dpi=300, bbox_inches="tight")
            img_path = img_path.name

        with open(filename, "w", encoding="utf-8") as f:
            f.write(_build_md(self, img_path))

    def to_md(self, filename="SEQuential_results.md") -> None:
        """Generates a markdown report of the SEQuential analysis results."""
        self._write_md(filename)
        print(f"Results saved to {filename}")

    def to_pdf(self, filename="SEQuential_results.pdf") -> None:
        """Generates a PDF report of the SEQuential analysis results."""
        with tempfile.TemporaryDirectory() as tmpdir:
            # Build the markdown in a scratch directory; only the PDF is kept.
            tmp_md = Path(tmpdir) / "report.md"
            self._write_md(tmp_md)
            md_content = tmp_md.read_text(encoding="utf-8")

            tmp_img = tmp_md.with_suffix(".png")
            img_abs_path = str(tmp_img.absolute()) if tmp_img.exists() else None

            # Must run while the temporary directory (and image) still exist.
            _build_pdf(md_content, filename, img_abs_path)

        print(f"Results saved to {filename}")
|
|
@@ -7,9 +7,10 @@ from typing import List, Literal, Optional
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import polars as pl
|
|
9
9
|
|
|
10
|
-
from .analysis import (_calculate_hazard, _calculate_survival,
|
|
11
|
-
_pred_risk, _risk_estimates,
|
|
12
|
-
|
|
10
|
+
from .analysis import (_calculate_hazard, _calculate_survival, _clamp,
|
|
11
|
+
_outcome_fit, _pred_risk, _risk_estimates,
|
|
12
|
+
_subgroup_fit)
|
|
13
|
+
from .error import _data_checker, _param_checker
|
|
13
14
|
from .expansion import _binder, _diagnostics, _dynamic, _random_selection
|
|
14
15
|
from .helpers import _col_string, _format_time, bootstrap_loop
|
|
15
16
|
from .initialization import (_cense_denominator, _cense_numerator,
|
|
@@ -18,11 +19,36 @@ from .plot import _survival_plot
|
|
|
18
19
|
from .SEQopts import SEQopts
|
|
19
20
|
from .SEQoutput import SEQoutput
|
|
20
21
|
from .weighting import (_fit_denominator, _fit_LTFU, _fit_numerator,
|
|
21
|
-
_weight_bind, _weight_predict,
|
|
22
|
-
_weight_stats)
|
|
22
|
+
_fit_visit, _weight_bind, _weight_predict,
|
|
23
|
+
_weight_setup, _weight_stats)
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
class SEQuential:
|
|
27
|
+
"""
|
|
28
|
+
Primary class initializer for SEQuentially nested target trial emulation
|
|
29
|
+
|
|
30
|
+
:param data: Data for analysis
|
|
31
|
+
:type data: pl.DataFrame
|
|
32
|
+
:param id_col: Column name for unique patient IDs
|
|
33
|
+
:type id_col: str
|
|
34
|
+
:param time_col: Column name for observational time points
|
|
35
|
+
:type time_col: str
|
|
36
|
+
:param eligible_col: Column name for analytical eligibility
|
|
37
|
+
:type eligible_col: str
|
|
38
|
+
:param treatment_col: Column name specifying treatment per time_col
|
|
39
|
+
:type treatment_col: str
|
|
40
|
+
:param outcome_col: Column name specifying outcome per time_col
|
|
41
|
+
:type outcome_col: str
|
|
42
|
+
:param time_varying_cols: Time-varying column names as covariates (BMI, Age, etc.)
|
|
43
|
+
:type time_varying_cols: Optional[List[str]] or None
|
|
44
|
+
:param fixed_cols: Fixed column names as covariates (Sex, YOB, etc.)
|
|
45
|
+
:type fixed_cols: Optional[List[str]] or None
|
|
46
|
+
:param method: Method for analysis ['ITT', 'dose-response', or 'censoring']
|
|
47
|
+
:type method: str
|
|
48
|
+
:param parameters: Parameters to augment analysis, specified with ``pySEQTarget.SEQopts``
|
|
49
|
+
:type parameters: Optional[SEQopts] or None
|
|
50
|
+
"""
|
|
51
|
+
|
|
26
52
|
def __init__(
|
|
27
53
|
self,
|
|
28
54
|
data: pl.DataFrame,
|
|
@@ -68,7 +94,7 @@ class SEQuential:
|
|
|
68
94
|
if self.denominator is None:
|
|
69
95
|
self.denominator = _denominator(self)
|
|
70
96
|
|
|
71
|
-
if self.cense_colname is not None:
|
|
97
|
+
if self.cense_colname is not None or self.visit_colname is not None:
|
|
72
98
|
if self.cense_numerator is None:
|
|
73
99
|
self.cense_numerator = _cense_numerator(self)
|
|
74
100
|
|
|
@@ -76,14 +102,18 @@ class SEQuential:
|
|
|
76
102
|
self.cense_denominator = _cense_denominator(self)
|
|
77
103
|
|
|
78
104
|
_param_checker(self)
|
|
79
|
-
|
|
105
|
+
_data_checker(self)
|
|
80
106
|
|
|
81
|
-
def expand(self):
|
|
107
|
+
def expand(self) -> None:
|
|
108
|
+
"""
|
|
109
|
+
Creates the sequentially nested, emulated target trial structure
|
|
110
|
+
"""
|
|
82
111
|
start = time.perf_counter()
|
|
83
112
|
kept = [
|
|
84
113
|
self.cense_colname,
|
|
85
114
|
self.cense_eligible_colname,
|
|
86
115
|
self.compevent_colname,
|
|
116
|
+
self.visit_colname,
|
|
87
117
|
*self.weight_eligible_colnames,
|
|
88
118
|
*self.excused_colnames,
|
|
89
119
|
]
|
|
@@ -136,7 +166,10 @@ class SEQuential:
|
|
|
136
166
|
end = time.perf_counter()
|
|
137
167
|
self._expansion_time = _format_time(start, end)
|
|
138
168
|
|
|
139
|
-
def bootstrap(self, **kwargs):
|
|
169
|
+
def bootstrap(self, **kwargs) -> None:
|
|
170
|
+
"""
|
|
171
|
+
Internally sets up bootstrapping - creating a list of IDs to use per iteration
|
|
172
|
+
"""
|
|
140
173
|
allowed = {
|
|
141
174
|
"bootstrap_nboot",
|
|
142
175
|
"bootstrap_sample",
|
|
@@ -148,7 +181,6 @@ class SEQuential:
|
|
|
148
181
|
setattr(self, key, value)
|
|
149
182
|
else:
|
|
150
183
|
raise ValueError(f"Unknown argument: {key}")
|
|
151
|
-
|
|
152
184
|
UIDs = self.DT.select(pl.col(self.id_col)).unique().to_series().to_list()
|
|
153
185
|
NIDs = len(UIDs)
|
|
154
186
|
|
|
@@ -159,10 +191,12 @@ class SEQuential:
|
|
|
159
191
|
)
|
|
160
192
|
id_counts = Counter(sampled_IDs)
|
|
161
193
|
self._boot_samples.append(id_counts)
|
|
162
|
-
return self
|
|
163
194
|
|
|
164
195
|
@bootstrap_loop
|
|
165
|
-
def fit(self):
|
|
196
|
+
def fit(self) -> None:
|
|
197
|
+
"""
|
|
198
|
+
Fits weight models (numerator, denominator, censoring) and outcome models (outcome, competing event)
|
|
199
|
+
"""
|
|
166
200
|
if self.bootstrap_nboot > 0 and not hasattr(self, "_boot_samples"):
|
|
167
201
|
raise ValueError(
|
|
168
202
|
"Bootstrap sampling not found. Please run the 'bootstrap' method before fitting with bootstrapping."
|
|
@@ -179,6 +213,7 @@ class SEQuential:
|
|
|
179
213
|
WDT[col] = WDT[col].astype("category")
|
|
180
214
|
|
|
181
215
|
_fit_LTFU(self, WDT)
|
|
216
|
+
_fit_visit(self, WDT)
|
|
182
217
|
_fit_numerator(self, WDT)
|
|
183
218
|
_fit_denominator(self, WDT)
|
|
184
219
|
|
|
@@ -211,7 +246,17 @@ class SEQuential:
|
|
|
211
246
|
)
|
|
212
247
|
return models
|
|
213
248
|
|
|
214
|
-
def survival(self):
|
|
249
|
+
def survival(self, **kwargs) -> None:
|
|
250
|
+
"""
|
|
251
|
+
Uses fit outcome models (outcome, competing event) to estimate risk, survival, and incidence curves
|
|
252
|
+
"""
|
|
253
|
+
allowed = {"bootstrap_CI", "bootstrap_CI_method"}
|
|
254
|
+
for key, val in kwargs.items():
|
|
255
|
+
if key in allowed:
|
|
256
|
+
setattr(self, key, val)
|
|
257
|
+
else:
|
|
258
|
+
raise ValueError(f"Unknown or misplaced arugment: {key}")
|
|
259
|
+
|
|
215
260
|
if not hasattr(self, "outcome_model") or not self.outcome_model:
|
|
216
261
|
raise ValueError(
|
|
217
262
|
"Outcome model not found. Please run the 'fit' method before calculating survival."
|
|
@@ -221,13 +266,16 @@ class SEQuential:
|
|
|
221
266
|
|
|
222
267
|
risk_data = _pred_risk(self)
|
|
223
268
|
surv_data = _calculate_survival(self, risk_data)
|
|
224
|
-
self.km_data = pl.concat([risk_data, surv_data])
|
|
269
|
+
self.km_data = _clamp(pl.concat([risk_data, surv_data]))
|
|
225
270
|
self.risk_estimates = _risk_estimates(self)
|
|
226
271
|
|
|
227
272
|
end = time.perf_counter()
|
|
228
273
|
self._survival_time = _format_time(start, end)
|
|
229
274
|
|
|
230
|
-
def hazard(self):
|
|
275
|
+
def hazard(self) -> None:
|
|
276
|
+
"""
|
|
277
|
+
Uses fit outcome models (outcome, competing event) to estimate hazard ratios
|
|
278
|
+
"""
|
|
231
279
|
start = time.perf_counter()
|
|
232
280
|
|
|
233
281
|
if not hasattr(self, "outcome_model") or not self.outcome_model:
|
|
@@ -239,10 +287,22 @@ class SEQuential:
|
|
|
239
287
|
end = time.perf_counter()
|
|
240
288
|
self._hazard_time = _format_time(start, end)
|
|
241
289
|
|
|
242
|
-
def plot(self):
|
|
290
|
+
def plot(self, **kwargs) -> None:
|
|
291
|
+
"""
|
|
292
|
+
Shows a plot specific to plot_type
|
|
293
|
+
"""
|
|
294
|
+
allowed = {"plot_type", "plot_colors", "plot_title", "plot_labels"}
|
|
295
|
+
for key, val in kwargs.items():
|
|
296
|
+
if key in allowed:
|
|
297
|
+
setattr(self, key, val)
|
|
298
|
+
else:
|
|
299
|
+
raise ValueError(f"Unknown or misplaced arugment: {key}")
|
|
243
300
|
self.km_graph = _survival_plot(self)
|
|
244
301
|
|
|
245
|
-
def collect(self):
|
|
302
|
+
def collect(self) -> SEQoutput:
|
|
303
|
+
"""
|
|
304
|
+
Collects all results current created into ``SEQoutput`` class
|
|
305
|
+
"""
|
|
246
306
|
self._time_collected = datetime.datetime.now()
|
|
247
307
|
|
|
248
308
|
generated = [
|