pySEQTarget 0.9.0__tar.gz → 0.10.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/PKG-INFO +16 -8
  2. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/README.md +13 -7
  3. pyseqtarget-0.10.1/pySEQTarget/SEQopts.py +197 -0
  4. pyseqtarget-0.10.1/pySEQTarget/SEQoutput.py +163 -0
  5. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pySEQTarget/SEQuential.py +77 -17
  6. pyseqtarget-0.10.1/pySEQTarget/analysis/__init__.py +9 -0
  7. pyseqtarget-0.10.1/pySEQTarget/analysis/_hazard.py +211 -0
  8. pyseqtarget-0.10.1/pySEQTarget/analysis/_outcome_fit.py +75 -0
  9. pyseqtarget-0.10.1/pySEQTarget/analysis/_risk_estimates.py +136 -0
  10. pyseqtarget-0.10.1/pySEQTarget/analysis/_subgroup_fit.py +30 -0
  11. pyseqtarget-0.10.1/pySEQTarget/analysis/_survival_pred.py +380 -0
  12. pyseqtarget-0.10.1/pySEQTarget/error/__init__.py +2 -0
  13. pyseqtarget-0.10.1/pySEQTarget/error/_data_checker.py +38 -0
  14. pyseqtarget-0.10.1/pySEQTarget/error/_param_checker.py +50 -0
  15. pyseqtarget-0.10.1/pySEQTarget/expansion/__init__.py +5 -0
  16. pyseqtarget-0.10.1/pySEQTarget/expansion/_binder.py +98 -0
  17. pyseqtarget-0.10.1/pySEQTarget/expansion/_diagnostics.py +53 -0
  18. pyseqtarget-0.10.1/pySEQTarget/expansion/_dynamic.py +73 -0
  19. pyseqtarget-0.10.1/pySEQTarget/expansion/_mapper.py +44 -0
  20. pyseqtarget-0.10.1/pySEQTarget/expansion/_selection.py +44 -0
  21. pyseqtarget-0.10.1/pySEQTarget/helpers/__init__.py +8 -0
  22. pyseqtarget-0.10.1/pySEQTarget/helpers/_bootstrap.py +112 -0
  23. pyseqtarget-0.10.1/pySEQTarget/helpers/_col_string.py +6 -0
  24. pyseqtarget-0.10.1/pySEQTarget/helpers/_format_time.py +6 -0
  25. pyseqtarget-0.10.1/pySEQTarget/helpers/_output_files.py +167 -0
  26. pyseqtarget-0.10.1/pySEQTarget/helpers/_pad.py +7 -0
  27. pyseqtarget-0.10.1/pySEQTarget/helpers/_predict_model.py +9 -0
  28. pyseqtarget-0.10.1/pySEQTarget/helpers/_prepare_data.py +19 -0
  29. pyseqtarget-0.10.1/pySEQTarget/initialization/__init__.py +5 -0
  30. pyseqtarget-0.10.1/pySEQTarget/initialization/_censoring.py +53 -0
  31. pyseqtarget-0.10.1/pySEQTarget/initialization/_denominator.py +39 -0
  32. pyseqtarget-0.10.1/pySEQTarget/initialization/_numerator.py +37 -0
  33. pyseqtarget-0.10.1/pySEQTarget/initialization/_outcome.py +56 -0
  34. pyseqtarget-0.10.1/pySEQTarget/plot/__init__.py +1 -0
  35. pyseqtarget-0.10.1/pySEQTarget/plot/_survival_plot.py +104 -0
  36. pyseqtarget-0.10.1/pySEQTarget/weighting/__init__.py +8 -0
  37. pyseqtarget-0.10.1/pySEQTarget/weighting/_weight_bind.py +86 -0
  38. pyseqtarget-0.10.1/pySEQTarget/weighting/_weight_data.py +47 -0
  39. pyseqtarget-0.10.1/pySEQTarget/weighting/_weight_fit.py +99 -0
  40. pyseqtarget-0.10.1/pySEQTarget/weighting/_weight_pred.py +192 -0
  41. pyseqtarget-0.10.1/pySEQTarget/weighting/_weight_stats.py +23 -0
  42. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pySEQTarget.egg-info/PKG-INFO +16 -8
  43. pyseqtarget-0.10.1/pySEQTarget.egg-info/SOURCES.txt +56 -0
  44. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pyproject.toml +25 -5
  45. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/tests/test_coefficients.py +109 -71
  46. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/tests/test_parallel.py +15 -12
  47. pyseqtarget-0.9.0/pySEQTarget/SEQopts.py +0 -105
  48. pyseqtarget-0.9.0/pySEQTarget/SEQoutput.py +0 -86
  49. pyseqtarget-0.9.0/pySEQTarget.egg-info/SOURCES.txt +0 -20
  50. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/LICENSE +0 -0
  51. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pySEQTarget/__init__.py +0 -0
  52. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pySEQTarget/data/__init__.py +0 -0
  53. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pySEQTarget.egg-info/dependency_links.txt +0 -0
  54. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pySEQTarget.egg-info/requires.txt +0 -0
  55. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/pySEQTarget.egg-info/top_level.txt +0 -0
  56. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/setup.cfg +0 -0
  57. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/tests/test_accessor.py +0 -0
  58. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/tests/test_covariates.py +0 -0
  59. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/tests/test_followup_options.py +0 -0
  60. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/tests/test_hazard.py +0 -0
  61. {pyseqtarget-0.9.0 → pyseqtarget-0.10.1}/tests/test_survival.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pySEQTarget
3
- Version: 0.9.0
3
+ Version: 0.10.1
4
4
  Summary: Sequentially Nested Target Trial Emulation
5
5
  Author-email: Ryan O'Dea <ryan.odea@psi.ch>, Alejandro Szmulewicz <aszmulewicz@hsph.harvard.edu>, Tom Palmer <tom.palmer@bristol.ac.uk>, Miguel Hernan <mhernan@hsph.harvard.edu>
6
6
  Maintainer-email: Ryan O'Dea <ryan.odea@psi.ch>
@@ -21,6 +21,8 @@ Classifier: Programming Language :: Python :: 3
21
21
  Classifier: Programming Language :: Python :: 3.10
22
22
  Classifier: Programming Language :: Python :: 3.11
23
23
  Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Programming Language :: Python :: 3.14
24
26
  Requires-Python: >=3.10
25
27
  Description-Content-Type: text/markdown
26
28
  License-File: LICENSE
@@ -34,11 +36,16 @@ Requires-Dist: lifelines
34
36
  Dynamic: license-file
35
37
 
36
38
  # pySEQTarget - Sequentially Nested Target Trial Emulation
39
+ [![PyPI version](https://badge.fury.io/py/pySEQTarget.svg)](https://pypi.org/project/pySEQTarget)
40
+ [![Downloads](https://static.pepy.tech/badge/pySEQTarget)](https://pepy.tech/project/pySEQTarget)
41
+ [![codecov](https://codecov.io/gh/CausalInference/pySEQTarget/graph/badge.svg?token=DMOVJJUWXP)](https://codecov.io/gh/CausalInference/pySEQTarget)[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
42
+ ![versions](https://img.shields.io/pypi/pyversions/pySEQTarget.svg)
43
+ [![Documentation Status](https://readthedocs.org/projects/pySEQTarget/badge/?version=latest)](https://pySEQTarget.readthedocs.io)
37
44
 
38
45
  Implementation of sequential trial emulation for the analysis of
39
- observational databases. The SEQTaRget software accommodates
46
+ observational databases. The `SEQTaRget` software accommodates
40
47
  time-varying treatments and confounders, as well as binary and failure
41
- time outcomes. SEQTaRget allows to compare both static and dynamic
48
+ time outcomes. `SEQTaRget` allows users to compare both static and dynamic
42
49
  strategies, can be used to estimate observational analogs of
43
50
  intention-to-treat and per-protocol effects, and can adjust for
44
51
  potential selection bias.
@@ -61,8 +68,9 @@ From the user side, this amounts to creating a dataclass, `SEQopts`, and then fe
61
68
  ```python
62
69
  import polars as pl
63
70
  from pySEQTarget import SEQuential, SEQopts
71
+ from pySEQTarget.data import load_data
64
72
 
65
- data = pl.from_pandas(SEQdata)
73
+ data = load_data("SEQdata")
66
74
  options = SEQopts(km_curves = True)
67
75
 
68
76
  # Initiate the class
@@ -70,22 +78,22 @@ model = SEQuential(data,
70
78
  id_col = "ID",
71
79
  time_col = "time",
72
80
  eligible_col = "eligible",
81
+ treatment_col = "tx_init",
82
+ outcome_col = "outcome",
73
83
  time_varying_cols = ["N", "L", "P"],
74
84
  fixed_cols = ["sex"],
75
85
  method = "ITT",
76
- options = options)
86
+ parameters = options)
77
87
  model.expand() # Construct the nested structure
78
88
  model.bootstrap(bootstrap_nboot = 20) # Run 20 bootstrap samples
79
89
  model.fit() # Fit the model
80
90
  model.survival() # Create survival curves
81
91
  model.plot() # Create and show a plot of the survival curves
82
92
  model.collect() # Collection of important information
83
-
84
93
  ```
85
94
 
86
95
  ## Assumptions
87
96
  There are several key assumptions in this package -
88
97
  1. User provided `time_col` begins at 0 per unique `id_col`, we also assume this column contains only integers and continues by 1 for every time step, e.g. (0, 1, 2, 3, 4, ...) is allowed and (0, 1, 2, 2.5, ...) or (0, 1, 4, 5) are not
89
98
  1. Provided `time_col` entries may be out of order at intake as a sort is enforced at expansion.
90
- 2. `eligible_col`, `excused_column_names` and [TODO] are once 1, only 1 (with respect to `time_col`) flag variables.
91
-
99
+ 2. `eligible_col` and elements of `excused_colnames` are once 1, only 1 (with respect to `time_col`) flag variables.
@@ -1,9 +1,14 @@
1
1
  # pySEQTarget - Sequentially Nested Target Trial Emulation
2
+ [![PyPI version](https://badge.fury.io/py/pySEQTarget.svg)](https://pypi.org/project/pySEQTarget)
3
+ [![Downloads](https://static.pepy.tech/badge/pySEQTarget)](https://pepy.tech/project/pySEQTarget)
4
+ [![codecov](https://codecov.io/gh/CausalInference/pySEQTarget/graph/badge.svg?token=DMOVJJUWXP)](https://codecov.io/gh/CausalInference/pySEQTarget)[![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
5
+ ![versions](https://img.shields.io/pypi/pyversions/pySEQTarget.svg)
6
+ [![Documentation Status](https://readthedocs.org/projects/pySEQTarget/badge/?version=latest)](https://pySEQTarget.readthedocs.io)
2
7
 
3
8
  Implementation of sequential trial emulation for the analysis of
4
- observational databases. The SEQTaRget software accommodates
9
+ observational databases. The `SEQTaRget` software accommodates
5
10
  time-varying treatments and confounders, as well as binary and failure
6
- time outcomes. SEQTaRget allows to compare both static and dynamic
11
+ time outcomes. `SEQTaRget` allows users to compare both static and dynamic
7
12
  strategies, can be used to estimate observational analogs of
8
13
  intention-to-treat and per-protocol effects, and can adjust for
9
14
  potential selection bias.
@@ -26,8 +31,9 @@ From the user side, this amounts to creating a dataclass, `SEQopts`, and then fe
26
31
  ```python
27
32
  import polars as pl
28
33
  from pySEQTarget import SEQuential, SEQopts
34
+ from pySEQTarget.data import load_data
29
35
 
30
- data = pl.from_pandas(SEQdata)
36
+ data = load_data("SEQdata")
31
37
  options = SEQopts(km_curves = True)
32
38
 
33
39
  # Initiate the class
@@ -35,22 +41,22 @@ model = SEQuential(data,
35
41
  id_col = "ID",
36
42
  time_col = "time",
37
43
  eligible_col = "eligible",
44
+ treatment_col = "tx_init",
45
+ outcome_col = "outcome",
38
46
  time_varying_cols = ["N", "L", "P"],
39
47
  fixed_cols = ["sex"],
40
48
  method = "ITT",
41
- options = options)
49
+ parameters = options)
42
50
  model.expand() # Construct the nested structure
43
51
  model.bootstrap(bootstrap_nboot = 20) # Run 20 bootstrap samples
44
52
  model.fit() # Fit the model
45
53
  model.survival() # Create survival curves
46
54
  model.plot() # Create and show a plot of the survival curves
47
55
  model.collect() # Collection of important information
48
-
49
56
  ```
50
57
 
51
58
  ## Assumptions
52
59
  There are several key assumptions in this package -
53
60
  1. User provided `time_col` begins at 0 per unique `id_col`, we also assume this column contains only integers and continues by 1 for every time step, e.g. (0, 1, 2, 3, 4, ...) is allowed and (0, 1, 2, 2.5, ...) or (0, 1, 4, 5) are not
54
61
  1. Provided `time_col` entries may be out of order at intake as a sort is enforced at expansion.
55
- 2. `eligible_col`, `excused_column_names` and [TODO] are once 1, only 1 (with respect to `time_col`) flag variables.
56
-
62
+ 2. `eligible_col` and elements of `excused_colnames` are once 1, only 1 (with respect to `time_col`) flag variables.
@@ -0,0 +1,197 @@
1
+ import multiprocessing
2
+ from dataclasses import dataclass, field
3
+ from typing import List, Literal, Optional
4
+
5
+
6
@dataclass
class SEQopts:
    """
    Parameter builder for ``pySEQTarget.SEQuential`` analysis

    :param bootstrap_nboot: Number of bootstraps to perform (0 disables bootstrapping)
    :type bootstrap_nboot: int
    :param bootstrap_sample: Subsampling proportion of ID-Trials gathered for each bootstrapping iteration
    :type bootstrap_sample: float
    :param bootstrap_CI: If bootstrapped, confidence interval level
    :type bootstrap_CI: float
    :param bootstrap_CI_method: If bootstrapped, confidence interval generation method ['se' or 'percentile']
    :type bootstrap_CI_method: str
    :param cense_colname: Column name for censoring effect (LTFU, etc.)
    :type cense_colname: Optional[str] or None
    :param cense_denominator: Override to specify denominator patsy formula for censoring models
    :type cense_denominator: Optional[str] or None
    :param cense_numerator: Override to specify numerator patsy formula for censoring models
    :type cense_numerator: Optional[str] or None
    :param cense_eligible_colname: Column name to identify which rows are eligible for censoring model fitting
    :type cense_eligible_colname: Optional[str] or None
    :param compevent_colname: Column name specifying a competing event to the outcome
    :type compevent_colname: Optional[str] or None
    :param covariates: Override to specify the outcome patsy formula for outcome model fitting
    :type covariates: Optional[str] or None
    :param denominator: Override to specify the outcome patsy formula for denominator model fitting
    :type denominator: Optional[str] or None
    :param excused: Boolean to allow excused conditions when method is censoring
    :type excused: bool
    :param excused_colnames: Column names (at the same length of treatment_level) specifying excused conditions
    :type excused_colnames: List[str] or []
    :param followup_class: Boolean to force followup values to be treated as classes
    :type followup_class: bool
    :param followup_include: Boolean to force regular followup values into model covariates
    :type followup_include: bool
    :param followup_spline: Boolean to force followup values to be fit to cubic spline
    :type followup_spline: bool
    :param followup_max: Maximum allowed followup in analysis
    :type followup_max: int or None
    :param followup_min: Minimum allowed followup in analysis
    :type followup_min: int
    :param hazard_estimate: Boolean to create hazard estimates
    :type hazard_estimate: bool
    :param indicator_baseline: How to indicate baseline columns in models
    :type indicator_baseline: str
    :param indicator_squared: How to indicate squared columns in models
    :type indicator_squared: str
    :param km_curves: Boolean to create survival, risk, and incidence (if applicable) estimates
    :type km_curves: bool
    :param ncores: Number of cores to use if running in parallel
    :type ncores: int
    :param numerator: Override to specify the outcome patsy formula for numerator models
    :type numerator: Optional[str] or None
    :param parallel: Boolean to run model fitting in parallel
    :type parallel: bool
    :param plot_colors: List of colors for KM plots, if applicable
    :type plot_colors: List[str]
    :param plot_labels: List of length treat_level to specify treatment labeling
    :type plot_labels: List[str]
    :param plot_title: Plot title
    :type plot_title: Optional[str] or None
    :param plot_type: Type of plot to show ["risk", "survival" or "incidence" if compevent is specified]
    :type plot_type: str
    :param seed: RNG seed
    :type seed: Optional[int] or None
    :param selection_first_trial: Boolean to only use first trial for analysis (similar to non-expanded)
    :type selection_first_trial: bool
    :param selection_sample: Subsampling proportion of ID-trials which did not initiate a treatment
    :type selection_sample: float
    :param selection_random: Boolean to randomly downsample ID-trials which did not initiate a treatment
    :type selection_random: bool
    :param subgroup_colname: Column name for subgroups to share the same weighting but different outcome model fits
    :type subgroup_colname: Optional[str] or None
    :param treatment_level: List of eligible treatment levels within treatment_col
    :type treatment_level: List[int]
    :param trial_include: Boolean to force trial values into model covariates
    :type trial_include: bool
    :param visit_colname: Column name used for visit-based weight models (NOTE(review): exact semantics are not visible here - confirm against the weighting module)
    :type visit_colname: Optional[str] or None
    :param weight_eligible_colnames: List of column names of length treatment_level to identify which rows are eligible for weight fitting
    :type weight_eligible_colnames: List[str]
    :param weight_min: Minimum weight
    :type weight_min: float
    :param weight_max: Maximum weight
    :type weight_max: float or None
    :param weight_lag_condition: Boolean to fit weights based on their treatment lag
    :type weight_lag_condition: bool
    :param weight_p99: Boolean to force weight min and max to be 1st and 99th percentile respectively
    :type weight_p99: bool
    :param weight_preexpansion: Boolean to fit weights on preexpanded data
    :type weight_preexpansion: bool
    :param weighted: Boolean to weight analysis
    :type weighted: bool
    :raises TypeError: If any boolean option is not a ``bool``
    :raises ValueError: If a numeric or categorical option is outside its allowed range
    """

    bootstrap_nboot: int = 0
    bootstrap_sample: float = 0.8
    bootstrap_CI: float = 0.95
    bootstrap_CI_method: Literal["se", "percentile"] = "se"
    cense_colname: Optional[str] = None
    cense_denominator: Optional[str] = None
    cense_numerator: Optional[str] = None
    cense_eligible_colname: Optional[str] = None
    compevent_colname: Optional[str] = None
    covariates: Optional[str] = None
    denominator: Optional[str] = None
    excused: bool = False
    excused_colnames: List[str] = field(default_factory=list)
    followup_class: bool = False
    followup_include: bool = True
    followup_max: Optional[int] = None
    followup_min: int = 0
    followup_spline: bool = False
    hazard_estimate: bool = False
    indicator_baseline: str = "_bas"
    indicator_squared: str = "_sq"
    km_curves: bool = False
    # Evaluated once at import time, so the default reflects the importing machine.
    ncores: int = multiprocessing.cpu_count()
    numerator: Optional[str] = None
    parallel: bool = False
    plot_colors: List[str] = field(
        default_factory=lambda: ["#F8766D", "#00BFC4", "#555555"]
    )
    plot_labels: List[str] = field(default_factory=list)
    plot_title: Optional[str] = None
    plot_type: Literal["risk", "survival", "incidence"] = "risk"
    seed: Optional[int] = None
    selection_first_trial: bool = False
    selection_sample: float = 0.8
    selection_random: bool = False
    subgroup_colname: Optional[str] = None
    treatment_level: List[int] = field(default_factory=lambda: [0, 1])
    trial_include: bool = True
    visit_colname: Optional[str] = None
    weight_eligible_colnames: List[str] = field(default_factory=list)
    weight_min: float = 0.0
    weight_max: Optional[float] = None
    weight_lag_condition: bool = True
    weight_p99: bool = False
    weight_preexpansion: bool = False
    weighted: bool = False

    def __post_init__(self):
        """Validate option values and normalise formula strings."""
        bools = [
            "excused",
            "followup_class",
            "followup_include",
            "followup_spline",
            "hazard_estimate",
            "km_curves",
            "parallel",
            "selection_first_trial",
            "selection_random",
            "trial_include",
            "weight_lag_condition",
            "weight_p99",
            "weight_preexpansion",
            "weighted",
        ]
        for i in bools:
            if not isinstance(getattr(self, i), bool):
                raise TypeError(f"{i} must be a boolean value.")

        # Zero is valid (and the default): it means "no bootstrapping".
        if not isinstance(self.bootstrap_nboot, int) or self.bootstrap_nboot < 0:
            raise ValueError("bootstrap_nboot must be a non-negative integer.")

        # Type check first so a non-numeric value yields ValueError rather than
        # a TypeError from the ``<`` comparison.
        if not isinstance(self.ncores, int) or self.ncores < 1:
            raise ValueError("ncores must be a positive integer.")

        if not (0.0 <= self.bootstrap_sample <= 1.0):
            raise ValueError("bootstrap_sample must be between 0 and 1.")
        if not (0.0 < self.bootstrap_CI < 1.0):
            raise ValueError("bootstrap_CI must be between 0 and 1.")
        if not (0.0 <= self.selection_sample <= 1.0):
            raise ValueError("selection_sample must be between 0 and 1.")

        if self.plot_type not in ["risk", "survival", "incidence"]:
            raise ValueError(
                "plot_type must be either 'risk', 'survival', or 'incidence'."
            )

        if self.bootstrap_CI_method not in ["se", "percentile"]:
            raise ValueError("bootstrap_CI_method must be one of 'se' or 'percentile'")

        # Strip all whitespace from user-supplied formula strings; list values
        # are passed through untouched.
        for i in (
            "covariates",
            "numerator",
            "denominator",
            "cense_numerator",
            "cense_denominator",
        ):
            attr = getattr(self, i)
            if attr is not None and not isinstance(attr, list):
                setattr(self, i, "".join(attr.split()))
@@ -0,0 +1,163 @@
1
+ import tempfile
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+ from typing import List, Literal, Optional
5
+
6
+ import matplotlib.figure
7
+ import polars as pl
8
+ from statsmodels.base.wrapper import ResultsWrapper
9
+
10
+ from .helpers import _build_md, _build_pdf
11
+ from .SEQopts import SEQopts
12
+
13
+
14
+ @dataclass
15
+ class SEQoutput:
16
+ """
17
+ Collector class for results from ``SEQuential``
18
+
19
+ :param options: Options used in the SEQuential process
20
+ :type options: SEQopts or None
21
+ :param method: Method of analysis ['ITT', 'dose-response', or 'censoring']
22
+ :type method: str
23
+ :param numerator_models: Numerator models, if applicable, from the weighting process
24
+ :type numerator_models: List[ResultsWrapper] or None
25
+ :param denominator_models: Denominator models, if applicable, from the weighting process
26
+ :type denominator_models: List[ResultsWrapper] or None
27
+ :param compevent_models: Competing event models, if applicable
28
+ :type compevent_models: List[ResultsWrapper] or None
29
+ :param weight_statistics: Weight statistics once returned back to the expanded dataset
30
+ :type weight_statistics: dict or None
31
+ :param hazard: Hazard ratio if applicable
32
+ :type hazard: pl.DataFrame or None
33
+ :param km_data: Dataframe of risk, survival, and incidence data if applicable at all followups
34
+ :type km_data: pl.DataFrame or None
35
+ :param km_graph: Figure of survival, risk, or incidence over followup times
36
+ :type km_graph: matplotlib.figure.Figure or None
37
+ :param risk_ratio: Dataframe of risk ratios, compared between treatments and subgroups
38
+ :type risk_ratio: pl.DataFrame or None
39
+ :param risk_difference: Dataframe of risk differences, compared between treatments and subgroups
40
+ :type risk_difference: pl.DataFrame or None
41
+ :param time: Timings for every step of the process completed thus far
42
+ :type time: dict or None
43
+ :param diagnostic_tables: Diagnostic tables for unique and nonunique outcome events and treatment switches
44
+ :type diagnostic_tables: dict or None
45
+ """
46
+
47
+ options: SEQopts = None
48
+ method: str = None
49
+ numerator_models: List[ResultsWrapper] = None
50
+ denominator_models: List[ResultsWrapper] = None
51
+ outcome_models: List[List[ResultsWrapper]] = None
52
+ compevent_models: List[List[ResultsWrapper]] = None
53
+ weight_statistics: pl.DataFrame = None
54
+ hazard: pl.DataFrame = None
55
+ km_data: pl.DataFrame = None
56
+ km_graph: matplotlib.figure.Figure = None
57
+ risk_ratio: pl.DataFrame = None
58
+ risk_difference: pl.DataFrame = None
59
+ time: dict = None
60
+ diagnostic_tables: dict = None
61
+
62
+ def plot(self) -> None:
63
+ """
64
+ Prints the kaplan-meier graph
65
+ """
66
+ print(self.km_graph)
67
+
68
+ def summary(
69
+ self, type=Optional[Literal["numerator", "denominator", "outcome", "compevent"]]
70
+ ) -> List:
71
+ """
72
+ Returns a list of model summaries of either the numerator, denominator, outcome, or competing event models
73
+ :param type: Indicator for which model list you would like returned
74
+ :type type: str
75
+ """
76
+ match type:
77
+ case "numerator":
78
+ models = self.numerator_models
79
+ case "denominator":
80
+ models = self.denominator_models
81
+ case "compevent":
82
+ models = self.compevent_models
83
+ case _:
84
+ models = self.outcome_models
85
+
86
+ return [model.summary() for model in models]
87
+
88
+ def retrieve_data(
89
+ self,
90
+ type=Optional[
91
+ Literal[
92
+ "km_data",
93
+ "hazard",
94
+ "risk_ratio",
95
+ "risk_difference",
96
+ "unique_outcomes",
97
+ "nonunique_outcomes",
98
+ "unique_switches",
99
+ "nonunique_switches",
100
+ ]
101
+ ],
102
+ ) -> pl.DataFrame:
103
+ """
104
+ Getter for data stored within ``SEQoutput``
105
+ :param type: Data which you would like to access, ['km_data', 'hazard', 'risk_ratio', 'risk_difference', 'unique_outcomes', 'nonunique_outcomes', 'unique_switches', 'nonunique_switches']
106
+ :type type: str
107
+ """
108
+ match type:
109
+ case "hazard":
110
+ data = self.hazard
111
+ case "risk_ratio":
112
+ data = self.risk_ratio
113
+ case "risk_difference":
114
+ data = self.risk_difference
115
+ case "unique_outcomes":
116
+ data = self.diagnostic_tables["unique_outcomes"]
117
+ case "nonunique_outcomes":
118
+ data = self.diagnostic_tables["nonunique_outcomes"]
119
+ case "unique_switches":
120
+ if self.diagnostic_tables.has_key("unique_switches"):
121
+ data = self.diagnostic_tables["unique_switches"]
122
+ else:
123
+ data = None
124
+ case "nonunique_switches":
125
+ if self.diagnostic_tables.has_key("nonunique_switches"):
126
+ data = self.diagnostic_tables["nonunique_switches"]
127
+ else:
128
+ data = None
129
+ case _:
130
+ data = self.km_data
131
+ if data is None:
132
+ raise ValueError("Data {type} was not created in the SEQuential process")
133
+ return data
134
+
135
+ def to_md(self, filename="SEQuential_results.md") -> None:
136
+ """Generates a markdown report of the SEQuential analysis results."""
137
+
138
+ img_path = None
139
+ if self.options.km_curves and self.km_graph is not None:
140
+ img_path = Path(filename).with_suffix(".png")
141
+ self.km_graph.savefig(img_path, dpi=300, bbox_inches="tight")
142
+ img_path = img_path.name
143
+
144
+ with open(filename, "w") as f:
145
+ f.write(_build_md(self, img_path))
146
+
147
+ print(f"Results saved to {filename}")
148
+
149
+ def to_pdf(self, filename="SEQuential_results.pdf") -> None:
150
+ """Generates a PDF report of the SEQuential analysis results."""
151
+ with tempfile.TemporaryDirectory() as tmpdir:
152
+ tmp_md = Path(tmpdir) / "report.md"
153
+ self.to_md(str(tmp_md))
154
+
155
+ with open(tmp_md, "r") as f:
156
+ md_content = f.read()
157
+
158
+ tmp_img = tmp_md.with_suffix(".png")
159
+ img_abs_path = str(tmp_img.absolute()) if tmp_img.exists() else None
160
+
161
+ _build_pdf(md_content, filename, img_abs_path)
162
+
163
+ print(f"Results saved to {filename}")
@@ -7,9 +7,10 @@ from typing import List, Literal, Optional
7
7
  import numpy as np
8
8
  import polars as pl
9
9
 
10
- from .analysis import (_calculate_hazard, _calculate_survival, _outcome_fit,
11
- _pred_risk, _risk_estimates, _subgroup_fit)
12
- from .error import _datachecker, _param_checker
10
+ from .analysis import (_calculate_hazard, _calculate_survival, _clamp,
11
+ _outcome_fit, _pred_risk, _risk_estimates,
12
+ _subgroup_fit)
13
+ from .error import _data_checker, _param_checker
13
14
  from .expansion import _binder, _diagnostics, _dynamic, _random_selection
14
15
  from .helpers import _col_string, _format_time, bootstrap_loop
15
16
  from .initialization import (_cense_denominator, _cense_numerator,
@@ -18,11 +19,36 @@ from .plot import _survival_plot
18
19
  from .SEQopts import SEQopts
19
20
  from .SEQoutput import SEQoutput
20
21
  from .weighting import (_fit_denominator, _fit_LTFU, _fit_numerator,
21
- _weight_bind, _weight_predict, _weight_setup,
22
- _weight_stats)
22
+ _fit_visit, _weight_bind, _weight_predict,
23
+ _weight_setup, _weight_stats)
23
24
 
24
25
 
25
26
  class SEQuential:
27
+ """
28
+ Primary class initializer for SEQuentially nested target trial emulation
29
+
30
+ :param data: Data for analysis
31
+ :type data: pl.DataFrame
32
+ :param id_col: Column name for unique patient IDs
33
+ :type id_col: str
34
+ :param time_col: Column name for observational time points
35
+ :type time_col: str
36
+ :param eligible_col: Column name for analytical eligibility
37
+ :type eligible_col: str
38
+ :param treatment_col: Column name specifying treatment per time_col
39
+ :type treatment_col: str
40
+ :param outcome_col: Column name specifying outcome per time_col
41
+ :type outcome_col: str
42
+ :param time_varying_cols: Time-varying column names as covariates (BMI, Age, etc.)
43
+ :type time_varying_cols: Optional[List[str]] or None
44
+ :param fixed_cols: Fixed column names as covariates (Sex, YOB, etc.)
45
+ :type fixed_cols: Optional[List[str]] or None
46
+ :param method: Method for analysis ['ITT', 'dose-response', or 'censoring']
47
+ :type method: str
48
+ :param parameters: Parameters to augment analysis, specified with ``pySEQTarget.SEQopts``
49
+ :type parameters: Optional[SEQopts] or None
50
+ """
51
+
26
52
  def __init__(
27
53
  self,
28
54
  data: pl.DataFrame,
@@ -68,7 +94,7 @@ class SEQuential:
68
94
  if self.denominator is None:
69
95
  self.denominator = _denominator(self)
70
96
 
71
- if self.cense_colname is not None:
97
+ if self.cense_colname is not None or self.visit_colname is not None:
72
98
  if self.cense_numerator is None:
73
99
  self.cense_numerator = _cense_numerator(self)
74
100
 
@@ -76,14 +102,18 @@ class SEQuential:
76
102
  self.cense_denominator = _cense_denominator(self)
77
103
 
78
104
  _param_checker(self)
79
- _datachecker(self)
105
+ _data_checker(self)
80
106
 
81
- def expand(self):
107
+ def expand(self) -> None:
108
+ """
109
+ Creates the sequentially nested, emulated target trial structure
110
+ """
82
111
  start = time.perf_counter()
83
112
  kept = [
84
113
  self.cense_colname,
85
114
  self.cense_eligible_colname,
86
115
  self.compevent_colname,
116
+ self.visit_colname,
87
117
  *self.weight_eligible_colnames,
88
118
  *self.excused_colnames,
89
119
  ]
@@ -136,7 +166,10 @@ class SEQuential:
136
166
  end = time.perf_counter()
137
167
  self._expansion_time = _format_time(start, end)
138
168
 
139
- def bootstrap(self, **kwargs):
169
+ def bootstrap(self, **kwargs) -> None:
170
+ """
171
+ Internally sets up bootstrapping - creating a list of IDs to use per iteration
172
+ """
140
173
  allowed = {
141
174
  "bootstrap_nboot",
142
175
  "bootstrap_sample",
@@ -148,7 +181,6 @@ class SEQuential:
148
181
  setattr(self, key, value)
149
182
  else:
150
183
  raise ValueError(f"Unknown argument: {key}")
151
-
152
184
  UIDs = self.DT.select(pl.col(self.id_col)).unique().to_series().to_list()
153
185
  NIDs = len(UIDs)
154
186
 
@@ -159,10 +191,12 @@ class SEQuential:
159
191
  )
160
192
  id_counts = Counter(sampled_IDs)
161
193
  self._boot_samples.append(id_counts)
162
- return self
163
194
 
164
195
  @bootstrap_loop
165
- def fit(self):
196
+ def fit(self) -> None:
197
+ """
198
+ Fits weight models (numerator, denominator, censoring) and outcome models (outcome, competing event)
199
+ """
166
200
  if self.bootstrap_nboot > 0 and not hasattr(self, "_boot_samples"):
167
201
  raise ValueError(
168
202
  "Bootstrap sampling not found. Please run the 'bootstrap' method before fitting with bootstrapping."
@@ -179,6 +213,7 @@ class SEQuential:
179
213
  WDT[col] = WDT[col].astype("category")
180
214
 
181
215
  _fit_LTFU(self, WDT)
216
+ _fit_visit(self, WDT)
182
217
  _fit_numerator(self, WDT)
183
218
  _fit_denominator(self, WDT)
184
219
 
@@ -211,7 +246,17 @@ class SEQuential:
211
246
  )
212
247
  return models
213
248
 
214
- def survival(self):
249
+ def survival(self, **kwargs) -> None:
250
+ """
251
+ Uses fit outcome models (outcome, competing event) to estimate risk, survival, and incidence curves
252
+ """
253
+ allowed = {"bootstrap_CI", "bootstrap_CI_method"}
254
+ for key, val in kwargs.items():
255
+ if key in allowed:
256
+ setattr(self, key, val)
257
+ else:
258
+ raise ValueError(f"Unknown or misplaced arugment: {key}")
259
+
215
260
  if not hasattr(self, "outcome_model") or not self.outcome_model:
216
261
  raise ValueError(
217
262
  "Outcome model not found. Please run the 'fit' method before calculating survival."
@@ -221,13 +266,16 @@ class SEQuential:
221
266
 
222
267
  risk_data = _pred_risk(self)
223
268
  surv_data = _calculate_survival(self, risk_data)
224
- self.km_data = pl.concat([risk_data, surv_data])
269
+ self.km_data = _clamp(pl.concat([risk_data, surv_data]))
225
270
  self.risk_estimates = _risk_estimates(self)
226
271
 
227
272
  end = time.perf_counter()
228
273
  self._survival_time = _format_time(start, end)
229
274
 
230
- def hazard(self):
275
+ def hazard(self) -> None:
276
+ """
277
+ Uses fit outcome models (outcome, competing event) to estimate hazard ratios
278
+ """
231
279
  start = time.perf_counter()
232
280
 
233
281
  if not hasattr(self, "outcome_model") or not self.outcome_model:
@@ -239,10 +287,22 @@ class SEQuential:
239
287
  end = time.perf_counter()
240
288
  self._hazard_time = _format_time(start, end)
241
289
 
242
- def plot(self):
290
+ def plot(self, **kwargs) -> None:
291
+ """
292
+ Shows a plot specific to plot_type
293
+ """
294
+ allowed = {"plot_type", "plot_colors", "plot_title", "plot_labels"}
295
+ for key, val in kwargs.items():
296
+ if key in allowed:
297
+ setattr(self, key, val)
298
+ else:
299
+ raise ValueError(f"Unknown or misplaced arugment: {key}")
243
300
  self.km_graph = _survival_plot(self)
244
301
 
245
- def collect(self):
302
+ def collect(self) -> SEQoutput:
303
+ """
304
+ Collects all results current created into ``SEQoutput`` class
305
+ """
246
306
  self._time_collected = datetime.datetime.now()
247
307
 
248
308
  generated = [