meta-edc 0.3.24__py3-none-any.whl → 0.3.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meta_analytics/dataframes/glucose_endpoints/constants.py +8 -6
- meta_analytics/dataframes/glucose_endpoints/glucose_endpoints_by_date.py +232 -207
- meta_analytics/dataframes/glucose_endpoints/utils.py +12 -13
- {meta_edc-0.3.24.dist-info → meta_edc-0.3.25.dist-info}/METADATA +2 -2
- {meta_edc-0.3.24.dist-info → meta_edc-0.3.25.dist-info}/RECORD +26 -18
- meta_reports/admin/__init__.py +2 -1
- meta_reports/admin/endpoints_admin.py +5 -112
- meta_reports/admin/endpoints_all_admin.py +13 -0
- meta_reports/admin/modeladmin_mixins.py +116 -0
- meta_reports/admin/unmanaged/glucose_summary_admin.py +49 -9
- meta_reports/migrations/0030_auto_20240822_1637.py +54 -0
- meta_reports/migrations/0031_endpointsproxy.py +25 -0
- meta_reports/migrations/0032_alter_endpointsproxy_options.py +21 -0
- meta_reports/migrations/0033_auto_20240823_0012.py +54 -0
- meta_reports/models/__init__.py +1 -0
- meta_reports/models/dbviews/glucose_summary/unmanaged_model.py +4 -0
- meta_reports/models/dbviews/glucose_summary/view_definition.py +30 -15
- meta_reports/models/endpoints_proxy.py +11 -0
- meta_reports/tasks.py +2 -0
- meta_reports/templates/meta_reports/columns/subject_identifier_column.html +1 -1
- meta_reports/templates/meta_reports/endpoints_all_change_list_note.html +12 -0
- meta_reports/templates/meta_reports/{endpoints_changelist_note.html → endpoints_change_list_note.html} +3 -1
- {meta_edc-0.3.24.dist-info → meta_edc-0.3.25.dist-info}/AUTHORS +0 -0
- {meta_edc-0.3.24.dist-info → meta_edc-0.3.25.dist-info}/LICENSE +0 -0
- {meta_edc-0.3.24.dist-info → meta_edc-0.3.25.dist-info}/WHEEL +0 -0
- {meta_edc-0.3.24.dist-info → meta_edc-0.3.25.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,10 @@
|
|
1
|
-
OGTT_THRESHOLD_MET = "OGTT >= 11.1"
|
2
|
-
EOS_DM_MET = "EOS - Patient developed diabetes"
|
3
|
-
CASE_OGTT = 1
|
4
1
|
CASE_EOS = 7
|
2
|
+
CASE_FBGS_WITH_FIRST_OGTT = 2
|
3
|
+
CASE_FBGS_WITH_SECOND_OGTT = 3
|
5
4
|
CASE_FBG_ONLY = 4
|
5
|
+
CASE_OGTT = 1
|
6
|
+
EOS_DM_MET = "EOS - Patient developed diabetes"
|
7
|
+
OGTT_THRESHOLD_MET = "OGTT >= 11.1"
|
6
8
|
|
7
9
|
endpoint_columns = [
|
8
10
|
"subject_identifier",
|
@@ -24,8 +26,8 @@ endpoint_columns = [
|
|
24
26
|
|
25
27
|
endpoint_cases = {
|
26
28
|
CASE_OGTT: OGTT_THRESHOLD_MET,
|
27
|
-
|
28
|
-
|
29
|
-
|
29
|
+
CASE_FBGS_WITH_FIRST_OGTT: "FBG >= 7 x 2, first OGTT<=11.1",
|
30
|
+
CASE_FBGS_WITH_SECOND_OGTT: "FBG >= 7 x 2, second OGTT<=11.1",
|
31
|
+
CASE_FBG_ONLY: "FBG >= 7 x 2, OGTT not considered",
|
30
32
|
CASE_EOS: EOS_DM_MET,
|
31
33
|
}
|
@@ -2,19 +2,14 @@ import numpy as np
|
|
2
2
|
import pandas as pd
|
3
3
|
from django.apps import apps as django_apps
|
4
4
|
from edc_constants.constants import NO, YES
|
5
|
-
from edc_pdutils.dataframes import (
|
6
|
-
get_crf,
|
7
|
-
get_eos,
|
8
|
-
get_subject_consent,
|
9
|
-
get_subject_visit,
|
10
|
-
)
|
5
|
+
from edc_pdutils.dataframes import get_crf, get_eos, get_subject_consent
|
11
6
|
from edc_utils import get_utcnow
|
12
7
|
|
13
|
-
from meta_reports.models import Endpoints
|
14
|
-
|
15
8
|
from .constants import (
|
16
9
|
CASE_EOS,
|
17
10
|
CASE_FBG_ONLY,
|
11
|
+
CASE_FBGS_WITH_FIRST_OGTT,
|
12
|
+
CASE_FBGS_WITH_SECOND_OGTT,
|
18
13
|
CASE_OGTT,
|
19
14
|
endpoint_cases,
|
20
15
|
endpoint_columns,
|
@@ -28,13 +23,31 @@ from .utils import (
|
|
28
23
|
)
|
29
24
|
|
30
25
|
|
26
|
+
def normalize_date_columns(df: pd.DataFrame, cols: list[str] = None) -> pd.DataFrame:
|
27
|
+
"""Normalize date columns by flooring"""
|
28
|
+
for col in cols:
|
29
|
+
if not df[col].empty:
|
30
|
+
df[col] = df[col].dt.floor("d")
|
31
|
+
else:
|
32
|
+
df[col] = pd.NaT
|
33
|
+
return df
|
34
|
+
|
35
|
+
|
36
|
+
def calculate_fasting_hrs(df: pd.DataFrame):
    """Add a `fasting_hrs` column derived from `fasting_duration_delta`.

    Rows where `fasting` equals NO first have their
    `fasting_duration_delta` set to NaT. `fasting_hrs` is then the
    delta's total seconds divided by 3600, or NaN when the frame is
    empty.

    The frame is modified in place and returned.
    """
    not_fasted = df["fasting"] == NO
    df.loc[not_fasted, "fasting_duration_delta"] = pd.NaT
    # the `.dt` accessor is only used when there are rows to convert
    df["fasting_hrs"] = (
        np.nan if df.empty else df["fasting_duration_delta"].dt.total_seconds() / 3600
    )
    return df
|
43
|
+
|
44
|
+
|
31
45
|
class GlucoseEndpointsByDate:
|
32
46
|
|
33
47
|
fbg_threshhold = 7.0
|
34
48
|
ogtt_threshhold = 11.1
|
35
49
|
endpoint_cls = EndpointByDate
|
36
50
|
keep_cols = [
|
37
|
-
"subject_visit_id",
|
38
51
|
"fasting",
|
39
52
|
"fasting_hrs",
|
40
53
|
"fbg_value",
|
@@ -45,9 +58,6 @@ class GlucoseEndpointsByDate:
|
|
45
58
|
"ogtt_datetime",
|
46
59
|
"source",
|
47
60
|
"report_datetime",
|
48
|
-
]
|
49
|
-
|
50
|
-
visit_cols = [
|
51
61
|
"subject_visit_id",
|
52
62
|
"subject_identifier",
|
53
63
|
"visit_code",
|
@@ -59,45 +69,31 @@ class GlucoseEndpointsByDate:
|
|
59
69
|
def __init__(
|
60
70
|
self, subject_identifiers: list[str] | None = None, case_list: list[int] | None = None
|
61
71
|
):
|
62
|
-
self.
|
63
|
-
|
64
|
-
self.subject_identifiers = []
|
65
|
-
self.case_list = case_list or [CASE_OGTT, 2, 3, CASE_EOS]
|
66
|
-
self.endpoint_cases = {k: v for k, v in endpoint_cases.items() if k in self.case_list}
|
72
|
+
self._glucose_fbg_df = pd.DataFrame()
|
73
|
+
self._glucose_fbg_ogtt_df = pd.DataFrame()
|
67
74
|
self.endpoint_only_df = pd.DataFrame()
|
68
75
|
|
69
|
-
self.
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
self.df["source"] = "meta_subject.glucose"
|
76
|
-
|
77
|
-
self.calculate_fasting_hrs()
|
78
|
-
|
79
|
-
self.fbg_only_df = self.fbg_only_df[
|
80
|
-
[col for col in self.keep_cols if not col.startswith("ogtt")]
|
76
|
+
self.subject_identifiers = subject_identifiers or []
|
77
|
+
self.case_list = case_list or [
|
78
|
+
CASE_OGTT,
|
79
|
+
CASE_FBGS_WITH_FIRST_OGTT,
|
80
|
+
CASE_FBGS_WITH_SECOND_OGTT,
|
81
|
+
CASE_EOS,
|
81
82
|
]
|
82
|
-
self.
|
83
|
-
self.df.reset_index(drop=True)
|
84
|
-
self.df = self.df.copy()
|
85
|
-
|
86
|
-
self.normalize_dates()
|
83
|
+
self.endpoint_cases = {k: v for k, v in endpoint_cases.items() if k in self.case_list}
|
87
84
|
|
88
|
-
#
|
85
|
+
# merge two model DFs
|
89
86
|
self.df = pd.merge(
|
90
|
-
self.
|
91
|
-
self.
|
87
|
+
self.glucose_fbg_ogtt_df,
|
88
|
+
self.glucose_fbg_df,
|
92
89
|
on=["subject_visit_id", "fbg_datetime", "fbg_value"],
|
93
90
|
how="outer",
|
94
91
|
indicator=True,
|
95
92
|
suffixes=("", "2"),
|
96
93
|
)
|
97
|
-
self.df.reset_index(drop=True
|
98
|
-
self.df_merged = self.df.copy()
|
94
|
+
self.df = self.df.reset_index(drop=True)
|
99
95
|
|
100
|
-
# right_only
|
96
|
+
# pivot right_only cols
|
101
97
|
cols = {
|
102
98
|
"fasting": None,
|
103
99
|
"fasting_hrs": np.nan,
|
@@ -107,77 +103,25 @@ class GlucoseEndpointsByDate:
|
|
107
103
|
}
|
108
104
|
for col, null_value in cols.items():
|
109
105
|
self.df.loc[self.df["_merge"] == "right_only", col] = self.df[f"{col}2"]
|
106
|
+
cols = [col for col in self.df.columns if col.endswith("2")]
|
107
|
+
cols.append("_merge")
|
108
|
+
self.df = self.df.drop(columns=cols)
|
109
|
+
self.df = self.df.reset_index(drop=True)
|
110
110
|
|
111
|
-
|
112
|
-
|
113
|
-
)
|
114
|
-
self.df = pd.merge(
|
115
|
-
df_subject_visit[self.visit_cols], self.df, on="subject_visit_id", how="left"
|
116
|
-
)
|
117
|
-
self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
|
118
|
-
self.df.reset_index(drop=True, inplace=True)
|
119
|
-
|
120
|
-
df_consent = get_subject_consent(
|
121
|
-
"meta_consent.subjectconsent", subject_identifiers=subject_identifiers
|
122
|
-
)
|
123
|
-
self.df = pd.merge(self.df, df_consent, on="subject_identifier", how="left")
|
124
|
-
self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
|
125
|
-
self.df.reset_index(drop=True, inplace=True)
|
126
|
-
|
127
|
-
df_eos = get_eos("meta_prn.endofstudy", subject_identifiers=subject_identifiers)
|
128
|
-
self.df = pd.merge(self.df, df_eos, on="subject_identifier", how="left")
|
129
|
-
self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
|
130
|
-
self.df.reset_index(drop=True, inplace=True)
|
131
|
-
|
132
|
-
if not self.df.loc[self.df["visit_datetime"].notna()].empty:
|
133
|
-
self.df["visit_days"] = (
|
134
|
-
self.df["baseline_datetime"].rsub(self.df["visit_datetime"]).dt.days
|
135
|
-
)
|
136
|
-
if not self.df.loc[self.df["fbg_datetime"].notna()].empty:
|
137
|
-
self.df["fgb_days"] = (
|
138
|
-
self.df.loc[self.df["fbg_datetime"].notna()]["baseline_datetime"]
|
139
|
-
.rsub(self.df["fbg_datetime"])
|
140
|
-
.dt.days
|
141
|
-
)
|
142
|
-
else:
|
143
|
-
self.df["fgb_days"] = np.nan
|
144
|
-
if not self.df.loc[self.df["ogtt_datetime"].notna()].empty:
|
145
|
-
self.df["ogtt_days"] = (
|
146
|
-
self.df.loc[self.df["ogtt_datetime"].notna()]["baseline_datetime"]
|
147
|
-
.rsub(self.df["ogtt_datetime"])
|
148
|
-
.dt.days
|
149
|
-
)
|
150
|
-
else:
|
151
|
-
self.df["ogtt_days"] = np.nan
|
152
|
-
|
153
|
-
if self.df.empty:
|
154
|
-
self.df["visit_days"] = np.nan
|
155
|
-
self.df["fgb_days"] = np.nan
|
156
|
-
self.df["ogtt_days"] = np.nan
|
157
|
-
self.df["test"] = np.nan
|
158
|
-
else:
|
159
|
-
self.df["visit_days"] = pd.to_numeric(self.df["visit_days"], downcast="integer")
|
160
|
-
self.df["fgb_days"] = pd.to_numeric(self.df["fgb_days"], downcast="integer")
|
161
|
-
self.df["ogtt_days"] = pd.to_numeric(self.df["ogtt_days"], downcast="integer")
|
162
|
-
|
163
|
-
# label rows by type of glu tests (ones with value)
|
164
|
-
self.df["test"] = self.df.apply(get_test_string, axis=1)
|
111
|
+
self.merge_with_consent()
|
112
|
+
self.merge_with_eos()
|
113
|
+
self.add_calculated_days_from_baseline_to_event_columns()
|
165
114
|
|
166
|
-
|
115
|
+
# label rows by type of glu tests (ones with value)
|
116
|
+
self.df["test"] = self.df.apply(get_test_string, axis=1)
|
167
117
|
self.df = self.df.reset_index(drop=True)
|
168
118
|
|
169
|
-
self.
|
170
|
-
|
171
|
-
!= (
|
172
|
-
"Patient fulfilled late exclusion criteria (due to abnormal blood "
|
173
|
-
"values or raised blood pressure at enrolment"
|
174
|
-
)
|
175
|
-
]
|
119
|
+
self.visit_codes_df = get_unique_visit_codes(self.df)
|
120
|
+
self.subject_identifiers_df = get_unique_subject_identifiers(self.df)
|
176
121
|
|
177
122
|
self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
|
178
123
|
self.df = self.df.reset_index(drop=True)
|
179
|
-
|
180
|
-
self.subject_identifiers_df = get_unique_subject_identifiers(self.df)
|
124
|
+
|
181
125
|
self.working_df = self.df.copy()
|
182
126
|
self.working_df["endpoint"] = 0
|
183
127
|
self.endpoint_df = get_empty_endpoint_df()
|
@@ -186,16 +130,19 @@ class GlucoseEndpointsByDate:
|
|
186
130
|
self.pre_check_endpoint()
|
187
131
|
for index, row in self.subject_identifiers_df.iterrows():
|
188
132
|
subject_df = self.get_subject_df(row["subject_identifier"])
|
189
|
-
subject_df = self.check_endpoint_by_fbg_for_subject(
|
133
|
+
subject_df = self.check_endpoint_by_fbg_for_subject(
|
134
|
+
subject_df, case_list=[CASE_FBGS_WITH_FIRST_OGTT, CASE_FBGS_WITH_SECOND_OGTT]
|
135
|
+
)
|
190
136
|
if len(subject_df.loc[subject_df["endpoint"] == 1]) == 1:
|
191
137
|
self.append_subject_to_endpoint_df(subject_df)
|
192
138
|
self.remove_subject_from_working_df(row)
|
193
139
|
|
194
140
|
if CASE_FBG_ONLY in self.endpoint_cases:
|
195
|
-
# go back and rerun for case 5
|
196
141
|
for index, row in self.subject_identifiers_df.iterrows():
|
197
142
|
subject_df = self.get_subject_df(row["subject_identifier"])
|
198
|
-
subject_df = self.check_endpoint_by_fbg_for_subject(
|
143
|
+
subject_df = self.check_endpoint_by_fbg_for_subject(
|
144
|
+
subject_df, case_list=[CASE_FBG_ONLY]
|
145
|
+
)
|
199
146
|
if len(subject_df.loc[subject_df["endpoint"] == 1]) == 1:
|
200
147
|
self.append_subject_to_endpoint_df(subject_df)
|
201
148
|
self.remove_subject_from_working_df(row)
|
@@ -203,60 +150,161 @@ class GlucoseEndpointsByDate:
|
|
203
150
|
self.post_check_endpoint()
|
204
151
|
self.merge_with_final_endpoints()
|
205
152
|
|
153
|
+
@property
def glucose_fbg_df(self) -> pd.DataFrame:
    """Returns a prepared Dataframe of CRF
    meta_subject.glucosefbg.

    Note: meta_subject.glucosefbg has only FBG measures.

    The frame is built on first access and stored on the instance;
    it is rebuilt on every access for as long as the stored frame
    is empty.
    """
    if not self._glucose_fbg_df.empty:
        return self._glucose_fbg_df

    fbg_df = get_crf(
        model="meta_subject.glucosefbg",
        subject_identifiers=self.subject_identifiers,
        subject_visit_model="meta_subject.subjectvisit",
    )
    fbg_df["source"] = "meta_subject.glucosefbg"
    fbg_df.rename(columns={"fbg_fasting": "fasting"}, inplace=True)
    # recode this model's fasting responses to YES / NO
    for raw_value, coded_value in (("fasting", YES), ("non_fasting", NO)):
        fbg_df.loc[(fbg_df["fasting"] == raw_value), "fasting"] = coded_value
    fbg_df = calculate_fasting_hrs(fbg_df)
    # this model has no OGTT columns, so leave them out of the selection
    non_ogtt_cols = [col for col in self.keep_cols if not col.startswith("ogtt")]
    fbg_df = fbg_df[non_ogtt_cols].reset_index(drop=True)
    self._glucose_fbg_df = normalize_date_columns(
        fbg_df, cols=["fbg_datetime", "report_datetime", "visit_datetime"]
    )
    return self._glucose_fbg_df
|
178
|
+
|
179
|
+
@property
def glucose_fbg_ogtt_df(self):
    """Returns a prepared Dataframe of CRF meta_subject.glucose.

    Note: meta_subject.glucose has FBG and OGTT measures.

    The frame is built on first access and stored on the instance;
    it is rebuilt on every access for as long as the stored frame
    is empty.
    """
    if not self._glucose_fbg_ogtt_df.empty:
        return self._glucose_fbg_ogtt_df

    glucose_df = get_crf(
        model="meta_subject.glucose",
        subject_identifiers=self.subject_identifiers,
        subject_visit_model="meta_subject.subjectvisit",
    )
    glucose_df["source"] = "meta_subject.glucose"
    glucose_df = calculate_fasting_hrs(glucose_df)
    glucose_df = glucose_df[self.keep_cols].reset_index(drop=True)
    date_cols = ["fbg_datetime", "ogtt_datetime", "report_datetime", "visit_datetime"]
    self._glucose_fbg_ogtt_df = normalize_date_columns(glucose_df, cols=date_cols)
    return self._glucose_fbg_ogtt_df
|
200
|
+
|
201
|
+
def merge_with_consent(self):
    """Merge in consent DF.

    Left-joins the meta_consent.subjectconsent DF onto `self.df` by
    `subject_identifier`, then re-sorts by subject and FBG date and
    resets the index.
    """
    consent_df = get_subject_consent("meta_consent.subjectconsent")
    merged = self.df.merge(consent_df, on="subject_identifier", how="left")
    merged = merged.sort_values(by=["subject_identifier", "fbg_datetime"])
    self.df = merged.reset_index(drop=True)
|
207
|
+
|
208
|
+
def merge_with_eos(self):
    """Merge in EoS DF.

    Drops patients who were taken off study by late exclusion.

    The meta_prn.endofstudy DF is left-joined onto `self.df` by
    `subject_identifier`. Rows of subjects whose `offstudy_reason`
    is the late-exclusion reason are then removed from `self.df`.
    Finally the frame is re-sorted by subject and FBG date and the
    index is reset.
    """
    # Must match the stored offstudy reason exactly; the text has no
    # closing parenthesis.
    late_exclusion_reason = (
        "Patient fulfilled late exclusion criteria (due to abnormal blood "
        "values or raised blood pressure at enrolment"
    )
    df_eos = get_eos("meta_prn.endofstudy")
    merged = pd.merge(self.df, df_eos, on="subject_identifier", how="left")
    # Filter AFTER the merge. Filtering df_eos before a left merge only
    # blanks the EoS columns and leaves the late-exclusion subjects in
    # the frame. Subjects without an EoS record have a null reason,
    # which compares unequal and so is kept.
    # NOTE(review): confirm that dropping these subjects is the intent.
    merged = merged[merged["offstudy_reason"] != late_exclusion_reason]
    merged = merged.sort_values(by=["subject_identifier", "fbg_datetime"])
    self.df = merged.reset_index(drop=True)
|
224
|
+
|
225
|
+
def add_calculated_days_from_baseline_to_event_columns(self):
    """Add columns that calculate number of days from
    baseline to visit, fbg, and ogtt.

    Adds `visit_days`, `fbg_days` and `ogtt_days` to `self.df`, each
    holding the whole days between `baseline_datetime` and the
    matching `*_datetime` column, plus a `test` column filled with
    NaN. On an empty frame the day columns are left as NaN.

    Ends by resetting the index of `self.df`.
    """
    event_col_for = {
        "visit_days": "visit_datetime",
        "fbg_days": "fbg_datetime",
        "ogtt_days": "ogtt_datetime",
    }
    # create every column up front so they exist even for an empty frame
    for days_col in event_col_for:
        self.df[days_col] = np.nan
    self.df["test"] = np.nan

    has_rows = not self.df.empty
    for days_col, event_col in event_col_for.items():
        # All three columns are guarded the same way (visit_days used
        # to be unguarded): `.dt` raises on an empty frame whose date
        # columns are not datetime-typed.
        if has_rows:
            delta = self.df[event_col] - self.df["baseline_datetime"]
            self.df[days_col] = delta.dt.days
        self.df[days_col] = pd.to_numeric(self.df[days_col], downcast="integer")
    self.df = self.df.reset_index(drop=True)
|
248
|
+
|
206
249
|
def pre_check_endpoint(self):
|
207
|
-
"
|
208
|
-
|
250
|
+
"""Flag subjects that met endpoint by hitting the OGTT
|
251
|
+
threshold.
|
252
|
+
|
253
|
+
Add them to the endpoint_df and remove them from the
|
254
|
+
working_df.
|
255
|
+
|
256
|
+
Subject must have fasted at the timepoint.
|
257
|
+
|
258
|
+
The OGTT must have an FBG measure at the same timepoint.
|
259
|
+
The value of the FBG is not considered.
|
260
|
+
|
261
|
+
Most of these were taken off study for the OGTT. We are
|
262
|
+
using the OGTT as the reason/date instead of the offstudy
|
263
|
+
reason/date.
|
264
|
+
|
265
|
+
See `merge_with_final_endpoints` where we pick the date of
|
266
|
+
the first OGTT.
|
267
|
+
"""
|
268
|
+
subject_endpoint_df = self.working_df.loc[
|
209
269
|
(self.working_df["ogtt_value"] >= self.ogtt_threshhold)
|
270
|
+
& (self.working_df["fasting"] == YES)
|
210
271
|
& (self.working_df["fbg_value"].notna())
|
211
272
|
].copy()
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
columns={"fbg_fasting": "fasting", "subject_visit": "subject_visit_id"},
|
231
|
-
inplace=True,
|
232
|
-
)
|
233
|
-
fbg_only_df.loc[(fbg_only_df["fasting"] == "fasting"), "fasting"] = YES
|
234
|
-
fbg_only_df.loc[(fbg_only_df["fasting"] == "non_fasting"), "fasting"] = NO
|
235
|
-
return fbg_only_df
|
236
|
-
|
237
|
-
def normalize_dates(self):
|
238
|
-
"""Normalize dates"""
|
239
|
-
for col in ["fbg_datetime", "report_datetime"]:
|
240
|
-
if not self.fbg_only_df[col].empty:
|
241
|
-
self.fbg_only_df[col] = self.fbg_only_df[col].dt.floor("d")
|
242
|
-
if not self.df[col].empty:
|
243
|
-
self.df[col] = self.df[col].dt.floor("d")
|
244
|
-
if not self.df["ogtt_datetime"].empty:
|
245
|
-
self.df["ogtt_datetime"] = self.df["ogtt_datetime"].dt.floor("d")
|
246
|
-
else:
|
247
|
-
self.df["ogtt_datetime"] = pd.NaT
|
248
|
-
|
249
|
-
def calculate_fasting_hrs(self):
|
250
|
-
for dftmp in [self.fbg_only_df, self.df]:
|
251
|
-
dftmp.loc[(dftmp["fasting"] == NO), "fasting_duration_delta"] = pd.NaT
|
252
|
-
if dftmp.empty:
|
253
|
-
dftmp["fasting_hrs"] = np.nan
|
254
|
-
else:
|
255
|
-
dftmp["fasting_hrs"] = (
|
256
|
-
dftmp["fasting_duration_delta"].dt.total_seconds() / 3600
|
273
|
+
if not subject_endpoint_df.empty:
|
274
|
+
# flag the selected endpoint rows as endpoints
|
275
|
+
subject_endpoint_df["endpoint"] = 1
|
276
|
+
subject_endpoint_df["endpoint_label"] = self.endpoint_cases[CASE_OGTT]
|
277
|
+
subject_endpoint_df["endpoint_type"] = CASE_OGTT
|
278
|
+
subject_endpoint_df["interval_in_days"] = np.nan
|
279
|
+
|
280
|
+
# add back the others rows for these subjects
|
281
|
+
subjects_df = self.working_df.loc[
|
282
|
+
(
|
283
|
+
self.working_df["subject_identifier"].isin(
|
284
|
+
subject_endpoint_df["subject_identifier"]
|
285
|
+
)
|
286
|
+
& ~(
|
287
|
+
self.working_df["fbg_datetime"].isin(
|
288
|
+
subject_endpoint_df["fbg_datetime"]
|
289
|
+
)
|
290
|
+
)
|
257
291
|
)
|
292
|
+
].copy()
|
293
|
+
subjects_df = subjects_df.reset_index(drop=True)
|
294
|
+
subjects_df["endpoint"] = np.nan
|
295
|
+
subjects_df["endpoint_label"] = None
|
296
|
+
subjects_df["endpoint_type"] = None
|
297
|
+
subjects_df["interval_in_days"] = np.nan
|
298
|
+
subjects_df = pd.concat([subjects_df, subject_endpoint_df])
|
299
|
+
subjects_df = subjects_df.reset_index(drop=True)
|
300
|
+
|
301
|
+
self.append_subject_to_endpoint_df(subjects_df[endpoint_columns])
|
302
|
+
self.remove_subjects_from_working_df(subjects_df)
|
258
303
|
|
259
304
|
def append_subject_to_endpoint_df(self, subject_df: pd.DataFrame) -> None:
|
305
|
+
"""Appends all rows of a subject, or subjects, to the
|
306
|
+
Endpoints DF.
|
307
|
+
"""
|
260
308
|
if self.endpoint_df.empty:
|
261
309
|
self.endpoint_df = subject_df.copy()
|
262
310
|
else:
|
@@ -267,6 +315,9 @@ class GlucoseEndpointsByDate:
|
|
267
315
|
self.endpoint_df = self.endpoint_df.reset_index(drop=True)
|
268
316
|
|
269
317
|
def remove_subject_from_working_df(self, row: pd.Series) -> None:
|
318
|
+
"""Removes one subject from the working DF given a Series with
|
319
|
+
value `subject_identifier`.
|
320
|
+
"""
|
270
321
|
self.working_df = self.working_df.drop(
|
271
322
|
index=self.working_df[
|
272
323
|
self.working_df["subject_identifier"] == row["subject_identifier"]
|
@@ -274,6 +325,17 @@ class GlucoseEndpointsByDate:
|
|
274
325
|
)
|
275
326
|
self.working_df = self.working_df.reset_index(drop=True)
|
276
327
|
|
328
|
+
def remove_subjects_from_working_df(self, rows: pd.DataFrame) -> None:
    """Removes subjects from the working DF given a DF with
    column `subject_identifier`.

    Every row of `self.working_df` whose `subject_identifier` occurs
    in `rows` is dropped, then the index is reset.
    """
    is_listed = self.working_df["subject_identifier"].isin(rows["subject_identifier"])
    index_to_drop = self.working_df.loc[is_listed].index
    remaining = self.working_df.drop(index=index_to_drop)
    self.working_df = remaining.reset_index(drop=True)
|
338
|
+
|
277
339
|
def get_subject_df(self, subject_identifier: str) -> pd.DataFrame:
|
278
340
|
subject_df = self.working_df.loc[
|
279
341
|
self.working_df["subject_identifier"] == subject_identifier
|
@@ -286,7 +348,7 @@ class GlucoseEndpointsByDate:
|
|
286
348
|
subject_df = subject_df.reset_index(drop=True)
|
287
349
|
subject_df = subject_df[endpoint_columns]
|
288
350
|
subject_df = subject_df.merge(
|
289
|
-
self.
|
351
|
+
self.visit_codes_df,
|
290
352
|
on="visit_code",
|
291
353
|
how="outer",
|
292
354
|
indicator=False,
|
@@ -317,7 +379,7 @@ class GlucoseEndpointsByDate:
|
|
317
379
|
df_eos["endpoint_label"] = self.endpoint_cases[CASE_EOS]
|
318
380
|
df_eos["endpoint_type"] = CASE_EOS
|
319
381
|
df_eos["interval_in_days"] = np.nan
|
320
|
-
df_eos.reset_index(drop=True
|
382
|
+
df_eos = df_eos.reset_index(drop=True)
|
321
383
|
self.append_subject_to_endpoint_df(df_eos[endpoint_columns])
|
322
384
|
self.working_df = self.working_df.drop(
|
323
385
|
index=self.working_df.loc[
|
@@ -326,7 +388,7 @@ class GlucoseEndpointsByDate:
|
|
326
388
|
)
|
327
389
|
|
328
390
|
def merge_with_final_endpoints(self):
|
329
|
-
|
391
|
+
"""Merge endpoint_df with original df"""
|
330
392
|
if self.endpoint_df.empty:
|
331
393
|
self.df = self.df[~(self.df["subject_identifier"].isin(self.subject_identifiers))]
|
332
394
|
else:
|
@@ -335,27 +397,34 @@ class GlucoseEndpointsByDate:
|
|
335
397
|
self.endpoint_df["fbg_datetime"] - self.endpoint_df["baseline_datetime"]
|
336
398
|
).dt.days
|
337
399
|
|
338
|
-
#
|
339
|
-
|
400
|
+
# Create DF of subjects taken offstudy (EOS) where endpoint==1.
|
401
|
+
# Keep the last record for the subject by fbg_datetime.
|
340
402
|
df1 = self.endpoint_df.copy()
|
341
|
-
df1 = df1[
|
403
|
+
df1 = df1[
|
404
|
+
(df1["endpoint_type"].isin([CASE_EOS, CASE_OGTT])) & (df1["endpoint"] == 1)
|
405
|
+
]
|
342
406
|
df1 = df1.sort_values(["subject_identifier", "fbg_datetime"])
|
343
407
|
df1 = df1.reset_index(drop=True)
|
344
408
|
df1 = df1.set_index(["subject_identifier"])
|
345
409
|
df1 = df1[~df1.index.duplicated(keep="last")]
|
346
410
|
df1 = df1.reset_index(drop=False)
|
347
411
|
|
412
|
+
# Create DF of subjects still on-study where endpoint==1.
|
413
|
+
# Keep the first record for the subject by fbg_datetime.
|
348
414
|
df2 = self.endpoint_df.copy()
|
349
|
-
df2 = df2[
|
415
|
+
df2 = df2[
|
416
|
+
~(df2["endpoint_type"].isin([CASE_EOS, CASE_OGTT])) & (df2["endpoint"] == 1)
|
417
|
+
]
|
350
418
|
df2 = df2.sort_values(["subject_identifier", "fbg_datetime"])
|
351
419
|
df2 = df2.reset_index(drop=True)
|
352
420
|
df2 = df2.set_index(["subject_identifier"])
|
353
421
|
df2 = df2[~df2.index.duplicated(keep="first")]
|
354
422
|
df2 = df2.reset_index(drop=False)
|
355
423
|
|
424
|
+
# create new DF with ONE row per subject for those that reached
|
425
|
+
# the endpoint (endpoint=1) by merging two DFs above.
|
356
426
|
self.endpoint_only_df = pd.concat([df1, df2])
|
357
427
|
self.endpoint_only_df = self.endpoint_only_df.reset_index(drop=True)
|
358
|
-
# print(f"After dedup = {len(self.endpoint_df)}")
|
359
428
|
|
360
429
|
self.df = pd.merge(
|
361
430
|
self.df,
|
@@ -367,52 +436,8 @@ class GlucoseEndpointsByDate:
|
|
367
436
|
self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
|
368
437
|
self.df = self.df.reset_index(drop=True)
|
369
438
|
|
370
|
-
def summarize(
|
371
|
-
self,
|
372
|
-
fasting: str | list | None = None,
|
373
|
-
interval_in_days_min: int | None = None,
|
374
|
-
):
|
375
|
-
days_min = interval_in_days_min or 7
|
376
|
-
|
377
|
-
fasting = fasting or [YES, NO, pd.NA]
|
378
|
-
fasting = fasting if type(fasting) in [list, tuple] else [fasting]
|
379
|
-
|
380
|
-
endpoint_df = self.endpoint_df.copy()
|
381
|
-
|
382
|
-
# endpoint by eos with dm subjects
|
383
|
-
df7 = endpoint_df[
|
384
|
-
(endpoint_df["endpoint_type"] == CASE_EOS) & (endpoint_df["endpoint"] == 1)
|
385
|
-
]
|
386
|
-
df7.reset_index(drop=True, inplace=True)
|
387
|
-
# endpoint by glucose subjects
|
388
|
-
df = endpoint_df[
|
389
|
-
(endpoint_df["endpoint_type"] != CASE_EOS)
|
390
|
-
& (endpoint_df["endpoint"] == 1)
|
391
|
-
& (endpoint_df["fasting"].isin(fasting))
|
392
|
-
& (
|
393
|
-
(endpoint_df["interval_in_days"] >= days_min)
|
394
|
-
| (endpoint_df["interval_in_days"].isna())
|
395
|
-
)
|
396
|
-
]
|
397
|
-
df.reset_index(drop=True, inplace=True)
|
398
|
-
df = pd.concat([df, df7])
|
399
|
-
df.reset_index(drop=True, inplace=True)
|
400
|
-
df_counts = df[["endpoint_type", "endpoint_label"]].value_counts().to_frame()
|
401
|
-
df_counts.sort_values(by=["endpoint_type"], inplace=True)
|
402
|
-
df_counts.reset_index(inplace=True)
|
403
|
-
|
404
|
-
sums = {
|
405
|
-
"endpoint_type": [np.nan],
|
406
|
-
"endpoint_label": ["Total"],
|
407
|
-
"count": [
|
408
|
-
df_counts["count"].sum(),
|
409
|
-
],
|
410
|
-
}
|
411
|
-
sums_df = pd.DataFrame.from_dict(sums)
|
412
|
-
df_counts = pd.concat([df_counts, sums_df], ignore_index=True)
|
413
|
-
return df_counts
|
414
|
-
|
415
439
|
def to_model(self, model: str | None = None, subject_identifiers: list[str] | None = None):
|
440
|
+
"""Write endpoint_only_df to the Endpoints model"""
|
416
441
|
df = self.endpoint_only_df
|
417
442
|
model = model or "meta_reports.endpoints"
|
418
443
|
now = get_utcnow()
|
@@ -49,18 +49,17 @@ def get_empty_endpoint_df() -> pd.DataFrame:
|
|
49
49
|
return endpoint_df
|
50
50
|
|
51
51
|
|
52
|
-
def get_unique_visit_codes(
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
return codes
|
52
|
+
def get_unique_visit_codes(df: pd.DataFrame) -> pd.DataFrame:
    """Return the whole-number visit codes found in `df`, one row
    per code, sorted ascending.

    Rows whose `visit_code` has a fractional part are ignored. The
    result comes from `value_counts()`, so it also carries the
    per-code count column. `visit_code` is cast to float and the
    index is reset.
    """
    is_whole_number = df["visit_code"] % 1 == 0
    whole_codes = df.loc[is_whole_number, "visit_code"]
    counts_df = whole_codes.value_counts().to_frame().reset_index()
    counts_df["visit_code"] = counts_df["visit_code"].astype(float)
    return counts_df.sort_values(["visit_code"]).reset_index(drop=True)
|
60
59
|
|
61
60
|
|
62
|
-
def get_unique_subject_identifiers(
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
return
|
61
|
+
def get_unique_subject_identifiers(df: pd.DataFrame) -> pd.DataFrame:
    """Return a single-column DF of the unique `subject_identifier`
    values in `df`, sorted ascending, with a fresh 0..n-1 index.
    """
    values_df = pd.DataFrame(df["subject_identifier"].unique(), columns=["subject_identifier"])
    values_df = values_df.sort_values(["subject_identifier"])
    # drop=True: without it the pre-sort positions are kept as a stray
    # "index" column in the result.
    values_df = values_df.reset_index(drop=True)
    return values_df
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: meta-edc
|
3
|
-
Version: 0.3.24
|
3
|
+
Version: 0.3.25
|
4
4
|
Summary: META Trial EDC (http://www.isrctn.com/ISRCTN76157257)
|
5
5
|
Home-page: https://github.com/meta-trial/meta-edc
|
6
6
|
Author: Erik van Widenfelt
|
@@ -19,7 +19,7 @@ Requires-Python: >=3.12
|
|
19
19
|
Description-Content-Type: text/x-rst
|
20
20
|
License-File: LICENSE
|
21
21
|
License-File: AUTHORS
|
22
|
-
Requires-Dist: edc ==0.6.
|
22
|
+
Requires-Dist: edc ==0.6.3
|
23
23
|
Requires-Dist: edc-microscopy
|
24
24
|
Requires-Dist: beautifulsoup4
|
25
25
|
Requires-Dist: edc-analytics
|