meta-edc 0.3.24__py3-none-any.whl → 0.3.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. meta_analytics/dataframes/glucose_endpoints/constants.py +8 -6
  2. meta_analytics/dataframes/glucose_endpoints/glucose_endpoints_by_date.py +232 -207
  3. meta_analytics/dataframes/glucose_endpoints/utils.py +12 -13
  4. {meta_edc-0.3.24.dist-info → meta_edc-0.3.25.dist-info}/METADATA +2 -2
  5. {meta_edc-0.3.24.dist-info → meta_edc-0.3.25.dist-info}/RECORD +26 -18
  6. meta_reports/admin/__init__.py +2 -1
  7. meta_reports/admin/endpoints_admin.py +5 -112
  8. meta_reports/admin/endpoints_all_admin.py +13 -0
  9. meta_reports/admin/modeladmin_mixins.py +116 -0
  10. meta_reports/admin/unmanaged/glucose_summary_admin.py +49 -9
  11. meta_reports/migrations/0030_auto_20240822_1637.py +54 -0
  12. meta_reports/migrations/0031_endpointsproxy.py +25 -0
  13. meta_reports/migrations/0032_alter_endpointsproxy_options.py +21 -0
  14. meta_reports/migrations/0033_auto_20240823_0012.py +54 -0
  15. meta_reports/models/__init__.py +1 -0
  16. meta_reports/models/dbviews/glucose_summary/unmanaged_model.py +4 -0
  17. meta_reports/models/dbviews/glucose_summary/view_definition.py +30 -15
  18. meta_reports/models/endpoints_proxy.py +11 -0
  19. meta_reports/tasks.py +2 -0
  20. meta_reports/templates/meta_reports/columns/subject_identifier_column.html +1 -1
  21. meta_reports/templates/meta_reports/endpoints_all_change_list_note.html +12 -0
  22. meta_reports/templates/meta_reports/{endpoints_changelist_note.html → endpoints_change_list_note.html} +3 -1
  23. {meta_edc-0.3.24.dist-info → meta_edc-0.3.25.dist-info}/AUTHORS +0 -0
  24. {meta_edc-0.3.24.dist-info → meta_edc-0.3.25.dist-info}/LICENSE +0 -0
  25. {meta_edc-0.3.24.dist-info → meta_edc-0.3.25.dist-info}/WHEEL +0 -0
  26. {meta_edc-0.3.24.dist-info → meta_edc-0.3.25.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,10 @@
1
- OGTT_THRESHOLD_MET = "OGTT >= 11.1"
2
- EOS_DM_MET = "EOS - Patient developed diabetes"
3
- CASE_OGTT = 1
4
1
  CASE_EOS = 7
2
+ CASE_FBGS_WITH_FIRST_OGTT = 2
3
+ CASE_FBGS_WITH_SECOND_OGTT = 3
5
4
  CASE_FBG_ONLY = 4
5
+ CASE_OGTT = 1
6
+ EOS_DM_MET = "EOS - Patient developed diabetes"
7
+ OGTT_THRESHOLD_MET = "OGTT >= 11.1"
6
8
 
7
9
  endpoint_columns = [
8
10
  "subject_identifier",
@@ -24,8 +26,8 @@ endpoint_columns = [
24
26
 
25
27
  endpoint_cases = {
26
28
  CASE_OGTT: OGTT_THRESHOLD_MET,
27
- 2: "FBG >= 7 x 2, first OGTT<=11.1",
28
- 3: "FBG >= 7 x 2, second OGTT<=11.1",
29
- 4: "FBG >= 7 x 2, OGTT not considered",
29
+ CASE_FBGS_WITH_FIRST_OGTT: "FBG >= 7 x 2, first OGTT<=11.1",
30
+ CASE_FBGS_WITH_SECOND_OGTT: "FBG >= 7 x 2, second OGTT<=11.1",
31
+ CASE_FBG_ONLY: "FBG >= 7 x 2, OGTT not considered",
30
32
  CASE_EOS: EOS_DM_MET,
31
33
  }
@@ -2,19 +2,14 @@ import numpy as np
2
2
  import pandas as pd
3
3
  from django.apps import apps as django_apps
4
4
  from edc_constants.constants import NO, YES
5
- from edc_pdutils.dataframes import (
6
- get_crf,
7
- get_eos,
8
- get_subject_consent,
9
- get_subject_visit,
10
- )
5
+ from edc_pdutils.dataframes import get_crf, get_eos, get_subject_consent
11
6
  from edc_utils import get_utcnow
12
7
 
13
- from meta_reports.models import Endpoints
14
-
15
8
  from .constants import (
16
9
  CASE_EOS,
17
10
  CASE_FBG_ONLY,
11
+ CASE_FBGS_WITH_FIRST_OGTT,
12
+ CASE_FBGS_WITH_SECOND_OGTT,
18
13
  CASE_OGTT,
19
14
  endpoint_cases,
20
15
  endpoint_columns,
@@ -28,13 +23,31 @@ from .utils import (
28
23
  )
29
24
 
30
25
 
26
+ def normalize_date_columns(df: pd.DataFrame, cols: list[str] = None) -> pd.DataFrame:
27
+ """Normalize date columns by flooring"""
28
+ for col in cols:
29
+ if not df[col].empty:
30
+ df[col] = df[col].dt.floor("d")
31
+ else:
32
+ df[col] = pd.NaT
33
+ return df
34
+
35
+
36
+ def calculate_fasting_hrs(df: pd.DataFrame):
37
+ df.loc[(df["fasting"] == NO), "fasting_duration_delta"] = pd.NaT
38
+ if df.empty:
39
+ df["fasting_hrs"] = np.nan
40
+ else:
41
+ df["fasting_hrs"] = df["fasting_duration_delta"].dt.total_seconds() / 3600
42
+ return df
43
+
44
+
31
45
  class GlucoseEndpointsByDate:
32
46
 
33
47
  fbg_threshhold = 7.0
34
48
  ogtt_threshhold = 11.1
35
49
  endpoint_cls = EndpointByDate
36
50
  keep_cols = [
37
- "subject_visit_id",
38
51
  "fasting",
39
52
  "fasting_hrs",
40
53
  "fbg_value",
@@ -45,9 +58,6 @@ class GlucoseEndpointsByDate:
45
58
  "ogtt_datetime",
46
59
  "source",
47
60
  "report_datetime",
48
- ]
49
-
50
- visit_cols = [
51
61
  "subject_visit_id",
52
62
  "subject_identifier",
53
63
  "visit_code",
@@ -59,45 +69,31 @@ class GlucoseEndpointsByDate:
59
69
  def __init__(
60
70
  self, subject_identifiers: list[str] | None = None, case_list: list[int] | None = None
61
71
  ):
62
- self.subject_identifiers = subject_identifiers or []
63
- if len(self.subject_identifiers) == Endpoints.objects.all().count():
64
- self.subject_identifiers = []
65
- self.case_list = case_list or [CASE_OGTT, 2, 3, CASE_EOS]
66
- self.endpoint_cases = {k: v for k, v in endpoint_cases.items() if k in self.case_list}
72
+ self._glucose_fbg_df = pd.DataFrame()
73
+ self._glucose_fbg_ogtt_df = pd.DataFrame()
67
74
  self.endpoint_only_df = pd.DataFrame()
68
75
 
69
- self.fbg_only_df = self.get_fbg_only_df()
70
-
71
- self.df = get_crf(
72
- model="meta_subject.glucose",
73
- subject_identifiers=self.subject_identifiers,
74
- )
75
- self.df["source"] = "meta_subject.glucose"
76
-
77
- self.calculate_fasting_hrs()
78
-
79
- self.fbg_only_df = self.fbg_only_df[
80
- [col for col in self.keep_cols if not col.startswith("ogtt")]
76
+ self.subject_identifiers = subject_identifiers or []
77
+ self.case_list = case_list or [
78
+ CASE_OGTT,
79
+ CASE_FBGS_WITH_FIRST_OGTT,
80
+ CASE_FBGS_WITH_SECOND_OGTT,
81
+ CASE_EOS,
81
82
  ]
82
- self.df = self.df[self.keep_cols]
83
- self.df.reset_index(drop=True)
84
- self.df = self.df.copy()
85
-
86
- self.normalize_dates()
83
+ self.endpoint_cases = {k: v for k, v in endpoint_cases.items() if k in self.case_list}
87
84
 
88
- # same shape but fbg_only_df ogtt columns are null
85
+ # merge two model DFs
89
86
  self.df = pd.merge(
90
- self.df,
91
- self.fbg_only_df,
87
+ self.glucose_fbg_ogtt_df,
88
+ self.glucose_fbg_df,
92
89
  on=["subject_visit_id", "fbg_datetime", "fbg_value"],
93
90
  how="outer",
94
91
  indicator=True,
95
92
  suffixes=("", "2"),
96
93
  )
97
- self.df.reset_index(drop=True, inplace=True)
98
- self.df_merged = self.df.copy()
94
+ self.df = self.df.reset_index(drop=True)
99
95
 
100
- # right_only
96
+ # pivot right_only cols
101
97
  cols = {
102
98
  "fasting": None,
103
99
  "fasting_hrs": np.nan,
@@ -107,77 +103,25 @@ class GlucoseEndpointsByDate:
107
103
  }
108
104
  for col, null_value in cols.items():
109
105
  self.df.loc[self.df["_merge"] == "right_only", col] = self.df[f"{col}2"]
106
+ cols = [col for col in self.df.columns if col.endswith("2")]
107
+ cols.append("_merge")
108
+ self.df = self.df.drop(columns=cols)
109
+ self.df = self.df.reset_index(drop=True)
110
110
 
111
- df_subject_visit = get_subject_visit(
112
- "meta_subject.subjectvisit", subject_identifiers=subject_identifiers
113
- )
114
- self.df = pd.merge(
115
- df_subject_visit[self.visit_cols], self.df, on="subject_visit_id", how="left"
116
- )
117
- self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
118
- self.df.reset_index(drop=True, inplace=True)
119
-
120
- df_consent = get_subject_consent(
121
- "meta_consent.subjectconsent", subject_identifiers=subject_identifiers
122
- )
123
- self.df = pd.merge(self.df, df_consent, on="subject_identifier", how="left")
124
- self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
125
- self.df.reset_index(drop=True, inplace=True)
126
-
127
- df_eos = get_eos("meta_prn.endofstudy", subject_identifiers=subject_identifiers)
128
- self.df = pd.merge(self.df, df_eos, on="subject_identifier", how="left")
129
- self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
130
- self.df.reset_index(drop=True, inplace=True)
131
-
132
- if not self.df.loc[self.df["visit_datetime"].notna()].empty:
133
- self.df["visit_days"] = (
134
- self.df["baseline_datetime"].rsub(self.df["visit_datetime"]).dt.days
135
- )
136
- if not self.df.loc[self.df["fbg_datetime"].notna()].empty:
137
- self.df["fgb_days"] = (
138
- self.df.loc[self.df["fbg_datetime"].notna()]["baseline_datetime"]
139
- .rsub(self.df["fbg_datetime"])
140
- .dt.days
141
- )
142
- else:
143
- self.df["fgb_days"] = np.nan
144
- if not self.df.loc[self.df["ogtt_datetime"].notna()].empty:
145
- self.df["ogtt_days"] = (
146
- self.df.loc[self.df["ogtt_datetime"].notna()]["baseline_datetime"]
147
- .rsub(self.df["ogtt_datetime"])
148
- .dt.days
149
- )
150
- else:
151
- self.df["ogtt_days"] = np.nan
152
-
153
- if self.df.empty:
154
- self.df["visit_days"] = np.nan
155
- self.df["fgb_days"] = np.nan
156
- self.df["ogtt_days"] = np.nan
157
- self.df["test"] = np.nan
158
- else:
159
- self.df["visit_days"] = pd.to_numeric(self.df["visit_days"], downcast="integer")
160
- self.df["fgb_days"] = pd.to_numeric(self.df["fgb_days"], downcast="integer")
161
- self.df["ogtt_days"] = pd.to_numeric(self.df["ogtt_days"], downcast="integer")
162
-
163
- # label rows by type of glu tests (ones with value)
164
- self.df["test"] = self.df.apply(get_test_string, axis=1)
111
+ self.merge_with_consent()
112
+ self.merge_with_eos()
113
+ self.add_calculated_days_from_baseline_to_event_columns()
165
114
 
166
- self.df = self.df.sort_values(by=["subject_identifier", "visit_code"])
115
+ # label rows by type of glu tests (ones with value)
116
+ self.df["test"] = self.df.apply(get_test_string, axis=1)
167
117
  self.df = self.df.reset_index(drop=True)
168
118
 
169
- self.df = self.df[
170
- self.df["offstudy_reason"]
171
- != (
172
- "Patient fulfilled late exclusion criteria (due to abnormal blood "
173
- "values or raised blood pressure at enrolment"
174
- )
175
- ]
119
+ self.visit_codes_df = get_unique_visit_codes(self.df)
120
+ self.subject_identifiers_df = get_unique_subject_identifiers(self.df)
176
121
 
177
122
  self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
178
123
  self.df = self.df.reset_index(drop=True)
179
- self.visit_codes = get_unique_visit_codes(self.df)
180
- self.subject_identifiers_df = get_unique_subject_identifiers(self.df)
124
+
181
125
  self.working_df = self.df.copy()
182
126
  self.working_df["endpoint"] = 0
183
127
  self.endpoint_df = get_empty_endpoint_df()
@@ -186,16 +130,19 @@ class GlucoseEndpointsByDate:
186
130
  self.pre_check_endpoint()
187
131
  for index, row in self.subject_identifiers_df.iterrows():
188
132
  subject_df = self.get_subject_df(row["subject_identifier"])
189
- subject_df = self.check_endpoint_by_fbg_for_subject(subject_df, case_list=[2, 3])
133
+ subject_df = self.check_endpoint_by_fbg_for_subject(
134
+ subject_df, case_list=[CASE_FBGS_WITH_FIRST_OGTT, CASE_FBGS_WITH_SECOND_OGTT]
135
+ )
190
136
  if len(subject_df.loc[subject_df["endpoint"] == 1]) == 1:
191
137
  self.append_subject_to_endpoint_df(subject_df)
192
138
  self.remove_subject_from_working_df(row)
193
139
 
194
140
  if CASE_FBG_ONLY in self.endpoint_cases:
195
- # go back and rerun for case 5
196
141
  for index, row in self.subject_identifiers_df.iterrows():
197
142
  subject_df = self.get_subject_df(row["subject_identifier"])
198
- subject_df = self.check_endpoint_by_fbg_for_subject(subject_df, case_list=[4])
143
+ subject_df = self.check_endpoint_by_fbg_for_subject(
144
+ subject_df, case_list=[CASE_FBG_ONLY]
145
+ )
199
146
  if len(subject_df.loc[subject_df["endpoint"] == 1]) == 1:
200
147
  self.append_subject_to_endpoint_df(subject_df)
201
148
  self.remove_subject_from_working_df(row)
@@ -203,60 +150,161 @@ class GlucoseEndpointsByDate:
203
150
  self.post_check_endpoint()
204
151
  self.merge_with_final_endpoints()
205
152
 
153
+ @property
154
+ def glucose_fbg_df(self) -> pd.DataFrame:
155
+ """Returns a prepared Dataframe of CRF
156
+ meta_subject.glucosefbg.
157
+
158
+ Note: meta_subject.glucosefbg has only FBG measures.
159
+ """
160
+ if self._glucose_fbg_df.empty:
161
+ df = get_crf(
162
+ model="meta_subject.glucosefbg",
163
+ subject_identifiers=self.subject_identifiers,
164
+ subject_visit_model="meta_subject.subjectvisit",
165
+ )
166
+ df["source"] = "meta_subject.glucosefbg"
167
+ df.rename(columns={"fbg_fasting": "fasting"}, inplace=True)
168
+ df.loc[(df["fasting"] == "fasting"), "fasting"] = YES
169
+ df.loc[(df["fasting"] == "non_fasting"), "fasting"] = NO
170
+ df = calculate_fasting_hrs(df)
171
+ df = df[[col for col in self.keep_cols if not col.startswith("ogtt")]]
172
+ df = df.reset_index(drop=True)
173
+ df = normalize_date_columns(
174
+ df, cols=["fbg_datetime", "report_datetime", "visit_datetime"]
175
+ )
176
+ self._glucose_fbg_df = df
177
+ return self._glucose_fbg_df
178
+
179
+ @property
180
+ def glucose_fbg_ogtt_df(self):
181
+ """Returns a prepared Dataframe of CRF meta_subject.glucose.
182
+
183
+ Note: meta_subject.glucose has FBG and OGTT measures.
184
+ """
185
+ if self._glucose_fbg_ogtt_df.empty:
186
+ df = get_crf(
187
+ model="meta_subject.glucose",
188
+ subject_identifiers=self.subject_identifiers,
189
+ subject_visit_model="meta_subject.subjectvisit",
190
+ )
191
+ df["source"] = "meta_subject.glucose"
192
+ df = calculate_fasting_hrs(df)
193
+ df = df[self.keep_cols]
194
+ df = df.reset_index(drop=True)
195
+ df = normalize_date_columns(
196
+ df, cols=["fbg_datetime", "ogtt_datetime", "report_datetime", "visit_datetime"]
197
+ )
198
+ self._glucose_fbg_ogtt_df = df
199
+ return self._glucose_fbg_ogtt_df
200
+
201
+ def merge_with_consent(self):
202
+ """Merge in consent DF."""
203
+ df_consent = get_subject_consent("meta_consent.subjectconsent")
204
+ self.df = pd.merge(self.df, df_consent, on="subject_identifier", how="left")
205
+ self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
206
+ self.df = self.df.reset_index(drop=True)
207
+
208
+ def merge_with_eos(self):
209
+ """Merge in EoS DF.
210
+
211
+ Drops patients who were taken off study by late exclusion.
212
+ """
213
+ df_eos = get_eos("meta_prn.endofstudy")
214
+ df_eos = df_eos[
215
+ df_eos["offstudy_reason"]
216
+ != (
217
+ "Patient fulfilled late exclusion criteria (due to abnormal blood "
218
+ "values or raised blood pressure at enrolment"
219
+ )
220
+ ]
221
+ self.df = pd.merge(self.df, df_eos, on="subject_identifier", how="left")
222
+ self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
223
+ self.df = self.df.reset_index(drop=True)
224
+
225
+ def add_calculated_days_from_baseline_to_event_columns(self):
226
+ """Add columns that calculate number of days from
227
+ baseline to visit, fbg, and ogtt.
228
+ """
229
+ self.df["visit_days"] = np.nan
230
+ self.df["fbg_days"] = np.nan
231
+ self.df["ogtt_days"] = np.nan
232
+ self.df["test"] = np.nan
233
+ self.df["visit_days"] = (
234
+ self.df["visit_datetime"] - self.df["baseline_datetime"]
235
+ ).dt.days
236
+ if not self.df["fbg_datetime"].empty:
237
+ self.df["fbg_days"] = (
238
+ self.df["fbg_datetime"] - self.df["baseline_datetime"]
239
+ ).dt.days
240
+ if not self.df["ogtt_datetime"].empty:
241
+ self.df["ogtt_days"] = (
242
+ self.df["ogtt_datetime"] - self.df["baseline_datetime"]
243
+ ).dt.days
244
+ self.df["visit_days"] = pd.to_numeric(self.df["visit_days"], downcast="integer")
245
+ self.df["fbg_days"] = pd.to_numeric(self.df["fbg_days"], downcast="integer")
246
+ self.df["ogtt_days"] = pd.to_numeric(self.df["ogtt_days"], downcast="integer")
247
+ self.df = self.df.reset_index(drop=True)
248
+
206
249
  def pre_check_endpoint(self):
207
- "Case 1: flag and remove all OGTT that met threshold"
208
- subjects_df = self.working_df.loc[
250
+ """Flag subjects that met endpoint by hitting the OGTT
251
+ threshold.
252
+
253
+ Add them to the endpoint_df and remove them from the
254
+ working_df.
255
+
256
+ Subject must have fasted at the timepoint.
257
+
258
+ The OGTT must have an FBG measure at the same timepoint.
259
+ The value of the FBG is not considered.
260
+
261
+ Most of these where taken off study for the OGTT. We are
262
+ using the OGTT as the reason/date instead of the offstudy
263
+ reason/date.
264
+
265
+ See `merge_with_final_endpoints` where we pick the date of
266
+ the first OGTT.
267
+ """
268
+ subject_endpoint_df = self.working_df.loc[
209
269
  (self.working_df["ogtt_value"] >= self.ogtt_threshhold)
270
+ & (self.working_df["fasting"] == YES)
210
271
  & (self.working_df["fbg_value"].notna())
211
272
  ].copy()
212
- subjects_df["endpoint"] = 1
213
- subjects_df["endpoint_label"] = self.endpoint_cases[CASE_OGTT]
214
- subjects_df["endpoint_type"] = CASE_OGTT
215
- subjects_df["interval_in_days"] = np.nan
216
- subjects_df = subjects_df.reset_index(drop=True)
217
- self.append_subject_to_endpoint_df(subjects_df[endpoint_columns])
218
- self.working_df = self.working_df.drop(
219
- index=self.working_df.loc[
220
- self.working_df["subject_identifier"].isin(subjects_df["subject_identifier"])
221
- ].index
222
- )
223
-
224
- def get_fbg_only_df(self) -> pd.DataFrame:
225
- fbg_only_df = get_crf(
226
- model="meta_subject.glucosefbg", subject_identifiers=self.subject_identifiers
227
- )
228
- fbg_only_df["source"] = "meta_subject.glucosefbg"
229
- fbg_only_df.rename(
230
- columns={"fbg_fasting": "fasting", "subject_visit": "subject_visit_id"},
231
- inplace=True,
232
- )
233
- fbg_only_df.loc[(fbg_only_df["fasting"] == "fasting"), "fasting"] = YES
234
- fbg_only_df.loc[(fbg_only_df["fasting"] == "non_fasting"), "fasting"] = NO
235
- return fbg_only_df
236
-
237
- def normalize_dates(self):
238
- """Normalize dates"""
239
- for col in ["fbg_datetime", "report_datetime"]:
240
- if not self.fbg_only_df[col].empty:
241
- self.fbg_only_df[col] = self.fbg_only_df[col].dt.floor("d")
242
- if not self.df[col].empty:
243
- self.df[col] = self.df[col].dt.floor("d")
244
- if not self.df["ogtt_datetime"].empty:
245
- self.df["ogtt_datetime"] = self.df["ogtt_datetime"].dt.floor("d")
246
- else:
247
- self.df["ogtt_datetime"] = pd.NaT
248
-
249
- def calculate_fasting_hrs(self):
250
- for dftmp in [self.fbg_only_df, self.df]:
251
- dftmp.loc[(dftmp["fasting"] == NO), "fasting_duration_delta"] = pd.NaT
252
- if dftmp.empty:
253
- dftmp["fasting_hrs"] = np.nan
254
- else:
255
- dftmp["fasting_hrs"] = (
256
- dftmp["fasting_duration_delta"].dt.total_seconds() / 3600
273
+ if not subject_endpoint_df.empty:
274
+ # flag the selected endpoint rows as endpoints
275
+ subject_endpoint_df["endpoint"] = 1
276
+ subject_endpoint_df["endpoint_label"] = self.endpoint_cases[CASE_OGTT]
277
+ subject_endpoint_df["endpoint_type"] = CASE_OGTT
278
+ subject_endpoint_df["interval_in_days"] = np.nan
279
+
280
+ # add back the others rows for these subjects
281
+ subjects_df = self.working_df.loc[
282
+ (
283
+ self.working_df["subject_identifier"].isin(
284
+ subject_endpoint_df["subject_identifier"]
285
+ )
286
+ & ~(
287
+ self.working_df["fbg_datetime"].isin(
288
+ subject_endpoint_df["fbg_datetime"]
289
+ )
290
+ )
257
291
  )
292
+ ].copy()
293
+ subjects_df = subjects_df.reset_index(drop=True)
294
+ subjects_df["endpoint"] = np.nan
295
+ subjects_df["endpoint_label"] = None
296
+ subjects_df["endpoint_type"] = None
297
+ subjects_df["interval_in_days"] = np.nan
298
+ subjects_df = pd.concat([subjects_df, subject_endpoint_df])
299
+ subjects_df = subjects_df.reset_index(drop=True)
300
+
301
+ self.append_subject_to_endpoint_df(subjects_df[endpoint_columns])
302
+ self.remove_subjects_from_working_df(subjects_df)
258
303
 
259
304
  def append_subject_to_endpoint_df(self, subject_df: pd.DataFrame) -> None:
305
+ """Appends all rows of a subject, or subjects, to the
306
+ Endpoints DF.
307
+ """
260
308
  if self.endpoint_df.empty:
261
309
  self.endpoint_df = subject_df.copy()
262
310
  else:
@@ -267,6 +315,9 @@ class GlucoseEndpointsByDate:
267
315
  self.endpoint_df = self.endpoint_df.reset_index(drop=True)
268
316
 
269
317
  def remove_subject_from_working_df(self, row: pd.Series) -> None:
318
+ """Removes one subject from the working DF given a Series with
319
+ value `subject_identifier`.
320
+ """
270
321
  self.working_df = self.working_df.drop(
271
322
  index=self.working_df[
272
323
  self.working_df["subject_identifier"] == row["subject_identifier"]
@@ -274,6 +325,17 @@ class GlucoseEndpointsByDate:
274
325
  )
275
326
  self.working_df = self.working_df.reset_index(drop=True)
276
327
 
328
+ def remove_subjects_from_working_df(self, rows: pd.DataFrame) -> None:
329
+ """Removes subjects from the working DF given a DF with
330
+ column `subject_identifier`.
331
+ """
332
+ self.working_df = self.working_df.drop(
333
+ index=self.working_df.loc[
334
+ self.working_df["subject_identifier"].isin(rows["subject_identifier"])
335
+ ].index
336
+ )
337
+ self.working_df = self.working_df.reset_index(drop=True)
338
+
277
339
  def get_subject_df(self, subject_identifier: str) -> pd.DataFrame:
278
340
  subject_df = self.working_df.loc[
279
341
  self.working_df["subject_identifier"] == subject_identifier
@@ -286,7 +348,7 @@ class GlucoseEndpointsByDate:
286
348
  subject_df = subject_df.reset_index(drop=True)
287
349
  subject_df = subject_df[endpoint_columns]
288
350
  subject_df = subject_df.merge(
289
- self.visit_codes,
351
+ self.visit_codes_df,
290
352
  on="visit_code",
291
353
  how="outer",
292
354
  indicator=False,
@@ -317,7 +379,7 @@ class GlucoseEndpointsByDate:
317
379
  df_eos["endpoint_label"] = self.endpoint_cases[CASE_EOS]
318
380
  df_eos["endpoint_type"] = CASE_EOS
319
381
  df_eos["interval_in_days"] = np.nan
320
- df_eos.reset_index(drop=True, inplace=True)
382
+ df_eos = df_eos.reset_index(drop=True)
321
383
  self.append_subject_to_endpoint_df(df_eos[endpoint_columns])
322
384
  self.working_df = self.working_df.drop(
323
385
  index=self.working_df.loc[
@@ -326,7 +388,7 @@ class GlucoseEndpointsByDate:
326
388
  )
327
389
 
328
390
  def merge_with_final_endpoints(self):
329
- # merge endpoint_df with original df
391
+ """Merge endpoint_df with original df"""
330
392
  if self.endpoint_df.empty:
331
393
  self.df = self.df[~(self.df["subject_identifier"].isin(self.subject_identifiers))]
332
394
  else:
@@ -335,27 +397,34 @@ class GlucoseEndpointsByDate:
335
397
  self.endpoint_df["fbg_datetime"] - self.endpoint_df["baseline_datetime"]
336
398
  ).dt.days
337
399
 
338
- # print(f"Before dedup = {len(self.endpoint_df)}")
339
-
400
+ # Create DF of subjects taken offstudy (EOS) where endpoint==1.
401
+ # Keep the last record for the subject by fbg_datetime.
340
402
  df1 = self.endpoint_df.copy()
341
- df1 = df1[(df1["endpoint_type"] == CASE_EOS) & (df1["endpoint"] == 1)]
403
+ df1 = df1[
404
+ (df1["endpoint_type"].isin([CASE_EOS, CASE_OGTT])) & (df1["endpoint"] == 1)
405
+ ]
342
406
  df1 = df1.sort_values(["subject_identifier", "fbg_datetime"])
343
407
  df1 = df1.reset_index(drop=True)
344
408
  df1 = df1.set_index(["subject_identifier"])
345
409
  df1 = df1[~df1.index.duplicated(keep="last")]
346
410
  df1 = df1.reset_index(drop=False)
347
411
 
412
+ # Create DF of subjects still on-study where endpoint==1.
413
+ # Keep the first record for the subject by fbg_datetime.
348
414
  df2 = self.endpoint_df.copy()
349
- df2 = df2[(df2["endpoint_type"] != CASE_EOS) & (df2["endpoint"] == 1)]
415
+ df2 = df2[
416
+ ~(df2["endpoint_type"].isin([CASE_EOS, CASE_OGTT])) & (df2["endpoint"] == 1)
417
+ ]
350
418
  df2 = df2.sort_values(["subject_identifier", "fbg_datetime"])
351
419
  df2 = df2.reset_index(drop=True)
352
420
  df2 = df2.set_index(["subject_identifier"])
353
421
  df2 = df2[~df2.index.duplicated(keep="first")]
354
422
  df2 = df2.reset_index(drop=False)
355
423
 
424
+ # create new DF with ONE row per subject for those that reached
425
+ # the endpoint (endpoint=1) by merging two DFs above.
356
426
  self.endpoint_only_df = pd.concat([df1, df2])
357
427
  self.endpoint_only_df = self.endpoint_only_df.reset_index(drop=True)
358
- # print(f"After dedup = {len(self.endpoint_df)}")
359
428
 
360
429
  self.df = pd.merge(
361
430
  self.df,
@@ -367,52 +436,8 @@ class GlucoseEndpointsByDate:
367
436
  self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
368
437
  self.df = self.df.reset_index(drop=True)
369
438
 
370
- def summarize(
371
- self,
372
- fasting: str | list | None = None,
373
- interval_in_days_min: int | None = None,
374
- ):
375
- days_min = interval_in_days_min or 7
376
-
377
- fasting = fasting or [YES, NO, pd.NA]
378
- fasting = fasting if type(fasting) in [list, tuple] else [fasting]
379
-
380
- endpoint_df = self.endpoint_df.copy()
381
-
382
- # endpoint by eos with dm subjects
383
- df7 = endpoint_df[
384
- (endpoint_df["endpoint_type"] == CASE_EOS) & (endpoint_df["endpoint"] == 1)
385
- ]
386
- df7.reset_index(drop=True, inplace=True)
387
- # endpoint by glucose subjects
388
- df = endpoint_df[
389
- (endpoint_df["endpoint_type"] != CASE_EOS)
390
- & (endpoint_df["endpoint"] == 1)
391
- & (endpoint_df["fasting"].isin(fasting))
392
- & (
393
- (endpoint_df["interval_in_days"] >= days_min)
394
- | (endpoint_df["interval_in_days"].isna())
395
- )
396
- ]
397
- df.reset_index(drop=True, inplace=True)
398
- df = pd.concat([df, df7])
399
- df.reset_index(drop=True, inplace=True)
400
- df_counts = df[["endpoint_type", "endpoint_label"]].value_counts().to_frame()
401
- df_counts.sort_values(by=["endpoint_type"], inplace=True)
402
- df_counts.reset_index(inplace=True)
403
-
404
- sums = {
405
- "endpoint_type": [np.nan],
406
- "endpoint_label": ["Total"],
407
- "count": [
408
- df_counts["count"].sum(),
409
- ],
410
- }
411
- sums_df = pd.DataFrame.from_dict(sums)
412
- df_counts = pd.concat([df_counts, sums_df], ignore_index=True)
413
- return df_counts
414
-
415
439
  def to_model(self, model: str | None = None, subject_identifiers: list[str] | None = None):
440
+ """Write endpoint_only_df to the Endpoints model"""
416
441
  df = self.endpoint_only_df
417
442
  model = model or "meta_reports.endpoints"
418
443
  now = get_utcnow()
@@ -49,18 +49,17 @@ def get_empty_endpoint_df() -> pd.DataFrame:
49
49
  return endpoint_df
50
50
 
51
51
 
52
- def get_unique_visit_codes(source_df: pd.DataFrame) -> pd.DataFrame:
53
- codes = source_df[source_df["visit_code"] % 1 == 0]["visit_code"].value_counts().to_frame()
54
- codes = codes.reset_index()
55
- codes["visit_code"] = codes["visit_code"].astype(float)
56
- codes = codes.sort_values(["visit_code"])
57
- # visit_codes = visit_codes[visit_codes["visit_code"] > self.after_visit_code]
58
- codes = codes.reset_index(drop=True)
59
- return codes
52
+ def get_unique_visit_codes(df: pd.DataFrame) -> pd.DataFrame:
53
+ stats_df = df[df["visit_code"] % 1 == 0]["visit_code"].value_counts().to_frame()
54
+ stats_df = stats_df.reset_index()
55
+ stats_df["visit_code"] = stats_df["visit_code"].astype(float)
56
+ stats_df = stats_df.sort_values(["visit_code"])
57
+ stats_df = stats_df.reset_index(drop=True)
58
+ return stats_df
60
59
 
61
60
 
62
- def get_unique_subject_identifiers(source_df) -> pd.DataFrame:
63
- df = pd.DataFrame(source_df["subject_identifier"].unique(), columns=["subject_identifier"])
64
- df = df.sort_values(["subject_identifier"])
65
- df = df.reset_index()
66
- return df
61
+ def get_unique_subject_identifiers(df: pd.DataFrame) -> pd.DataFrame:
62
+ values_df = pd.DataFrame(df["subject_identifier"].unique(), columns=["subject_identifier"])
63
+ values_df = values_df.sort_values(["subject_identifier"])
64
+ values_df = values_df.reset_index()
65
+ return values_df
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: meta-edc
3
- Version: 0.3.24
3
+ Version: 0.3.25
4
4
  Summary: META Trial EDC (http://www.isrctn.com/ISRCTN76157257)
5
5
  Home-page: https://github.com/meta-trial/meta-edc
6
6
  Author: Erik van Widenfelt
@@ -19,7 +19,7 @@ Requires-Python: >=3.12
19
19
  Description-Content-Type: text/x-rst
20
20
  License-File: LICENSE
21
21
  License-File: AUTHORS
22
- Requires-Dist: edc ==0.6.2
22
+ Requires-Dist: edc ==0.6.3
23
23
  Requires-Dist: edc-microscopy
24
24
  Requires-Dist: beautifulsoup4
25
25
  Requires-Dist: edc-analytics