meta-edc 1.0.6__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. meta_analytics/dataframes/__init__.py +3 -0
  2. meta_analytics/dataframes/constants.py +1 -1
  3. meta_analytics/dataframes/enrolled/__init__.py +0 -1
  4. meta_analytics/dataframes/get_eos_df.py +15 -2
  5. meta_analytics/dataframes/get_glucose_df.py +149 -0
  6. meta_analytics/dataframes/get_glucose_fbg_df.py +27 -0
  7. meta_analytics/dataframes/get_glucose_fbg_ogtt_df.py +22 -0
  8. meta_analytics/dataframes/glucose_endpoints/endpoint_by_date.py +106 -120
  9. meta_analytics/dataframes/glucose_endpoints/glucose_endpoints_by_date.py +36 -227
  10. meta_analytics/dataframes/utils.py +18 -4
  11. meta_analytics/notebooks/hiv_regimens.ipynb +425 -0
  12. meta_analytics/notebooks/monitoring_report.ipynb +1561 -0
  13. meta_analytics/notebooks/pharmacy.ipynb +971 -0
  14. meta_analytics/utils.py +81 -0
  15. {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/METADATA +4 -3
  16. {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/RECORD +32 -18
  17. {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/WHEEL +1 -1
  18. meta_edc-1.0.7.dist-info/licenses/AUTHORS.rst +8 -0
  19. meta_reports/migrations/0054_auto_20250422_2003.py +81 -0
  20. meta_reports/migrations/0055_alter_glucosesummary_table.py +17 -0
  21. meta_reports/migrations/0056_auto_20250422_2214.py +54 -0
  22. meta_reports/migrations/0057_auto_20250422_2224.py +54 -0
  23. meta_reports/migrations/0058_auto_20250422_2232.py +54 -0
  24. meta_reports/models/dbviews/glucose_summary/unmanaged_model.py +13 -1
  25. meta_reports/models/dbviews/glucose_summary/view_definition.py +8 -5
  26. meta_subject/form_validators/glucose_form_validator.py +16 -1
  27. meta_subject/forms/study_medication_form.py +5 -3
  28. meta_subject/migrations/0221_auto_20250402_1913.py +42 -0
  29. meta_subject/migrations/0222_alter_historicalstudymedication_stock_codes_and_more.py +46 -0
  30. meta_analytics/dataframes/enrolled/get_glucose_df.py +0 -122
  31. /meta_edc-1.0.6.dist-info/AUTHORS → /meta_analytics/dataframes/glucose_endpoints/utils.py +0 -0
  32. {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info/licenses}/LICENSE +0 -0
  33. {meta_edc-1.0.6.dist-info → meta_edc-1.0.7.dist-info}/top_level.txt +0 -0
@@ -1,44 +1,26 @@
1
1
  import numpy as np
2
2
  import pandas as pd
3
3
  from django.apps import apps as django_apps
4
- from edc_constants.constants import NO, YES
5
- from edc_pdutils.dataframes import (
6
- get_crf,
7
- get_eos,
8
- get_subject_consent,
9
- get_subject_visit,
10
- )
4
+ from edc_constants.constants import YES
11
5
  from edc_utils import get_utcnow
12
6
 
13
7
  from ..constants import (
14
8
  CASE_EOS,
15
- CASE_FBG_ONLY,
16
9
  CASE_FBGS_WITH_FIRST_OGTT,
17
10
  CASE_FBGS_WITH_SECOND_OGTT,
18
11
  CASE_OGTT,
19
12
  endpoint_cases,
20
13
  endpoint_columns,
21
14
  )
15
+ from ..get_glucose_df import get_glucose_df
22
16
  from ..utils import (
23
17
  get_empty_endpoint_df,
24
18
  get_test_string,
25
19
  get_unique_subject_identifiers,
26
- get_unique_visit_codes,
27
20
  )
28
21
  from .endpoint_by_date import EndpointByDate
29
22
 
30
23
 
31
- def calculate_fasting_hrs(df: pd.DataFrame):
32
- df.loc[(df["fasting"] == NO), "fasting_duration_delta"] = pd.NaT
33
- if df.empty:
34
- df["fasting_hrs"] = np.nan
35
- else:
36
- df["fasting_hrs"] = df["fasting_duration_delta"].apply(
37
- lambda s: np.nan if pd.isna(s) else s.total_seconds() / 3600
38
- )
39
- return df
40
-
41
-
42
24
  class GlucoseEndpointsByDate:
43
25
  """
44
26
  Usage:
@@ -61,7 +43,7 @@ class GlucoseEndpointsByDate:
61
43
  ogtt_threshhold = 11.1
62
44
  endpoint_cls = EndpointByDate
63
45
  keep_cols = [
64
- "fasting",
46
+ "fasted",
65
47
  "fasting_hrs",
66
48
  "fbg_value",
67
49
  "fbg_units",
@@ -82,6 +64,7 @@ class GlucoseEndpointsByDate:
82
64
  def __init__(
83
65
  self, subject_identifiers: list[str] | None = None, case_list: list[int] | None = None
84
66
  ):
67
+
85
68
  self._glucose_fbg_df = pd.DataFrame()
86
69
  self._glucose_fbg_ogtt_df = pd.DataFrame()
87
70
  self.endpoint_only_df = pd.DataFrame()
@@ -95,213 +78,35 @@ class GlucoseEndpointsByDate:
95
78
  ]
96
79
  self.endpoint_cases = {k: v for k, v in endpoint_cases.items() if k in self.case_list}
97
80
 
98
- # merge two model DFs
99
- if self.glucose_fbg_ogtt_df.empty:
100
- self.df = self.glucose_fbg_df.copy()
101
- self.df[["ogtt_value", "ogtt_units"]] = np.nan
102
- self.df[["ogtt_datetime"]] = pd.NaT
103
- elif self.glucose_fbg_df.empty:
104
- self.df = self.glucose_fbg_ogtt_df.copy()
105
- else:
106
- self.df = self.glucose_fbg_ogtt_df.merge(
107
- self.glucose_fbg_df,
108
- on=["subject_visit_id"],
109
- how="outer",
110
- indicator=True,
111
- suffixes=("", "_y"),
112
- )
113
- # cast as ...
114
- for col in ["fasting_hrs", "fbg_value"]:
115
- self.df[col] = self.df[col].astype("float64")
116
- if f"{col}_y" in self.df.columns:
117
- self.df[f"{col}_y"] = self.df[f"{col}_y"].astype("float64")
118
- for col in ["fasting", "fbg_units", "source"]:
119
- self.df[col] = self.df[col].astype("object")
120
- if f"{col}_y" in self.df.columns:
121
- self.df[f"{col}_y"] = self.df[f"{col}_y"].astype("object")
122
- self.df = self.df.drop(
123
- columns=[col for col in self.df.columns if col.endswith("_y") or col == "_merge"]
124
- )
125
- self.df = self.df.reset_index(drop=True)
126
-
127
- # merge w/ subject_visit
128
- subject_visit_df = get_subject_visit(
129
- "meta_subject.subjectvisit", subject_identifiers=self.subject_identifiers
130
- )
131
- self.df = subject_visit_df.merge(
132
- self.df, on=["subject_visit_id"], how="left", suffixes=("", "_y")
133
- )
134
- self.df = self.df[[col for col in self.keep_cols]]
135
- self.df = self.df.reset_index(drop=True)
136
-
137
- # pivot right_only cols
138
- cols = [
139
- "fasting",
140
- "fasting_hrs",
141
- "fbg_value",
142
- "fbg_units",
143
- "fbg_datetime",
144
- "source",
145
- "report_datetime",
146
- ]
147
- for col in cols:
148
- if f"{col}_y" in self.df.columns and not self.df[f"{col}_y"].isnull().all():
149
- self.df.loc[
150
- (self.df["_merge"].isin(["both", "right_only"])) & (self.df[col].isna()),
151
- col,
152
- ] = self.df[f"{col}_y"]
153
- # if fbg_datetime still null, use visit datetime
154
- if self.df["fbg_datetime"].isnull().all():
155
- self["fbg_datetime"] = self.df["visit_datetime"]
156
- else:
157
- self.df.loc[(self.df["fbg_datetime"].isna()), "fbg_datetime"] = self.df[
158
- "visit_datetime"
159
- ]
160
- self.df = self.df.drop(
161
- columns=[col for col in self.df.columns if col.endswith("_y") or col == "_merge"]
162
- )
163
- self.df = self.df.reset_index(drop=True)
164
-
165
- self.merge_with_consent()
166
- self.merge_with_eos()
167
-
168
- self.add_calculated_days_from_baseline_to_event_columns()
81
+ self.df = get_glucose_df().copy()
169
82
 
170
83
  # label rows by type of glu tests (ones with value)
171
84
  self.df["test"] = self.df.apply(get_test_string, axis=1)
172
- self.df = self.df.reset_index(drop=True)
173
-
174
- self.visit_codes_df = get_unique_visit_codes(self.df)
175
- self.subject_identifiers_df = get_unique_subject_identifiers(self.df)
176
-
177
- self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
178
- self.df = self.df.reset_index(drop=True)
179
85
 
86
+ self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"]).reset_index(
87
+ drop=True
88
+ )
180
89
  self.working_df = self.df.copy()
181
90
  self.working_df["endpoint"] = 0
182
91
  self.endpoint_df = get_empty_endpoint_df()
183
92
 
184
93
  def run(self):
185
- self.pre_check_endpoint()
186
- for index, row in self.subject_identifiers_df.iterrows():
94
+ self.process_by_ogtt_only()
95
+ subject_identifiers_df = get_unique_subject_identifiers(self.df)
96
+ for index, row in subject_identifiers_df.iterrows():
187
97
  subject_df = self.get_subject_df(row["subject_identifier"])
188
- subject_df = self.check_endpoint_by_fbg_for_subject(
189
- subject_df, case_list=[CASE_FBGS_WITH_FIRST_OGTT, CASE_FBGS_WITH_SECOND_OGTT]
190
- )
98
+ subject_df = self.endpoint_cls(
99
+ subject_df=subject_df,
100
+ fbg_threshhold=self.fbg_threshhold,
101
+ ogtt_threshhold=self.ogtt_threshhold,
102
+ ).subject_df
191
103
  if len(subject_df.loc[subject_df["endpoint"] == 1]) == 1:
192
104
  self.append_subject_to_endpoint_df(subject_df)
193
105
  self.remove_subject_from_working_df(row)
194
-
195
- if CASE_FBG_ONLY in self.endpoint_cases:
196
- for index, row in self.subject_identifiers_df.iterrows():
197
- subject_df = self.get_subject_df(row["subject_identifier"])
198
- subject_df = self.check_endpoint_by_fbg_for_subject(
199
- subject_df, case_list=[CASE_FBG_ONLY]
200
- )
201
- if len(subject_df.loc[subject_df["endpoint"] == 1]) == 1:
202
- self.append_subject_to_endpoint_df(subject_df)
203
- self.remove_subject_from_working_df(row)
204
-
205
106
  self.post_check_endpoint()
206
107
  self.merge_with_final_endpoints()
207
108
 
208
- @property
209
- def glucose_fbg_df(self) -> pd.DataFrame:
210
- """Returns a prepared Dataframe of CRF
211
- meta_subject.glucosefbg.
212
-
213
- Note: meta_subject.glucosefbg has only FBG measures.
214
- """
215
- if self._glucose_fbg_df.empty:
216
- df = get_crf(
217
- model="meta_subject.glucosefbg",
218
- subject_identifiers=self.subject_identifiers,
219
- # subject_visit_model="meta_subject.subjectvisit",
220
- )
221
- df["source"] = "meta_subject.glucosefbg"
222
- df.rename(columns={"fbg_fasting": "fasting"}, inplace=True)
223
- df.loc[(df["fasting"] == "fasting"), "fasting"] = YES
224
- df.loc[(df["fasting"] == "non_fasting"), "fasting"] = NO
225
- df = calculate_fasting_hrs(df)
226
- # df = df[[col for col in self.keep_cols if not col.startswith("ogtt")]]
227
- df = df.reset_index(drop=True)
228
- self._glucose_fbg_df = df
229
- return self._glucose_fbg_df
230
-
231
- @property
232
- def glucose_fbg_ogtt_df(self):
233
- """Returns a prepared Dataframe of CRF meta_subject.glucose.
234
-
235
- Note: meta_subject.glucose has FBG and OGTT measures.
236
- """
237
- if self._glucose_fbg_ogtt_df.empty:
238
- df = get_crf(
239
- model="meta_subject.glucose",
240
- subject_identifiers=self.subject_identifiers,
241
- # subject_visit_model="meta_subject.subjectvisit",
242
- )
243
- df["source"] = "meta_subject.glucose"
244
- df = calculate_fasting_hrs(df)
245
- # df = df[self.keep_cols]
246
- df = df.reset_index(drop=True)
247
- self._glucose_fbg_ogtt_df = df
248
- return self._glucose_fbg_ogtt_df
249
-
250
- def merge_with_consent(self):
251
- """Merge in consent DF."""
252
- df_consent = get_subject_consent(
253
- "meta_consent.subjectconsent", subject_identifiers=self.subject_identifiers
254
- )
255
- self.df = pd.merge(
256
- self.df, df_consent, on="subject_identifier", how="left", suffixes=("", "_y")
257
- )
258
- self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
259
- self.df = self.df.reset_index(drop=True)
260
-
261
- def merge_with_eos(self):
262
- """Merge in EoS DF.
263
-
264
- Drops patients who were taken off study by late exclusion.
265
- """
266
- df_eos = get_eos("meta_prn.endofstudy", subject_identifiers=self.subject_identifiers)
267
- df_eos = df_eos[
268
- df_eos["offstudy_reason"]
269
- != (
270
- "Patient fulfilled late exclusion criteria (due to abnormal blood "
271
- "values or raised blood pressure at enrolment"
272
- )
273
- ]
274
- self.df = pd.merge(
275
- self.df, df_eos, on="subject_identifier", how="left", suffixes=("", "_y")
276
- )
277
- self.df = self.df.sort_values(by=["subject_identifier", "fbg_datetime"])
278
- self.df = self.df.reset_index(drop=True)
279
-
280
- def add_calculated_days_from_baseline_to_event_columns(self):
281
- """Add columns that calculate number of days from
282
- baseline to visit, fbg, and ogtt.
283
- """
284
- self.df["visit_days"] = np.nan
285
- self.df["fbg_days"] = np.nan
286
- self.df["ogtt_days"] = np.nan
287
- self.df["test"] = np.nan
288
- self.df["visit_days"] = (
289
- self.df["visit_datetime"] - self.df["baseline_datetime"]
290
- ).dt.days
291
- if not self.df["fbg_datetime"].isnull().all():
292
- self.df["fbg_days"] = (
293
- self.df["fbg_datetime"] - self.df["baseline_datetime"]
294
- ).dt.days
295
- if not self.df["ogtt_datetime"].isnull().all():
296
- self.df["ogtt_days"] = (
297
- self.df["ogtt_datetime"] - self.df["baseline_datetime"]
298
- ).dt.days
299
- self.df["visit_days"] = pd.to_numeric(self.df["visit_days"], downcast="integer")
300
- self.df["fbg_days"] = pd.to_numeric(self.df["fbg_days"], downcast="integer")
301
- self.df["ogtt_days"] = pd.to_numeric(self.df["ogtt_days"], downcast="integer")
302
- self.df = self.df.reset_index(drop=True)
303
-
304
- def pre_check_endpoint(self):
109
+ def process_by_ogtt_only(self):
305
110
  """Flag subjects that met endpoint by hitting the OGTT
306
111
  threshold.
307
112
 
@@ -322,15 +127,17 @@ class GlucoseEndpointsByDate:
322
127
  """
323
128
  subject_endpoint_df = self.working_df.loc[
324
129
  (self.working_df["ogtt_value"] >= self.ogtt_threshhold)
325
- & (self.working_df["fasting"] == YES)
130
+ & (self.working_df["ogtt_value"] < 9999.99)
131
+ & (self.working_df["fasted"] == YES)
326
132
  & (self.working_df["fbg_value"].notna())
327
133
  ].copy()
328
- subject_endpoint_df.sort_values(by=["subject_identifier", "fbg_datetime"])
329
- subject_endpoint_df = subject_endpoint_df.reset_index(drop=True)
330
- subject_endpoint_df = subject_endpoint_df.drop_duplicates(
331
- subset=["subject_identifier"], keep="first"
134
+
135
+ subject_endpoint_df = (
136
+ subject_endpoint_df.sort_values(by=["subject_identifier", "fbg_datetime"])
137
+ .reset_index(drop=True)
138
+ .drop_duplicates(subset=["subject_identifier"], keep="first")
139
+ .reset_index(drop=True)
332
140
  )
333
- subject_endpoint_df = subject_endpoint_df.reset_index(drop=True)
334
141
  if not subject_endpoint_df.empty:
335
142
  # flag the selected endpoint rows as endpoints
336
143
  subject_endpoint_df["endpoint"] = 1
@@ -372,8 +179,7 @@ class GlucoseEndpointsByDate:
372
179
  self.endpoint_df = pd.concat([self.endpoint_df, subject_df])
373
180
  self.endpoint_df = self.endpoint_df.sort_values(
374
181
  by=["subject_identifier", "visit_code"]
375
- )
376
- self.endpoint_df = self.endpoint_df.reset_index(drop=True)
182
+ ).reset_index(drop=True)
377
183
 
378
184
  def remove_subject_from_working_df(self, row: pd.Series) -> None:
379
185
  """Removes one subject from the working DF given a Series with
@@ -383,8 +189,7 @@ class GlucoseEndpointsByDate:
383
189
  index=self.working_df[
384
190
  self.working_df["subject_identifier"] == row["subject_identifier"]
385
191
  ].index
386
- )
387
- self.working_df = self.working_df.reset_index(drop=True)
192
+ ).reset_index(drop=True)
388
193
 
389
194
  def remove_subjects_from_working_df(self, rows: pd.DataFrame) -> None:
390
195
  """Removes subjects from the working DF given a DF with
@@ -394,8 +199,7 @@ class GlucoseEndpointsByDate:
394
199
  index=self.working_df.loc[
395
200
  self.working_df["subject_identifier"].isin(rows["subject_identifier"])
396
201
  ].index
397
- )
398
- self.working_df = self.working_df.reset_index(drop=True)
202
+ ).reset_index(drop=True)
399
203
 
400
204
  def get_subject_df(self, subject_identifier: str) -> pd.DataFrame:
401
205
  subject_df = self.working_df.loc[
@@ -407,22 +211,27 @@ class GlucoseEndpointsByDate:
407
211
  subject_df["endpoint"] = 0
408
212
  subject_df = subject_df[endpoint_columns]
409
213
  subject_df = subject_df.sort_values(["subject_identifier", "fbg_datetime"])
214
+ subject_df[[col for col in subject_df if "value" in col]] = subject_df[
215
+ [col for col in subject_df if "value" in col]
216
+ ].fillna(0.0)
217
+
410
218
  subject_df = subject_df.reset_index(drop=True)
411
219
  return subject_df
412
220
 
413
221
  def check_endpoint_by_fbg_for_subject(
414
222
  self, subject_df: pd.DataFrame, case_list: list[int] | None = None
415
223
  ) -> pd.DataFrame:
416
- case_list = case_list or [2, 3]
417
224
  endpoint = self.endpoint_cls(
418
225
  subject_df=subject_df,
419
226
  fbg_threshhold=self.fbg_threshhold,
420
227
  ogtt_threshhold=self.ogtt_threshhold,
421
- case_list=case_list,
422
228
  )
423
229
  return endpoint.subject_df
424
230
 
425
231
  def post_check_endpoint(self):
232
+ """Add any who were taken off study before endpoint guidelines
233
+ were clearly defined.
234
+ """
426
235
  df_eos = self.working_df.loc[
427
236
  self.working_df["offstudy_reason"] == "Patient developed diabetes"
428
237
  ].copy()
@@ -512,7 +321,7 @@ class GlucoseEndpointsByDate:
512
321
  fbg_value=(None if pd.isna(row["fbg_value"]) else row["fbg_value"]),
513
322
  ogtt_value=None if pd.isna(row["ogtt_value"]) else row["ogtt_value"],
514
323
  fbg_date=(None if pd.isna(row["fbg_datetime"]) else row["fbg_datetime"]),
515
- fasting=(None if pd.isna(row["fasting"]) else row["fasting"]),
324
+ fasting=(None if pd.isna(row["fasted"]) else row["fasted"]),
516
325
  endpoint_label=(
517
326
  None if pd.isna(row["endpoint_label"]) else row["endpoint_label"]
518
327
  ),
@@ -1,4 +1,6 @@
1
+ import numpy as np
1
2
  import pandas as pd
3
+ from edc_constants.constants import NO
2
4
 
3
5
  from .constants import endpoint_columns
4
6
 
@@ -59,7 +61,19 @@ def get_unique_visit_codes(df: pd.DataFrame) -> pd.DataFrame:
59
61
 
60
62
 
61
63
  def get_unique_subject_identifiers(df: pd.DataFrame) -> pd.DataFrame:
62
- values_df = pd.DataFrame(df["subject_identifier"].unique(), columns=["subject_identifier"])
63
- values_df = values_df.sort_values(["subject_identifier"])
64
- values_df = values_df.reset_index()
65
- return values_df
64
+ return (
65
+ pd.DataFrame(df["subject_identifier"].unique(), columns=["subject_identifier"])
66
+ .sort_values(["subject_identifier"])
67
+ .reset_index()
68
+ )
69
+
70
+
71
+ def calculate_fasting_hrs(df: pd.DataFrame) -> pd.DataFrame:
72
+ df.loc[(df["fasting"] == NO), "fasting_duration_delta"] = pd.NaT
73
+ if df.empty:
74
+ df["fasting_hrs"] = np.nan
75
+ else:
76
+ df["fasting_hrs"] = df["fasting_duration_delta"].apply(
77
+ lambda s: np.nan if pd.isna(s) else s.total_seconds() / 3600
78
+ )
79
+ return df