PyPI - meta-edc - Versions diffs - 1.0.6__py3-none-any.whl → 1.0.7__py3-none-any.whl - Mend

meta-edc 1.0.6py3-none-any.whl → 1.0.7py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

meta_analytics/notebooks/monitoring_report.ipynb ADDED Viewed

@@ -0,0 +1,1561 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "id": "28e21930-b943-4a08-a79a-ff2712ae9215",
+   "metadata": {},
+   "source": [
+    "%%capture\n",
+    "import os\n",
+    "from pathlib import Path\n",
+    "import pandas as pd\n",
+    "from dj_notebook import activate\n",
+    "import numpy as np\n",
+    "from django_pandas.io import read_frame\n",
+    "\n",
+    "env_file = os.environ[\"META_ENV\"]\n",
+    "reports_folder = Path(os.environ[\"META_REPORTS_FOLDER\"])\n",
+    "analysis_folder = Path(os.environ[\"META_ANALYSIS_FOLDER\"])\n",
+    "pharmacy_folder = Path(os.environ[\"META_PHARMACY_FOLDER\"])\n",
+    "plus = activate(dotenv_file=env_file)\n",
+    "pd.set_option('future.no_silent_downcasting', True)"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "id": "52d4bb98-18a5-4528-be8d-e8370b1b5d1e",
+   "metadata": {},
+   "source": [
+    "\n",
+    "import pdfkit\n",
+    "from datetime import date\n",
+    "from edc_pdutils.dataframes import get_subject_visit\n",
+    "from meta_visit_schedule.constants import MONTH15, MONTH18, MONTH21, MONTH27, MONTH30, MONTH33, MONTH39\n",
+    "from meta_analytics.dataframes import GlucoseEndpointsByDate\n",
+    "from scipy.stats import chi2\n",
+    "from great_tables import loc, style, md\n",
+    "from meta_analytics.dataframes import get_eos_df\n",
+    "from meta_analytics.utils import df_as_great_table, df_as_great_table2\n",
+    "from meta_prn.models import LossToFollowup\n",
+    "from edc_visit_schedule.models import SubjectScheduleHistory\n",
+    "from edc_appointment.analytics import get_appointment_df\n",
+    "from edc_appointment.constants import NEW_APPT, CANCELLED_APPT, ONTIME_APPT, MISSED_APPT\n",
+    "from meta_consent.models import SubjectConsentV1Ext\n",
+    "from meta_analytics.dataframes import get_glucose_df\n",
+    "\n",
+    "from edc_appointment.constants import SCHEDULED_APPT, UNSCHEDULED_APPT # noqa\n",
+    "from edc_constants.constants import YES # noqa\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "html_data = []\n",
+    "cutoff_date = date(2025,3, 31)\n",
+    "end_of_trial_date= date(2026,3, 1)\n",
+    "document_title = f\"<h2>Monitoring Report: {cutoff_date.strftime('%B %Y')}</h2><h5>Data Download: {cutoff_date.strftime('%d %B %Y')}</h5>\"\n",
+    "study_title = 'META3 - Metformin treatment for diabetes prevention in Africa'\n",
+    "pdf_filename = f\"monitoring_report_{cutoff_date.strftime('%Y%m%d')}.pdf\"\n"
+   ],
+   "id": "b255fd34cd6f50c0",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "\n",
+    "df_visit = get_subject_visit(\"meta_subject.subjectvisit\")\n",
+    "late_exlusion_offstudy_reasons = ['Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment']\n",
+    "df_eos = get_eos_df()\n",
+    "df_eos_excluded = (\n",
+    "    df_eos\n",
+    "    .query(\"offstudy_reason.isin(@late_exlusion_offstudy_reasons)\")\n",
+    "    .copy()\n",
+    "    .reset_index()\n",
+    ")\n",
+    "df_visit = (\n",
+    "    df_visit\n",
+    "    .merge(df_eos_excluded[[\"subject_identifier\", \"offstudy_datetime\", \"offstudy_reason\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
+    "    .query(\"_merge=='left_only'\")\n",
+    "    .drop(columns=[\"_merge\"])\n",
+    ")\n",
+    "\n",
+    "df_visit = df_visit[df_visit.appt_datetime.dt.date<=cutoff_date]\n",
+    "\n",
+    "df_appointments = get_appointment_df()\n",
+    "df_appointments[\"site_id\"] = df_appointments.site_id.astype(str)\n",
+    "df_appointments = (\n",
+    "    df_appointments\n",
+    "    .merge(df_eos_excluded[[\"subject_identifier\", \"offstudy_datetime\", \"offstudy_reason\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
+    "    .query(\"_merge=='left_only'\")\n",
+    "    .drop(columns=[\"_merge\"])\n",
+    ")\n",
+    "\n",
+    "\n",
+    "cls = GlucoseEndpointsByDate()\n",
+    "cls.run()\n",
+    "df_endpoint = cls.endpoint_only_df.copy()\n",
+    "df_glucose = get_glucose_df()\n",
+    "# df_glucose_fbg = get_glucose_fbg_df()\n",
+    "# df_glucose = pd.concat([df_glucose, df_glucose_fbg])\n",
+    "\n",
+    "\n",
+    "enrolled = df_visit.copy()\n",
+    "enrolled[\"site_id\"] = enrolled[\"site_id\"].astype(str)\n",
+    "enrolled_pivot = (\n",
+    "    enrolled\n",
+    "    .query(\"visit_code==1000.0\").groupby([\"site_id\"])\n",
+    "    .size()\n",
+    "    .reset_index()\n",
+    "    .pivot_table(columns=\"site_id\", values=0, observed=True)\n",
+    ")\n",
+    "enrolled_pivot.columns.name=\"\"\n",
+    "enrolled_pivot[\"total\"] = enrolled_pivot[[\"10\", \"20\",\"30\",\"40\",\"60\"]].sum(axis=1)\n",
+    "\n"
+   ],
+   "id": "215212f9d44e79df",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# display headers for the per-site tables, keyed by dataframe column name\n",
+    "column_headers = dict(zip(\n",
+    "    [\"label\", \"visit_code\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"],\n",
+    "    [\"Label\", \"Visit code\", \"Hindu Mandal\", \"Amana\", \"Temeke\", \"Mwananyamala\", \"Mnazi Moja\", \"Total\"],\n",
+    "))\n",
+    "# same headers keyed by the `<column>_str` display columns; omits `visit_code`\n",
+    "column_headers_with_str = {\n",
+    "    (name if name == \"label\" else f\"{name}_str\"): header\n",
+    "    for name, header in column_headers.items()\n",
+    "    if name != \"visit_code\"\n",
+    "}"
+   ],
+   "id": "fe90271ff1799692",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 1a Visits completed to date\n",
+    "\n",
+    "df_tbl1 = df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==ONTIME_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))].groupby(by=[\"visit_code\", \"site_id\"]).size().to_frame().reset_index()\n",
+    "\n",
+    "df_tbl1.columns = [\"visit_code\", \"site_id\", \"visits\"]\n",
+    "df1 = df_tbl1.pivot(index=\"visit_code\", columns=\"site_id\", values=\"visits\").reset_index()\n",
+    "df1.columns.name = None\n",
+    "df1.columns = ['visit_code', \"10\", \"20\", \"30\", \"40\", \"60\"]\n",
+    "df1['total'] = df1[['10', '20', '30', '40', '60']].sum(axis=1)\n",
+    "df1.fillna(0, inplace=True)\n",
+    "df_attended = df1.copy().reset_index(drop=True)\n",
+    "df_attended = df_attended.fillna(0.0)"
+   ],
+   "id": "9e3d608809eea5",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "gt = df_as_great_table(\n",
+    "    df_attended[[\"visit_code\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]],\n",
+    "    title=\"Table 1a: Visits completed to date\"\n",
+    ")\n",
+    "gt = (\n",
+    "    gt\n",
+    "    .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
+    "    .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
+    "    .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
+    "    .data_color(\n",
+    "        columns=[\"visit_code\"],\n",
+    "        palette=[\"lavender\", \"thistle\"],\n",
+    "        domain=[2000, 5000],\n",
+    "        na_color=\"white\"\n",
+    "    )\n",
+    "    .tab_source_note(source_note=f\"Excludes visit reports submitted for participants eventually withdrawn on late exclusion criteria.\")\n",
+    ")\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()"
+   ],
+   "id": "a43c2fbd8a7a692c",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 1b Total scheduled appointments\n",
+    "df_appt_pivot = (\n",
+    "    df_appointments.query(\"appt_reason==@SCHEDULED_APPT\")\n",
+    "    .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
+    "    .query(\"_merge=='left_only'\")\n",
+    "    .drop(columns=[\"_merge\"])\n",
+    "    .reset_index(drop=True)\n",
+    "    .groupby([\"visit_code\", \"site_id\"])\n",
+    "    .size()\n",
+    "    .to_frame()\n",
+    "    .reset_index()\n",
+    "    .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
+    "    .reset_index()\n",
+    "    .fillna(0)\n",
+    ")\n",
+    "\n",
+    "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
+    "df_appt_pivot.columns.name = None\n",
+    "gt = df_as_great_table(\n",
+    "    df_appt_pivot,\n",
+    "    title=\"Table 1b: Total appointments\",\n",
+    "    subtitle=\"Total possible appointments not including unscheduled appointments\"\n",
+    "\n",
+    ")\n",
+    "gt = (\n",
+    "    gt\n",
+    "    .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
+    "    .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
+    "    .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
+    "    .data_color(\n",
+    "        columns=[\"visit_code\"],\n",
+    "        palette=[\"lavender\", \"thistle\"],\n",
+    "        domain=[2000, 5000],\n",
+    "        na_color=\"white\"\n",
+    "    )\n",
+    ")\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()"
+   ],
+   "id": "70eb34a139ff7095",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 1c Past scheduled appointments -- no information provided\n",
+    "df_appt_pivot = (\n",
+    "    df_appointments.query(\"appt_datetime<@cutoff_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])\")\n",
+    "    .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
+    "    .query(\"_merge=='left_only'\")\n",
+    "    .drop(columns=[\"_merge\"])\n",
+    "    .reset_index(drop=True)\n",
+    "    .groupby([\"visit_code\", \"site_id\"])\n",
+    "    .size()\n",
+    "    .to_frame()\n",
+    "    .reset_index()\n",
+    "    .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
+    "    .reset_index()\n",
+    "    .fillna(0)\n",
+    ")\n",
+    "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
+    "df_appt_pivot.columns.name = None\n",
+    "gt = df_as_great_table(\n",
+    "    df_appt_pivot,\n",
+    "    title=\"Table 1c: Past appointments not attended/not reported\",\n",
+    "    subtitle=\"Expected by now but no information provided by site\",\n",
+    ")\n",
+    "gt = (\n",
+    "    gt\n",
+    "    .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
+    "    .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
+    "    .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
+    "    .data_color(\n",
+    "        columns=[\"visit_code\"],\n",
+    "        palette=[\"lavender\", \"thistle\"],\n",
+    "        domain=[2000, 5000],\n",
+    "        na_color=\"white\"\n",
+    "    )\n",
+    "    .tab_source_note(source_note=f\"Scheduled appointment date is before {cutoff_date.strftime('%d %B %Y')}.\")\n",
+    ")\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()"
+   ],
+   "id": "f243552177b216d7",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 1d Unscheduled appointments\n",
+    "df_appt = (\n",
+    "    df_appointments.query(\"appt_reason==@UNSCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status!=@NEW_APPT\")\n",
+    "     .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
+    "    .query(\"_merge=='left_only'\")\n",
+    "    .drop(columns=[\"_merge\"])\n",
+    "    .reset_index(drop=True)\n",
+    "   .copy()\n",
+    "    .reset_index(drop=True)\n",
+    ")\n",
+    "df_appt['visit_code'] = df_appt['visit_code'].astype(int)\n",
+    "df_appt['visit_code'] = df_appt['visit_code'].astype(str)\n",
+    "\n",
+    "subjects_with_unscheduled = df_appt.subject_identifier.nunique()\n",
+    "\n",
+    "df_appt_pivot = (\n",
+    "    df_appt\n",
+    "    .groupby([\"visit_code\", \"site_id\"])\n",
+    "    .size()\n",
+    "    .to_frame()\n",
+    "    .reset_index()\n",
+    "    .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
+    "    .reset_index()\n",
+    "    .fillna(0)\n",
+    ")\n",
+    "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
+    "df_appt_pivot.columns.name = None\n",
+    "df_appt_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]] = df_appt_pivot[[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]].astype('float64')\n",
+    "\n",
+    "\n",
+    "# add totals row\n",
+    "sum_row = df_appt_pivot.select_dtypes(include='float64').sum()\n",
+    "sum_row['visit_code'] = 'Total'\n",
+    "sum_row_df = pd.DataFrame(sum_row).T\n",
+    "df_appt_pivot = pd.concat([df_appt_pivot, sum_row_df], axis=0).reset_index(drop=True)\n",
+    "\n",
+    "gt = df_as_great_table(\n",
+    "    df_appt_pivot,\n",
+    "    title=\"Table 1d: Unscheduled appointments\",\n",
+    "    subtitle=\"Appointments with sequence>0 grouped by visit code\",\n",
+    ")\n",
+    "gt = (\n",
+    "    gt\n",
+    "    .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
+    "    .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
+    "    .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
+    "    .data_color(\n",
+    "        columns=[\"visit_code\"],\n",
+    "        palette=[\"lavender\", \"thistle\"],\n",
+    "        domain=[2000, 5000],\n",
+    "        na_color=\"white\"\n",
+    "    )\n",
+    "    .fmt_number(columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"], decimals=0)\n",
+    "    .tab_source_note(source_note=f\"{subjects_with_unscheduled} participants had at least one unscheduled appointment.\")\n",
+    ")\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()"
+   ],
+   "id": "6e55569e322370a",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 1e Future scheduled appointments\n",
+    "df_appt_pivot = (\n",
+    "    df_appointments.query(\"@cutoff_date<=appt_datetime<@end_of_trial_date and appt_reason==@SCHEDULED_APPT and appt_timing==@ONTIME_APPT and appt_status.isin([@NEW_APPT])\")\n",
+    "    .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
+    "    .query(\"_merge=='left_only'\")\n",
+    "    .drop(columns=[\"_merge\"])\n",
+    "    .reset_index(drop=True)\n",
+    "    .groupby([\"visit_code\", \"site_id\"])\n",
+    "    .size()\n",
+    "    .to_frame()\n",
+    "    .reset_index()\n",
+    "    .pivot(index=\"visit_code\", columns=\"site_id\", values=0)\n",
+    "    .reset_index()\n",
+    "    .fillna(0)\n",
+    ")\n",
+    "df_appt_pivot[\"total\"] = df_appt_pivot.iloc[:,1:].sum(axis=1)\n",
+    "df_appt_pivot.columns.name = None\n",
+    "gt = df_as_great_table(\n",
+    "    df_appt_pivot,\n",
+    "    title=\"Table 1e: Future appointments\",\n",
+    ")\n",
+    "gt = (\n",
+    "    gt\n",
+    "    .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
+    "    .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
+    "    .cols_align(align=\"left\", columns=[\"visit_code\"])\n",
+    "    .data_color(\n",
+    "        columns=[\"visit_code\"],\n",
+    "        palette=[\"lavender\", \"thistle\"],\n",
+    "        domain=[2000, 5000],\n",
+    "        na_color=\"white\"\n",
+    "    )\n",
+    "    .fmt_number(columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"], decimals=0)\n",
+    "    .tab_source_note(source_note=f\"Scheduled appointment date is on or after {cutoff_date.strftime('%d %B %Y')} and before {end_of_trial_date.strftime('%d %B %Y')}.\")\n",
+    ")\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()"
+   ],
+   "id": "8193005de33cae6f",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 2 Visits Missed to Date as % of Visits Attended + Visits Missed\n",
+    "subject_count = (\n",
+    "    df_visit\n",
+    "    .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
+    "    .query(\"_merge=='left_only'\")\n",
+    "    .drop(columns=[\"_merge\"])\n",
+    "    .reset_index(drop=True)\n",
+    "    .query(\"visit_code_sequence==0 and appt_timing==@MISSED_APPT and ~appt_status.isin([@NEW_APPT, @CANCELLED_APPT])\")\n",
+    ").subject_identifier.nunique()\n",
+    "df_tbl = (\n",
+    "    df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==MISSED_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))]\n",
+    "    .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
+    "    .query(\"_merge=='left_only'\")\n",
+    "    .drop(columns=[\"_merge\"])\n",
+    "    .reset_index(drop=True)\n",
+    "    .groupby(by=[\"visit_code\", \"site_id\"])\n",
+    "    .size()\n",
+    "    .to_frame()\n",
+    "    .reset_index()\n",
+    ")\n",
+    "df_tbl.columns = [\"visit_code\", \"site_id\", \"visits\"]\n",
+    "df_tbl_pivot = df_tbl.pivot(index=\"visit_code\", columns=\"site_id\", values=\"visits\").reset_index()\n",
+    "df_tbl_pivot.columns.name = None\n",
+    "df_tbl_pivot.columns = ['visit_code', \"10\", \"20\", \"30\", \"40\", \"60\"]\n",
+    "df_tbl_pivot['total'] = df_tbl_pivot[['10', '20', '30', '40', '60']].sum(axis=1)\n",
+    "df_missed = (\n",
+    "    df_tbl_pivot\n",
+    "    .fillna(0)\n",
+    "    .copy()\n",
+    "    .set_index([\"visit_code\"])\n",
+    ")\n",
+    "\n",
+    "df_attended_display = df_attended.copy()\n",
+    "df_attended_display = (\n",
+    "    df_attended_display\n",
+    "    .set_index([\"visit_code\"])\n",
+    ")\n",
+    "\n",
+    "# Denominator = attended + missed, per visit code and site.\n",
+    "# `add(fill_value=0)` counts a visit code that is present in only one of the\n",
+    "# two frames as 0 in the other. Plain `+` gives NaN there (0 after fillna),\n",
+    "# which turns the percentage of a visit code that has missed visits but no\n",
+    "# attended visits into a division by zero.\n",
+    "attended_and_missed = (\n",
+    "    df_attended_display\n",
+    "    .add(df_missed, fill_value=0)\n",
+    "    .fillna(0)\n",
+    "    .reset_index()\n",
+    "    .set_index([\"visit_code\"])\n",
+    ")\n",
+    "\n",
+    "attended_and_missed_perc = df_missed/attended_and_missed\n",
+    "attended_and_missed_perc = (\n",
+    "    attended_and_missed_perc\n",
+    "    .fillna(0)\n",
+    "    .reset_index()\n",
+    "    .set_index([\"visit_code\"])\n",
+    ")\n",
+    "\n",
+    "df_result = df_missed.merge(attended_and_missed_perc, on=[\"visit_code\"], suffixes=(\"\", \"_perc\"))\n",
+    "for col in  [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]:\n",
+    "    col_perc = f\"{col}_perc\"\n",
+    "    df_result[col] = df_result.apply(lambda x: f\"{x[col]} ({x[col_perc]*100:.2f})\", axis=1)\n",
+    "df_result = df_result.reset_index().sort_values(by=[\"visit_code\"], ascending=True)\n",
+    "df_result = df_result.fillna(0.0)"
+   ],
+   "id": "c86c5f0ffe59e951",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "df_table = df_result[[\"visit_code\", \"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]].copy()\n",
+    "gt = df_as_great_table(\n",
+    "    df_table,\n",
+    "    title=\"Table 2a: Visits Missed to Date\",\n",
+    "    subtitle=\"as % of Visits Attended + Visits Missed\"\n",
+    ")\n",
+    "gt = (\n",
+    "    gt\n",
+    "    .cols_label({k:v for k, v in column_headers.items() if k!=\"label\"})\n",
+    "    .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
+    "    .cols_align(align=\"left\", columns=[\"visit_code\", \"label\"])\n",
+    "    .tab_style(\n",
+    "        style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
+    "        locations=loc.body(\n",
+    "            columns=[0],\n",
+    "            rows=list(range(0, len(df_table))),\n",
+    "        ),\n",
+    "    )\n",
+    "    .tab_source_note(source_note=f\"{subject_count} participants had at least one missed visit.\")\n",
+    "\n",
+    ")\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()\n"
+   ],
+   "id": "3cd8b1290091660c",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 2b: Number of missed visits by participant\n",
+    "subject_count = (\n",
+    "    df_visit\n",
+    "    .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
+    "    .query(\"_merge=='left_only'\")\n",
+    "    .drop(columns=[\"_merge\"])\n",
+    "    .reset_index(drop=True)\n",
+    "    .query(\"visit_code_sequence==0 and appt_timing==@MISSED_APPT and ~appt_status.isin([@NEW_APPT, @CANCELLED_APPT])\")\n",
+    ").subject_identifier.nunique()\n",
+    "df_tbl = (\n",
+    "    df_visit[(df_visit.visit_code_sequence==0) & (df_visit.appt_timing==MISSED_APPT) & ~(df_visit.appt_status.isin([NEW_APPT, CANCELLED_APPT]))]\n",
+    "    .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", indicator=True)\n",
+    "    .query(\"_merge=='left_only'\")\n",
+    "    .drop(columns=[\"_merge\"])\n",
+    "    .reset_index(drop=True)\n",
+    "    .groupby(by=[\"subject_identifier\", \"site_id\"])\n",
+    "    .size()\n",
+    "    .to_frame()\n",
+    "    .reset_index()\n",
+    ")\n",
+    "df_tbl.columns = [\"subject_identifier\", \"site_id\", \"missed_count\"]\n",
+    "df_tbl[\"category\"] = pd.cut(df_tbl[\"missed_count\"], bins=[0, 1, 3, 5, 7, 100], labels=[\"Missed at least 1\", \"2 to 3\", \"4 to 5\", \"6 to 7\", \"missed more than 7\"])\n",
+    "df_tbl_pivot = df_tbl.pivot_table(index=\"category\", columns=\"site_id\", values=\"missed_count\", observed=False, aggfunc=\"count\").reset_index()\n",
+    "\n",
+    "df_tbl_pivot['total'] = df_tbl_pivot.select_dtypes(include='int').sum(axis=1, skipna=True)\n",
+    "\n",
+    "sum_row = df_tbl_pivot.select_dtypes(include='int64').sum()\n",
+    "sum_row['category'] = 'Total'\n",
+    "\n",
+    "\n",
+    "df_tbl_pivot = (\n",
+    "    pd.concat([df_tbl_pivot, sum_row.to_frame().T], axis=0)\n",
+    "    .rename(columns={10: \"10\", 20: \"20\", 30: \"30\", 40: \"40\", 60: \"60\"})\n",
+    ")\n",
+    "\n",
+    "gt = df_as_great_table(\n",
+    "    df_tbl_pivot,\n",
+    "    title=\"Table 2b: Number of participants who missed one or more visits\",\n",
+    ")\n",
+    "# label, align and shade the first column of Table 2b\n",
+    "gt = (\n",
+    "    gt\n",
+    "    .cols_label({\"category\": \"Category\", **{k:v for k, v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
+    "    .cols_align(align=\"center\", columns=[\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"])\n",
+    "    .cols_align(align=\"left\", columns=[\"category\"])\n",
+    "    .tab_style(\n",
+    "        style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
+    "        locations=loc.body(\n",
+    "            columns=[0],\n",
+    "            # size the row selection from the frame rendered here, not from\n",
+    "            # `df_table`, which is left over from the Table 2a cell\n",
+    "            rows=list(range(0, len(df_tbl_pivot))),\n",
+    "        ),\n",
+    "    )\n",
+    ")\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()\n",
+    "\n"
+   ],
+   "id": "b18088e16e0bf7f7",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# func for tables 3,4,5\n",
+    "def get_row_df(row_df:pd.DataFrame, label:str)->pd.DataFrame:\n",
+    "    \"\"\"Return a one-row frame of record counts per site.\n",
+    "\n",
+    "    Args:\n",
+    "        row_df: records to count; must have a `site_id` column.\n",
+    "        label: value for the `label` column of the returned row.\n",
+    "\n",
+    "    Returns:\n",
+    "        One row with columns `label, 10, 20, 30, 40, 60`, always in that\n",
+    "        order. A site with no records in `row_df` is None. Any other site\n",
+    "        id found in `row_df` is kept, after the five known sites.\n",
+    "    \"\"\"\n",
+    "    all_sites = [10, 20, 30, 40, 60]\n",
+    "    counts = row_df.groupby(by=[\"site_id\"]).site_id.count()\n",
+    "    if counts.empty:\n",
+    "        # nothing to count: still return a labelled row so the category is\n",
+    "        # shown (as zero once the caller fills NaN) instead of vanishing\n",
+    "        empty_row = pd.DataFrame([{\"label\": label, **{site: None for site in all_sites}}])\n",
+    "        empty_row.columns.name = \"\"\n",
+    "        return empty_row\n",
+    "    row_df = counts.to_frame(name=\"n\")\n",
+    "    row_df[\"label\"] = label\n",
+    "    row_df = row_df.reset_index()\n",
+    "    row_df = row_df.pivot(index=\"label\", values=\"n\", columns=\"site_id\").reset_index()\n",
+    "    row_df.columns.name = \"\"\n",
+    "    for site in all_sites:\n",
+    "        if site not in row_df.columns:\n",
+    "            row_df[site] = None\n",
+    "    # a missing site was appended after the existing columns; restore a fixed\n",
+    "    # order because format_table_df relabels the columns by position\n",
+    "    known = [\"label\", *all_sites]\n",
+    "    other = [col for col in row_df.columns if col not in known]\n",
+    "    row_df = row_df[known + other]\n",
+    "    row_df = row_df.reset_index(drop=True)\n",
+    "    return row_df\n",
+    "\n",
+    "\n",
+    "def get_table_df(df_source:pd.DataFrame, visit_code:float|None=None, month_label:str|None=None)->pd.DataFrame:\n",
+    "    \"\"\"Build the OGTT x FBG category table, one row per category, by site.\n",
+    "\n",
+    "    Args:\n",
+    "        df_source: glucose records with `visit_code`, `site_id`,\n",
+    "            `ogtt_value` and `fbg_value` columns.\n",
+    "        visit_code: if given (truthy), only records with this visit code\n",
+    "            are counted; otherwise all of `df_source` is used.\n",
+    "        month_label: does not filter anything; kept so existing calls\n",
+    "            that pass it keep working.\n",
+    "\n",
+    "    Returns:\n",
+    "        The rows produced by `get_row_df`, stacked in this order:\n",
+    "        `Total (n)`, the nine OGTT x FBG categories, `Missing OGTT`.\n",
+    "    \"\"\"\n",
+    "    if visit_code:\n",
+    "        df_month = df_source[df_source.visit_code==visit_code].copy()\n",
+    "    else:\n",
+    "        # previously `df_month` stayed undefined (NameError) when neither\n",
+    "        # `visit_code` nor `month_label` was given\n",
+    "        df_month = df_source.copy()\n",
+    "\n",
+    "    ogtt = df_month.ogtt_value\n",
+    "    fbg = df_month.fbg_value\n",
+    "    # (label, row mask) per band; a missing value matches no band\n",
+    "    ogtt_bands = [\n",
+    "        (\"OGTT <7.8\", ogtt<7.8),\n",
+    "        (\"OGTT ≥7.8 to <11.1\", (ogtt>=7.8) & (ogtt<11.1)),\n",
+    "        (\"OGTT ≥11.1\", ogtt>=11.1),\n",
+    "    ]\n",
+    "    fbg_bands = [\n",
+    "        (\"FBG <6.1\", fbg<6.1),\n",
+    "        (\"FBG >=6.1 <7.0\", (fbg>=6.1) & (fbg<7.0)),\n",
+    "        (\"FBG >=7.0\", fbg>=7.0),\n",
+    "    ]\n",
+    "\n",
+    "    rows = [get_row_df(df_month, \"Total (n)\")]\n",
+    "    for ogtt_label, ogtt_mask in ogtt_bands:\n",
+    "        for fbg_label, fbg_mask in fbg_bands:\n",
+    "            rows.append(get_row_df(df_month[ogtt_mask & fbg_mask], f\"{ogtt_label}; {fbg_label}\"))\n",
+    "    rows.append(get_row_df(df_month[ogtt.isna()], \"Missing OGTT\"))\n",
+    "    # concatenate once instead of growing the frame row by row\n",
+    "    return pd.concat(rows)\n",
+    "\n",
+    "\n",
+    "def format_table_df(tbl_df, add_totals:bool|None=None):\n",
+    "    \"\"\"Format a table of per-site counts as `n (%)` strings.\n",
+    "\n",
+    "    Args:\n",
+    "        tbl_df: frame whose first column is `label`, followed by the five\n",
+    "            site count columns in the order 10, 20, 30, 40, 60. The first\n",
+    "            row is the denominator for the percentages.\n",
+    "        add_totals: append a `Totals` row that sums every row after the\n",
+    "            first. None (the default) means True.\n",
+    "\n",
+    "    Returns:\n",
+    "        A new frame with columns `label`, `10_str` ... `60_str`,\n",
+    "        `total_str`. Cells read `count (percent of first row)`; rows\n",
+    "        labelled `Total (n)` show the first row's plain counts instead.\n",
+    "    \"\"\"\n",
+    "    add_totals = True if add_totals is None else add_totals\n",
+    "    tbl_df = tbl_df.fillna(0.0)\n",
+    "    tbl_df[\"total\"] = tbl_df.iloc[:,1:].sum(axis=1)\n",
+    "    tbl_df = tbl_df.reset_index(drop=True)\n",
+    "\n",
+    "    if add_totals:\n",
+    "        # sum every row except the first (the denominator row)\n",
+    "        df_last = tbl_df[1:].sum().to_frame()\n",
+    "        df_last.loc[\"label\"] = np.nan\n",
+    "        df_last = df_last.reset_index()\n",
+    "        df_last.columns = [\"label\", \"value\"]\n",
+    "        df_last = df_last.pivot_table(columns=\"label\",  values=\"value\").reset_index(drop=True)\n",
+    "        df_last.columns.name = \"\"\n",
+    "        df_last[\"label\"] = \"Totals\"\n",
+    "\n",
+    "        tbl_df = pd.concat([tbl_df, df_last])\n",
+    "        tbl_df = tbl_df.reset_index(drop=True)\n",
+    "\n",
+    "    sites = [\"10\", \"20\", \"30\", \"40\", \"60\", \"total\"]\n",
+    "    # columns are relabelled by position, so the input order matters\n",
+    "    tbl_df.columns = [\"label\", *sites]\n",
+    "\n",
+    "    for site in sites:\n",
+    "        denominator = tbl_df.iloc[0][site]\n",
+    "        tbl_df[f\"{site}_perc\"] = (tbl_df[site]/denominator) * 100 if denominator>0 else 0\n",
+    "        tbl_df[f\"{site}_perc_str\"] = tbl_df[f\"{site}_perc\"].map('{:.1f}'.format)\n",
+    "\n",
+    "\n",
+    "    for site in sites:\n",
+    "        tbl_df[f\"{site}_str\"] = tbl_df[[f\"{site}\", f\"{site}_perc_str\"]].apply(lambda x: ' ('.join(x.astype(str)), axis=1)\n",
+    "        tbl_df[f\"{site}_str\"] = tbl_df[f\"{site}_str\"] + \")\"\n",
+    "\n",
+    "    cols = [\"label\", *[f\"{site}_str\" for site in sites]]\n",
+    "    # explicit copy: the assignment below must write to an independent frame,\n",
+    "    # not to a slice of tbl_df (SettingWithCopyWarning)\n",
+    "    tbl_df1 = tbl_df[cols].copy()\n",
+    "    tbl_df1.loc[tbl_df.label==\"Total (n)\"] = tbl_df.iloc[0][[\"label\", *sites]].to_list()\n",
+    "    return tbl_df1"
+   ],
+   "id": "e1bcc6507b1e49c7",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 3: OGTT and FBG at 12-month visit\n",
+    "df_table3 = get_table_df(df_glucose, 1120.0)\n",
+    "df_table3 = format_table_df(df_table3)\n",
+    "df_table3 = df_table3.fillna(0.0)\n",
+    "gt = df_as_great_table(df_table3, title=\"Table 3: OGTT and FBG at 12-month visit\")\n",
+    "gt = (\n",
+    "    gt\n",
+    "    .cols_label(column_headers_with_str)\n",
+    "    .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
+    "    .cols_align(align=\"left\", columns=[\"label\"])\n",
+    "    .cols_width(cases={\"label\": \"35%\"})\n",
+    ")\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()\n"
+   ],
+   "id": "9a9616a118ae674d",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 4: OGTT and FBG at 24-month visit\n",
+    "df_table4 = get_table_df(df_glucose, 1240.0)\n",
+    "df_table4 = format_table_df(df_table4)\n",
+    "df_table4 = df_table4.fillna(0.0)\n",
+    "gt = df_as_great_table(df_table4, title=\"Table 4: OGTT and FBG at 24-month visit\")\n",
+    "gt = (\n",
+    "    gt\n",
+    "    .cols_label(column_headers_with_str)\n",
+    "    .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
+    "    .cols_align(align=\"left\", columns=[\"label\"])\n",
+    "    .cols_width(cases={\"label\": \"35%\"})\n",
+    ")\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()"
+   ],
+   "id": "ec0988364166e130",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 5: OGTT and FBG at 36-month visit\n",
+    "# 1360.0 is the value handed to get_table_df (presumably the 36-month visit code -- confirm).\n",
+    "# NaN cells are replaced with 0.0 before rendering.\n",
+    "df_table5 = format_table_df(get_table_df(df_glucose, 1360.0)).fillna(0.0)\n",
+    "gt = (\n",
+    "    df_as_great_table(df_table5, title=\"Table 5: OGTT and FBG at 36-month visit\")\n",
+    "    .cols_label(column_headers_with_str)\n",
+    "    .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
+    "    .cols_align(align=\"left\", columns=[\"label\"])\n",
+    "    .cols_width(cases={\"label\": \"35%\"})\n",
+    ")\n",
+    "# collect the rendered HTML in html_data, then display inline\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()"
+   ],
+   "id": "59be72121202df15",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 6: Any OGTT>11.1 ever\n",
+    "# NOTE(review): the filter below is inclusive (>=11.1) while the title says \">11.1\" -- confirm which is intended.\n",
+    "row_df = df_glucose.loc[df_glucose.ogtt_value >= 11.1].copy()\n",
+    "table_df = get_row_df(row_df, \"Total (n)\")\n",
+    "df_table6 = format_table_df(table_df)\n",
+    "# only the first formatted row is rendered\n",
+    "df_table = df_table6[:1].fillna(0.0).copy().reset_index(drop=True)\n",
+    "gt = (\n",
+    "    df_as_great_table(df_table, title=\"Table 6: Any OGTT>11.1 ever\")\n",
+    "    .cols_label(column_headers_with_str)\n",
+    "    .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
+    "    .cols_align(align=\"left\", columns=[\"label\"])\n",
+    "    .cols_width(cases={\"label\": \"35%\"})\n",
+    ")\n",
+    "# collect the rendered HTML in html_data, then display inline\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()"
+   ],
+   "id": "f016ddbe736c2f93",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# func for table 7\n",
+    "def get_table7_df(df_source:pd.DataFrame, visit_code:float)->pd.DataFrame:\n",
+    "    \"\"\"Build the Table 7 rows for one visit window.\n",
+    "\n",
+    "    Keeps the rows of df_source whose visit_code lies in\n",
+    "    [visit_code, visit_code + 0.9] and stacks, in order, a \"Total (n)\" row\n",
+    "    and one row per FBG band, each produced by get_row_df.\n",
+    "    \"\"\"\n",
+    "    in_window = df_source.visit_code.between(visit_code, visit_code + 0.9)\n",
+    "    df_month = df_source[in_window].copy()\n",
+    "    fbg = df_month.fbg_value\n",
+    "\n",
+    "    # (rows to summarise, row label) in display order\n",
+    "    subsets = [\n",
+    "        (df_month, \"Total (n)\"),\n",
+    "        (df_month[fbg < 6.1], \"FBG <6.1\"),\n",
+    "        (df_month[(fbg >= 6.1) & (fbg < 7.0)], \"FBG >=6.1 <7.0\"),\n",
+    "        (df_month[fbg >= 7.0], \"FBG >=7.0\"),\n",
+    "    ]\n",
+    "    # each subset is copied before being handed to get_row_df, as in the per-row version\n",
+    "    return pd.concat([get_row_df(subset.copy(), label) for subset, label in subsets])"
+   ],
+   "id": "6193907cc12f5b5c",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 7: Interim FBG results\n",
+    "# One formatted frame per interim visit: (lower bound passed to get_table7_df, visit label).\n",
+    "_table7_visits = [\n",
+    "    (1150.0, MONTH15),\n",
+    "    (1180.0, MONTH18),\n",
+    "    (1210.0, MONTH21),\n",
+    "    (1270.0, MONTH27),\n",
+    "    (1300.0, MONTH30),\n",
+    "    (1330.0, MONTH33),\n",
+    "    (1390.0, MONTH39),\n",
+    "]\n",
+    "_table7_frames = []\n",
+    "for _lower_code, _visit_label in _table7_visits:\n",
+    "    _frame = format_table_df(get_table7_df(df_glucose, _lower_code), add_totals=False)\n",
+    "    _frame[\"visit_code\"] = _visit_label\n",
+    "    _table7_frames.append(_frame)\n",
+    "\n",
+    "# keep the per-visit names bound, in case another cell refers to them\n",
+    "df_table7, df_table71, df_table72, df_table73, df_table74, df_table75, df_table76 = _table7_frames\n",
+    "\n",
+    "# stack all visits, renumber the rows and show NaN cells as 0.0\n",
+    "df_table = pd.concat(_table7_frames).reset_index(drop=True).fillna(0.0)"
+   ],
+   "id": "16adb1f965081358",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Rebinds the shared header mapping with a \"visit_code\" entry placed first.\n",
+    "column_headers_with_str = {\"visit_code\": \"Visit Code\", **column_headers_with_str}\n",
+    "# bold black text on light gray for the row-group headings\n",
+    "_row_group_style = [\n",
+    "    style.text(color=\"black\", weight=\"bold\"),\n",
+    "    style.fill(color=\"lightgray\"),\n",
+    "]\n",
+    "gt = (\n",
+    "    df_as_great_table2(df_table, title=\"Table 7: Interim FBG results\")\n",
+    "    .cols_label(column_headers_with_str)\n",
+    "    .cols_move_to_start(columns=\"visit_code\")\n",
+    "    .cols_align(align=\"center\", columns=[\"10_str\", \"20_str\", \"30_str\", \"40_str\", \"60_str\", \"total_str\"])\n",
+    "    .cols_align(align=\"left\", columns=[\"visit_code\", \"label\"])\n",
+    "    .cols_width(cases={\"label\": \"15%\"})\n",
+    "    .tab_style(style=_row_group_style, locations=loc.row_groups())\n",
+    ")\n",
+    "# collect the rendered HTML in html_data, then display inline\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()"
+   ],
+   "id": "81bcfe52d364b646",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 8: Primary Endpoint met\n",
+    "_site_cols = [\"10\", \"20\", \"30\", \"40\", \"60\"]\n",
+    "# number of endpoint rows per site and endpoint label\n",
+    "df_endpoint_grp = df_endpoint.groupby(by=[\"site_id\", \"endpoint_label\"]).size().to_frame().reset_index()\n",
+    "df_endpoint_grp.columns = [\"site_id\", \"label\", \"endpoints\"]\n",
+    "df_endpoint_pivot = df_endpoint_grp.pivot_table(index=\"label\", columns=\"site_id\", values=\"endpoints\").reset_index()\n",
+    "# Name each column after its own site id and reindex to the fixed site order.\n",
+    "# A positional rename raises (length mismatch) as soon as one site has no endpoints;\n",
+    "# with reindex that site simply becomes an empty column, as Table 8a intends.\n",
+    "df_endpoint_pivot.columns = [str(col) for col in df_endpoint_pivot.columns]\n",
+    "df_endpoint_pivot = df_endpoint_pivot.reindex(columns=[\"label\", *_site_cols])\n",
+    "df_endpoint_pivot.columns.name = \"\"\n",
+    "# append a \"Total\" row (column sums), then a per-row total across sites\n",
+    "df_endpoint_pivot.loc[len(df_endpoint_pivot)] = df_endpoint_pivot[_site_cols].sum().to_dict()\n",
+    "df_endpoint_pivot.at[len(df_endpoint_pivot)-1, 'label'] = 'Total'\n",
+    "df_endpoint_pivot['total'] = df_endpoint_pivot[_site_cols].sum(axis=1)\n",
+    "df_endpoint_pivot = df_endpoint_pivot.fillna(0.0)\n",
+    "\n",
+    "gt = df_as_great_table(\n",
+    "    df_endpoint_pivot,\n",
+    "    title=\"Table 8: Primary Endpoint met\"\n",
+    ")\n",
+    "gt = (\n",
+    "    gt\n",
+    "    .cols_label({k:v for k, v in column_headers.items() if k not in [\"visit_code\"]})\n",
+    "    .cols_align(align=\"center\", columns=[*_site_cols, \"total\"])\n",
+    "    .cols_align(align=\"left\", columns=[\"label\"])\n",
+    "    .cols_width(cases={\"label\": \"25%\"})\n",
+    ")\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()"
+   ],
+   "id": "37904c7ce49724e6",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "#read_frame(SubjectScheduleHistory.objects.filter(offschedule_model=\"meta_prn.offschedule\"), verbose=False).rename(columns={\"site\": \"site_id\"})",
+   "id": "562a1cc911a71255",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "from great_tables import html\n",
+    "\n",
+    "# Table 8a: Primary Endpoint no EOS or DM Referral\n",
+    "_site_cols = [\"10\", \"20\", \"30\", \"40\", \"60\"]\n",
+    "# schedule-history rows for the main schedule that already have an offschedule datetime\n",
+    "df_subjecthistory = read_frame(SubjectScheduleHistory.objects.filter(offschedule_model=\"meta_prn.offschedule\", offschedule_datetime__isnull=False), verbose=False).rename(columns={\"site\": \"site_id\"})\n",
+    "df_subjecthistory[\"site_id\"] = df_subjecthistory[\"site_id\"].astype(str)\n",
+    "df_endpoint_no_off = df_endpoint.merge(df_subjecthistory[[\"subject_identifier\", \"offschedule_datetime\"]], on=[\"subject_identifier\"], how=\"left\")\n",
+    "# endpoint rows with no matching offschedule datetime after the left merge\n",
+    "_endpoint_on_schedule = df_endpoint_no_off.query(\"offschedule_datetime.isna()\")\n",
+    "df_endpoint_grp = _endpoint_on_schedule.groupby(by=[\"site_id\", \"endpoint_label\"]).size().to_frame().reset_index()\n",
+    "df_endpoint_grp.columns = [\"site_id\", \"label\", \"endpoints\"]\n",
+    "df_endpoint_pivot = df_endpoint_grp.pivot_table(index=\"label\", columns=\"site_id\", values=\"endpoints\").reset_index()\n",
+    "# Name each column after its own site id, then reindex to the fixed site order.\n",
+    "# Appending missing sites at the end and renaming positionally would put one site's\n",
+    "# counts under another site's header whenever a site is absent.\n",
+    "df_endpoint_pivot.columns = [str(col) for col in df_endpoint_pivot.columns]\n",
+    "df_endpoint_pivot = df_endpoint_pivot.reindex(columns=[\"label\", *_site_cols])\n",
+    "df_endpoint_pivot.columns.name = \"\"\n",
+    "# append a \"Total\" row (column sums), then a per-row total across sites\n",
+    "df_endpoint_pivot.loc[len(df_endpoint_pivot)] = df_endpoint_pivot[_site_cols].sum().to_dict()\n",
+    "df_endpoint_pivot.at[len(df_endpoint_pivot)-1, 'label'] = 'Total'\n",
+    "df_endpoint_pivot['total'] = df_endpoint_pivot[_site_cols].sum(axis=1)\n",
+    "df_endpoint_pivot = df_endpoint_pivot.fillna(0.0)\n",
+    "# identifiers listed in the table's source note\n",
+    "subjects = _endpoint_on_schedule.subject_identifier.to_list()\n",
+    "\n",
+    "gt = df_as_great_table(\n",
+    "    df_endpoint_pivot,\n",
+    "    title=\"Table 8a: Primary Endpoint met -- participant not referred\"\n",
+    ")\n",
+    "gt = (\n",
+    "    gt\n",
+    "    .cols_label({k:v for k, v in column_headers.items() if k not in [\"visit_code\"]})\n",
+    "    .cols_align(align=\"center\", columns=[*_site_cols, \"total\"])\n",
+    "    .cols_align(align=\"left\", columns=[\"label\"])\n",
+    "    .cols_width(cases={\"label\": \"25%\"})\n",
+    "    .tab_source_note(source_note=html(\"<BR>\".join(subjects)))\n",
+    ")\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()"
+   ],
+   "id": "a74cd253ec5827f9",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "",
+   "id": "4df949cb48b088d7",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 9: Incident Rate per 1000 person years\n",
+    "\n",
+    "def get_df_main(df_visit:pd.DataFrame, lower_days:float|None=None, upper_days:float|None=None) -> tuple[pd.DataFrame, int, int]:\n",
+    "    \"\"\"Return (df_main, row count, event count) for one follow-up window.\n",
+    "\n",
+    "    Visits are kept when lower_days < followup_days <= upper_days, the visit\n",
+    "    reason is not 'missed' and visit_code < 2000.0. Subjects whose End-of-study\n",
+    "    record (get_eos_df) falls in the same window with the late-exclusion reason\n",
+    "    are removed. The remaining visits are reduced to one row per subject (column\n",
+    "    maxima) and left-merged with df_endpoint rows having\n",
+    "    days_to_endpoint > lower_days.\n",
+    "\n",
+    "    lower_days: exclusive lower bound in days; a falsy value (None or 0) means -1.\n",
+    "    upper_days: inclusive upper bound in days; None means no upper bound.\n",
+    "\n",
+    "    The event count is the number of rows with a non-null endpoint_label and\n",
+    "    lower_days < days_to_endpoint <= upper_days.\n",
+    "    \"\"\"\n",
+    "    if not lower_days:\n",
+    "        lower_days = -1\n",
+    "    if upper_days is None:\n",
+    "        # the declared default must be comparable inside the query strings below\n",
+    "        upper_days = float(\"inf\")\n",
+    "    # exclude subjects for this reason\n",
+    "    offstudy_reasons = ['Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment']\n",
+    "\n",
+    "    df_eos = get_eos_df()\n",
+    "    df_eos_excluded = (\n",
+    "        df_eos\n",
+    "        .query(\"followup_days>@lower_days and followup_days<=@upper_days and offstudy_reason.isin(@offstudy_reasons)\")\n",
+    "        .copy()\n",
+    "        .reset_index()\n",
+    "    )\n",
+    "    # anti-join: keep only visits of subjects that are not in df_eos_excluded\n",
+    "    df_visit_final = (\n",
+    "        df_visit.query(\"@lower_days<followup_days<=@upper_days and reason!='missed' and visit_code<2000.0\")\n",
+    "        .merge(df_eos_excluded[[\"subject_identifier\"]], on=\"subject_identifier\", how=\"left\", suffixes=(\"\", \"_y\"), indicator=True)\n",
+    "        .query(\"_merge=='left_only'\")\n",
+    "        .drop(columns=[\"_merge\"])\n",
+    "    )\n",
+    "    # one row per subject: latest baseline/visit datetime and longest follow-up\n",
+    "    df_main = (\n",
+    "        df_visit_final\n",
+    "        .groupby(by=[\"subject_identifier\"])[[\"baseline_datetime\", \"visit_datetime\", \"followup_days\"]]\n",
+    "        .max()\n",
+    "        .reset_index()\n",
+    "    )\n",
+    "\n",
+    "    df_main = (\n",
+    "        df_main\n",
+    "        .merge(\n",
+    "            df_endpoint.query(\"days_to_endpoint>@lower_days\")[[\"subject_identifier\", \"endpoint_label\", \"endpoint_type\", \"days_to_endpoint\"]],\n",
+    "            how=\"left\",\n",
+    "            on=[\"subject_identifier\"])\n",
+    "        .reset_index(drop=True)\n",
+    "    )\n",
+    "    # for windows starting at one year or later, count follow-up from the window start\n",
+    "    if lower_days>=365.25:\n",
+    "        df_main[\"followup_days\"] = df_main[\"followup_days\"] - lower_days\n",
+    "    df_main[\"followup_years\"] = df_main[\"followup_days\"]/365.25\n",
+    "    return df_main, len(df_main), len(df_main.query(\"@lower_days<days_to_endpoint<=@upper_days and endpoint_label.notna()\"))\n",
+    "\n",
+    "def get_rate_and_ci(events, person_years_total):\n",
+    "    \"\"\"Return (rate, lower_ci, upper_ci), each per 1000 person-years.\n",
+    "\n",
+    "    The bounds are the chi-square form of the exact Poisson interval at the\n",
+    "    0.025 and 0.975 quantiles.\n",
+    "    \"\"\"\n",
+    "    if events > 0:\n",
+    "        lower_ci = (chi2.ppf(0.025, 2 * events) / (2 * person_years_total)) * 1000\n",
+    "    else:\n",
+    "        # chi2.ppf is nan for 0 degrees of freedom; with no events the lower bound is 0\n",
+    "        lower_ci = 0.0\n",
+    "    upper_ci = (chi2.ppf(0.975, 2 * (events + 1)) / (2 * person_years_total)) * 1000\n",
+    "    return events/person_years_total*1000, lower_ci, upper_ci\n",
+    "\n",
+    "def get_incidence_data(term:str, lower_days:float, upper_days:float):\n",
+    "    \"\"\"Return {term: [person_years, subjects, events, rate, lower_ci, upper_ci]} for one window.\n",
+    "\n",
+    "    Uses the notebook-level df_visit as the visit source.\n",
+    "    \"\"\"\n",
+    "    df_main, subjects, events = get_df_main(df_visit, lower_days=lower_days, upper_days=upper_days)\n",
+    "    person_years_total = df_main.followup_years.sum()\n",
+    "    rate, lower_ci, upper_ci = get_rate_and_ci(events, person_years_total)\n",
+    "    return {term: [person_years_total, subjects, events, rate, lower_ci, upper_ci]}"
+   ],
+   "id": "920db81ad440edab",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# (row label, exclusive lower bound in days, inclusive upper bound in days)\n",
+    "_incidence_windows = [\n",
+    "    (\"total\", -1, 10000),\n",
+    "    (\"0-1 years\", -1, 365.25),\n",
+    "    (\"1-2 years\", 365.25, 2 * 365.25),\n",
+    "    (\"2-3 years\", 2 * 365.25, 3 * 365.25),\n",
+    "    (\"3+ years\", 3 * 365.25, 10 * 365.25),\n",
+    "]\n",
+    "incidence_data = {}\n",
+    "for _term, _lower, _upper in _incidence_windows:\n",
+    "    incidence_data.update(get_incidence_data(_term, lower_days=_lower, upper_days=_upper))\n",
+    "\n",
+    "# column-oriented copy of incidence_data; field order matches the lists built by get_incidence_data\n",
+    "_incidence_fields = [\"person_years\", \"subjects\", \"failures\", \"rate\", \"lower_ci\", \"upper_ci\"]\n",
+    "data = dict(\n",
+    "    label=list(incidence_data),\n",
+    "    **{\n",
+    "        field: [values[pos] for values in incidence_data.values()]\n",
+    "        for pos, field in enumerate(_incidence_fields)\n",
+    "    },\n",
+    ")\n",
+    "\n",
+    "# Table 9 shows every field except the subject count\n",
+    "df_table9 = pd.DataFrame(data={name: values for name, values in data.items() if name != \"subjects\"})"
+   ],
+   "id": "44651e865641b75d",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 9 rendering: two-decimal numbers, with a \"95%CI\" spanner over the two bounds.\n",
+    "_table9_numeric = [\"person_years\", \"failures\", \"rate\", \"lower_ci\", \"upper_ci\"]\n",
+    "_table9_labels = {\n",
+    "    \"label\": \"Label\",\n",
+    "    \"person_years\": \"Person years\",\n",
+    "    \"failures\": \"Failures\",\n",
+    "    \"rate\": \"Rate\",\n",
+    "    \"lower_ci\": \"Lower\",\n",
+    "    \"upper_ci\": \"Upper\",\n",
+    "}\n",
+    "gt = (\n",
+    "    df_as_great_table(\n",
+    "        df_table9,\n",
+    "        title=\"Table 9: Incident Rate per 1000 person years\",\n",
+    "        subtitle=md(\"using randomisation to diabetes/last seen\"),\n",
+    "    )\n",
+    "    .fmt_number(columns=_table9_numeric, decimals=2)\n",
+    "    .cols_label(_table9_labels)\n",
+    "    .cols_align(align=\"left\", columns=[\"label\"])\n",
+    "    .cols_align(align=\"center\", columns=_table9_numeric)\n",
+    "    .tab_spanner(label=\"95%CI\", columns=[\"lower_ci\", \"upper_ci\"])\n",
+    "    .tab_source_note(source_note=\"Excluding patients withdrawn for `late exclusion` criteria\")\n",
+    ")\n",
+    "gt.show()\n",
+    "html_data.append(gt.as_raw_html())"
+   ],
+   "id": "da4e67d83522768a",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 10: Proportion meeting primary endpoint\n",
+    "# proportion = failures as a percentage of subjects, per row of `data`\n",
+    "df_table10 = pd.DataFrame(data=data).assign(\n",
+    "    proportion=lambda frame: frame[\"failures\"]/frame[\"subjects\"]*100\n",
+    ")\n",
+    "gt = (\n",
+    "    df_as_great_table(\n",
+    "        df_table10[[\"label\", \"subjects\", 'failures', \"proportion\"]],\n",
+    "        title=\"Table 10: Proportion meeting primary endpoint\",\n",
+    "    )\n",
+    "    .fmt_number(columns=[\"failures\", \"proportion\"], decimals=2)\n",
+    "    .cols_label({\"label\": \"Label\", \"subjects\": \"Participants\", \"failures\": \"Failures\", \"proportion\": \"%\"})\n",
+    "    .cols_align(align=\"left\", columns=[\"label\"])\n",
+    "    .cols_align(align=\"center\", columns=[\"subjects\", \"failures\", \"proportion\"])\n",
+    "    .tab_source_note(source_note=\"Excluding patients withdrawn for `late exclusion` criteria\")\n",
+    ")\n",
+    "# collect the rendered HTML in html_data, then display inline\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()\n"
+   ],
+   "id": "2376a30803fbc743",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "",
+   "id": "afc730c0bd9d03aa",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 11a: End of Study Table (for those who have completed an end of study form)\n",
+    "df_eos = get_eos_df()\n",
+    "# Full End-of-study reason text -> short row label. A reason that is not a key here\n",
+    "# becomes NaN under .map() and is therefore left out of the grouped counts.\n",
+    "offstudy_reasons = {\n",
+    "    \"Delivered / Completed followup from pregnancy\": \"Pregnancy\",\n",
+    "    \"Patient completed 36 months of follow-up\": \"Completed 36m\",\n",
+    "    \"Patient developed diabetes\": \"Developed diabetes\",\n",
+    "    \"Other reason (specify below)\": \"Other\",\n",
+    "    \"Patient fulfilled late exclusion criteria (due to abnormal blood values or raised blood pressure at enrolment\": \"Late exclusion\",\n",
+    "    \"Patient has been transferred to another health centre\": \"Transferred out\",\n",
+    "    \"Patient is withdrawn on CLINICAL grounds ...\": \"Withdrawal: Clinical grounds\",\n",
+    "    \"Patient lost to follow-up\": \"LTFU\",\n",
+    "    \"Patient reported/known to have died\": \"Died\",\n",
+    "    \"Patient withdrew consent to participate further\": \"Withdrawal: Consent\",\n",
+    "}\n",
+    "_eos_site_cols = [\"10\", \"20\", \"30\", \"40\", \"60\"]\n",
+    "# short labels as an ordered categorical, sorted alphabetically\n",
+    "df_eos[\"offstudy_reason\"] = pd.Categorical(\n",
+    "    df_eos[\"offstudy_reason\"].map(offstudy_reasons),\n",
+    "    categories=sorted(offstudy_reasons.values()),\n",
+    "    ordered=True,\n",
+    ")\n",
+    "df_eos[\"site_id\"] = df_eos[\"site_id\"].astype(str)\n",
+    "_eos_counts = df_eos.groupby(by=[\"offstudy_reason\", \"site_id\"], observed=True).size().reset_index()\n",
+    "df_eos_pivot = (\n",
+    "    _eos_counts\n",
+    "    .pivot_table(index=\"offstudy_reason\", columns=\"site_id\", values=0, observed=True)\n",
+    "    .fillna(0)\n",
+    "    .astype(int)\n",
+    "    .reset_index()\n",
+    ")\n",
+    "df_eos_pivot[\"total\"] = df_eos_pivot[_eos_site_cols].sum(axis=1)\n",
+    "df_eos_pivot.columns.name=\"\"\n",
+    "# \"Total\" row: sums of the int64 columns\n",
+    "sum_row = df_eos_pivot.select_dtypes(include='int64').sum()\n",
+    "sum_row['offstudy_reason'] = 'Total'\n",
+    "sum_row_df = sum_row.to_frame().T\n",
+    "# enrolled_pivot comes from an earlier cell; label it and align its columns with this table\n",
+    "enrolled_pivot[\"offstudy_reason\"] = \"Enrolled\"\n",
+    "enrolled_pivot = enrolled_pivot[[*df_eos_pivot.columns]]\n",
+    "df_eos_pivot = pd.concat([enrolled_pivot, df_eos_pivot, sum_row_df], ignore_index=True)\n",
+    "\n",
+    "# position of the \"Total\" row, which is last after the concat\n",
+    "_eos_total_row = [len(df_eos_pivot)-1]\n",
+    "gt = (\n",
+    "    df_as_great_table(\n",
+    "        df_eos_pivot,\n",
+    "        title=\"Table 11a: End of study report\",\n",
+    "        subtitle=md(\"for those who have completed an End of study report\"),\n",
+    "    )\n",
+    "    .cols_label({\"offstudy_reason\": \"Reason\", **{k:v for k,v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
+    "    .cols_align(align=\"left\", columns=[\"offstudy_reason\"])\n",
+    "    .cols_align(align=\"center\", columns=[\"10\", \"20\",\"30\",\"40\",\"60\", \"total\"])\n",
+    "    .tab_style(\n",
+    "        style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
+    "        locations=loc.body(columns=[0], rows=_eos_total_row),\n",
+    "    )\n",
+    "    .tab_style(\n",
+    "        style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
+    "        locations=loc.body(columns=[\"10\", \"20\", \"30\", \"40\", \"60\"], rows=_eos_total_row),\n",
+    "    )\n",
+    "    .tab_style(\n",
+    "        style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
+    "        locations=loc.body(columns=[\"total\"], rows=_eos_total_row),\n",
+    "    )\n",
+    "    .tab_style(\n",
+    "        style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
+    "        locations=loc.body(columns=[\"offstudy_reason\"], rows=[0]),\n",
+    "    )\n",
+    ")\n",
+    "# collect the rendered HTML in html_data, then display inline\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()\n"
+   ],
+   "id": "37dcd320411bd9c5",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "",
+   "id": "9acdb7515d1490a3",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 11b: Study status\n",
+    "def get_schedule_df(df_subjecthistory:pd.DataFrame, onschedule_model:str, offschedule_model:str, mode:str)->pd.DataFrame:\n",
+    "    \"\"\"Count schedule-history rows per site for one on/off schedule model pair.\n",
+    "\n",
+    "    mode \"on\" keeps rows whose offschedule_datetime is null; any other value\n",
+    "    keeps rows where it is not null. In the result the onschedule_model column\n",
+    "    is renamed \"schedule\" and site columns \"10\"..\"60\" become \"<site>_<mode>\".\n",
+    "    \"\"\"\n",
+    "    null_check = \"isna\" if mode == \"on\" else \"notna\"\n",
+    "    site_renames = {site: f\"{site}_{mode}\" for site in [\"10\", \"20\", \"30\", \"40\", \"60\"]}\n",
+    "    selected = df_subjecthistory.query(\n",
+    "        f\"onschedule_model==@onschedule_model and offschedule_model==@offschedule_model and offschedule_datetime.{null_check}()\"\n",
+    "    )\n",
+    "    counts = selected.groupby(by=[\"onschedule_model\", \"site_id\"]).size().reset_index()\n",
+    "    df_schedule = (\n",
+    "        counts\n",
+    "        .pivot_table(index=\"onschedule_model\", columns=\"site_id\", values=0, observed=True)\n",
+    "        .reset_index()\n",
+    "        .rename(columns={\"onschedule_model\":\"schedule\", **site_renames})\n",
+    "        .fillna(0)\n",
+    "        .copy()\n",
+    "    )\n",
+    "    df_schedule.columns.name = \"\"\n",
+    "    return df_schedule\n",
+    "\n",
+    "df_subjecthistory = read_frame(SubjectScheduleHistory.objects.all(), verbose=False).rename(columns={\"site\": \"site_id\"})\n",
+    "df_subjecthistory[\"site_id\"] = df_subjecthistory[\"site_id\"].astype(str)\n",
+    "\n",
+    "# (onschedule model, offschedule model) for each schedule in the table\n",
+    "_schedule_models = [\n",
+    "    ('meta_prn.onschedule', 'meta_prn.offschedule'),\n",
+    "    ('meta_prn.onscheduledmreferral', 'meta_prn.offscheduledmreferral'),\n",
+    "    ('meta_prn.onschedulepregnancy', 'meta_prn.offschedulepregnancy'),\n",
+    "]\n",
+    "\n",
+    "# one row per schedule with per-site counts, for rows still on / already off schedule\n",
+    "df_on = (\n",
+    "    pd.concat([get_schedule_df(df_subjecthistory, _on, _off, \"on\") for _on, _off in _schedule_models])\n",
+    "    .fillna(0)\n",
+    "    .reset_index(drop=True)\n",
+    ")\n",
+    "df_off = (\n",
+    "    pd.concat([get_schedule_df(df_subjecthistory, _on, _off, \"off\") for _on, _off in _schedule_models])\n",
+    "    .fillna(0)\n",
+    "    .reset_index(drop=True)\n",
+    ")\n",
+    "\n",
+    "df_status = pd.merge(df_on, df_off, on=[\"schedule\"], how=\"outer\")\n",
+    "# interleave the per-site columns: 10_on, 10_off, 20_on, 20_off, ...\n",
+    "# (`columns` stays a notebook-level name; the Table 11c cell reads it)\n",
+    "columns = []\n",
+    "for ele in [[f\"{x}_on\", f\"{x}_off\"] for x in [\"10\", \"20\", \"30\", \"40\", \"60\"]]:\n",
+    "    columns.extend(ele)\n",
+    "# copy so the total columns are added to an independent frame, not a slice\n",
+    "df_status = df_status[[\"schedule\", *columns]].copy()\n",
+    "df_status[\"total_on\"] = df_status[[col for col in columns if col.endswith(\"_on\")]].sum(axis=1)\n",
+    "df_status[\"total_off\"] = df_status[[col for col in columns if col.endswith(\"_off\")]].sum(axis=1)\n",
+    "df_status[\"total\"] = df_status[columns].sum(axis=1)\n",
+    "df_status[\"schedule\"] = df_status.schedule.map({\"meta_prn.onschedule\": \"Main trial\", \"meta_prn.onscheduledmreferral\": \"Diabetes\", \"meta_prn.onschedulepregnancy\": \"Pregnancy\"})\n",
+    "\n",
+    "gt = df_as_great_table(\n",
+    "    df_status,\n",
+    "    title=\"Table 11b: Study status\",\n",
+    "    subtitle=md(\"Calculated from Offschedule form; not End of study report\"),\n",
+    ")\n",
+    "gt = (gt\n",
+    "    .tab_source_note(\n",
+    "        source_note=(\n",
+    "            \"Note: Offschedule form is always submitted before the End of study report. \"\n",
+    "            \"When the Offschedule form is submitted, future appointments for the schedule are removed and \"\n",
+    "            \"the site staff are actioned to submit the End of study report.\"\n",
+    "        )\n",
+    "    )\n",
+    "    .cols_label({\n",
+    "        \"10_on\": \"On\", \"10_off\": \"Off\",\n",
+    "        \"20_on\": \"On\", \"20_off\": \"Off\",\n",
+    "        \"30_on\": \"On\", \"30_off\": \"Off\",\n",
+    "        \"40_on\": \"On\", \"40_off\": \"Off\",\n",
+    "        \"60_on\": \"On\", \"60_off\": \"Off\",\n",
+    "        \"total_on\": \"On\", \"total_off\": \"Off\",\n",
+    "        \"schedule\": \"Schedule\", \"total\": \"Total\"})\n",
+    "    .cols_align(align=\"center\")\n",
+    "    # the text column of df_status is \"schedule\"; this frame has no \"label\" column\n",
+    "    .cols_align(align=\"left\", columns=[\"schedule\"])\n",
+    "    .tab_spanner(\n",
+    "        label=\"Hindu mandal\",\n",
+    "        columns=[\"10_on\", \"10_off\"],\n",
+    "    )\n",
+    "    .tab_spanner(\n",
+    "        label=\"Amana\",\n",
+    "        columns=[\"20_on\", \"20_off\"],\n",
+    "    )\n",
+    "    .tab_spanner(\n",
+    "        label=\"Temeke\",\n",
+    "        columns=[\"30_on\", \"30_off\"],\n",
+    "    )\n",
+    "    .tab_spanner(\n",
+    "        label=\"Mwananyamala\",\n",
+    "        columns=[\"40_on\", \"40_off\"],\n",
+    "    )\n",
+    "    .tab_spanner(\n",
+    "        label=\"Mnazi Moja\",\n",
+    "        columns=[\"60_on\", \"60_off\"],\n",
+    "    )\n",
+    "    .tab_spanner(\n",
+    "        label=\"Total\",\n",
+    "        columns=[\"total_on\", \"total_off\"],\n",
+    "    )\n",
+    "    # highlight the \"Off\" counts in the first body row\n",
+    "    .tab_style(\n",
+    "        style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
+    "        locations=loc.body(\n",
+    "            columns=[\"10_off\", \"20_off\", \"30_off\", \"40_off\", \"60_off\"],\n",
+    "            rows=list(range(0, 1)),\n",
+    "        ),\n",
+    "    )\n",
+    "    .tab_style(\n",
+    "        style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
+    "        locations=loc.body(\n",
+    "            columns=[\"total_off\"],\n",
+    "            rows=list(range(0, 1)),\n",
+    "        ),\n",
+    "    )\n",
+    "    .fmt_number(columns=[*[c for c in df_status.columns if c not in [\"schedule\"]]], decimals=0)\n",
+    ")\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()"
+   ],
+   "id": "35840b9a7971e4cf",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 12: Loss to Follow Up\n",
+    "df_ltfu = read_frame(LossToFollowup.objects.all(), verbose=False).rename(columns={\"site\": \"site_id\"})\n",
+    "# site ids as strings, as in the other tables, so the pivot columns are \"10\", \"20\", ...\n",
+    "df_ltfu[\"site_id\"] = df_ltfu[\"site_id\"].astype(str)\n",
+    "df_ltfu_pivot = (\n",
+    "    df_ltfu\n",
+    "    .groupby(by=[\"loss_category\", \"site_id\"],observed=True,dropna=False)\n",
+    "    .size()\n",
+    "    .reset_index()\n",
+    "    .pivot_table(index=\"loss_category\", columns=\"site_id\", values=0, observed=True,dropna=False)\n",
+    "    .fillna(0)\n",
+    "    .astype(int)\n",
+    "    .reset_index()\n",
+    ")\n",
+    "# Row totals come from this table's own site columns. They were previously taken\n",
+    "# from df_eos_pivot, i.e. the End-of-study table, which reported unrelated numbers.\n",
+    "_ltfu_site_cols = [col for col in [\"10\", \"20\", \"30\", \"40\", \"60\"] if col in df_ltfu_pivot.columns]\n",
+    "df_ltfu_pivot[\"total\"] = df_ltfu_pivot[_ltfu_site_cols].sum(axis=1)\n",
+    "df_ltfu_pivot.columns.name=\"\"\n",
+    "# \"Total\" row: sums of the int64 columns\n",
+    "sum_row = df_ltfu_pivot.select_dtypes(include='int64').sum()\n",
+    "sum_row['loss_category'] = 'Total'\n",
+    "sum_row_df = pd.DataFrame(sum_row).T\n",
+    "df_ltfu_pivot = pd.concat([df_ltfu_pivot, sum_row_df], ignore_index=True)\n",
+    "df_ltfu_pivot\n"
+   ],
+   "id": "534c51e7321e2ef3",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 11c: End of study report not submitted\n",
+    "\n",
+    "_eos_site_cols = [\"10\", \"20\", \"30\", \"40\", \"60\"]\n",
+    "# `columns` and df_status come from the Table 11b cell; df_eos_pivot from the Table 11a cell\n",
+    "_off_cols = [col for col in columns if \"off\" in col]\n",
+    "# per-site \"off schedule\" counts for the main trial\n",
+    "df1 = (\n",
+    "    df_status\n",
+    "    .query(\"schedule=='Main trial'\")[_off_cols]\n",
+    "    .rename(columns=dict(zip(_off_cols, _eos_site_cols)))\n",
+    "    .reset_index(drop=True)\n",
+    ")\n",
+    "# per-site totals of submitted End of study reports\n",
+    "df2 = (\n",
+    "    df_eos_pivot\n",
+    "    .query(\"offstudy_reason=='Total'\")[_eos_site_cols]\n",
+    "    .reset_index(drop=True)\n",
+    ")\n",
+    "\n",
+    "# off schedule minus reported = End of study reports still outstanding\n",
+    "df_eos_not_reported = df1-df2\n",
+    "df_eos_not_reported[\"schedule\"] = 'Main trial'\n",
+    "df_eos_not_reported[\"total\"] = df_eos_not_reported[_eos_site_cols].sum(axis=1)\n",
+    "df_eos_not_reported = df_eos_not_reported[[\"schedule\", *_eos_site_cols, \"total\"]]\n",
+    "\n",
+    "# Style the last row of the frame that is actually rendered. The row index was\n",
+    "# previously computed from df_eos_pivot, which points past the end of this table.\n",
+    "_not_reported_last_row = [len(df_eos_not_reported)-1]\n",
+    "gt = df_as_great_table(\n",
+    "    df_eos_not_reported,\n",
+    "    title=\"Table 11c: End of study report not submitted\",\n",
+    "    subtitle=md(\"End of study report expected based on Offschedule form\"),\n",
+    ")\n",
+    "gt = (\n",
+    "    gt\n",
+    "    .cols_label({\"schedule\": \"Schedule\", **{k:v for k,v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
+    "    .cols_align(align=\"left\", columns=[\"schedule\"])\n",
+    "    .cols_align(align=\"center\", columns=[*_eos_site_cols, \"total\"])\n",
+    "    .tab_style(\n",
+    "        style=[style.fill(color=\"snow\"), style.text(color=\"black\")],\n",
+    "        locations=loc.body(\n",
+    "            columns=[0],\n",
+    "            rows=_not_reported_last_row),\n",
+    "        )\n",
+    "    .tab_style(\n",
+    "        style=[style.fill(color=\"lightblue\"), style.text(color=\"black\")],\n",
+    "        locations=loc.body(\n",
+    "            columns=_eos_site_cols,\n",
+    "            rows=_not_reported_last_row,\n",
+    "        ),\n",
+    "    )\n",
+    "    .tab_style(\n",
+    "        style=[style.fill(color=\"lightgreen\"), style.text(color=\"black\")],\n",
+    "        locations=loc.body(\n",
+    "            columns=[\"total\"],\n",
+    "            rows=_not_reported_last_row,\n",
+    "        ),\n",
+    "    )\n",
+    ")\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()\n"
+   ],
+   "id": "25d05831ef76f267",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "# Table 13: Baseline Sample",
+   "id": "b607a436749cc3b2",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# Table 15: Consented to extended followup\n",
+    "_consent_site_cols = [\"10\", \"20\", \"30\", \"40\", \"60\"]\n",
+    "# consent-extension records where the participant agreed\n",
+    "df_consented = (\n",
+    "    read_frame(SubjectConsentV1Ext.objects.all(), verbose=False)\n",
+    "    .query(\"agrees_to_extension==@YES\")\n",
+    "    .rename(columns={\"site\": \"site_id\"})\n",
+    ")\n",
+    "df_consented[\"site_id\"] = df_consented.site_id.astype(str)\n",
+    "df_consented[\"month\"] = df_consented.report_datetime.dt.strftime(\"%m\")\n",
+    "df_consented[\"year\"] = df_consented.report_datetime.dt.strftime(\"%Y\")\n",
+    "# number of consents per site, year and month\n",
+    "df_consented_grp = (\n",
+    "    df_consented\n",
+    "    .groupby(by=[\"site_id\", \"year\", \"month\"])\n",
+    "    .size()\n",
+    "    .reset_index()\n",
+    "    .sort_values(by=[\"site_id\", \"year\", \"month\"], ascending=True)\n",
+    "    .reset_index(drop=True)\n",
+    ")\n",
+    "df_consented_pivot = (\n",
+    "    df_consented_grp\n",
+    "    .pivot_table(index=[\"year\", \"month\"], columns=\"site_id\", values=0, aggfunc=\"sum\")\n",
+    "    .reset_index()\n",
+    "    .fillna(0)\n",
+    ")\n",
+    "# Every site gets a column even before its first consent. Only \"60\" used to be\n",
+    "# guarded, so any other site without consents raised a KeyError further down.\n",
+    "for _site in _consent_site_cols:\n",
+    "    if _site not in df_consented_pivot.columns:\n",
+    "        df_consented_pivot[_site] = 0.0\n",
+    "# fixed report order; any unexpected columns are kept at the end\n",
+    "_consent_other_cols = [col for col in df_consented_pivot.columns if col not in [\"year\", \"month\", *_consent_site_cols]]\n",
+    "df_consented_pivot = df_consented_pivot[[\"year\", \"month\", *_consent_site_cols, *_consent_other_cols]]\n",
+    "df_consented_pivot.columns.name=\"\"\n",
+    "df_consented_pivot[\"year\"] = df_consented_pivot[\"year\"].astype(str)\n",
+    "df_consented_pivot[\"month\"] = df_consented_pivot[\"month\"].astype(str)\n",
+    "\n",
+    "# \"Total\" row (column sums), then a per-row total across sites\n",
+    "sum_row = df_consented_pivot[_consent_site_cols].sum()\n",
+    "sum_row['year'] = \"Total\"\n",
+    "sum_row['month'] = \"\"\n",
+    "df_consented_pivot = pd.concat([df_consented_pivot, sum_row.to_frame().T], ignore_index=True)\n",
+    "df_consented_pivot[\"total\"] = df_consented_pivot[_consent_site_cols].sum(axis=1).astype(int)\n",
+    "df_consented_pivot[_consent_site_cols] = df_consented_pivot[_consent_site_cols].astype(int)\n",
+    "gt = df_as_great_table2(\n",
+    "    df_consented_pivot,\n",
+    "    title=\"Table 15: Consented to extended followup\",\n",
+    "    rowname_col=\"month\",\n",
+    "    groupname_col=\"year\",\n",
+    ")\n",
+    "gt = (\n",
+    "    gt\n",
+    "    .cols_label({\"year\": \"Year\", \"month\": \"Month\", **{k:v for k, v in column_headers.items() if k not in [\"visit_code\", \"label\"]}})\n",
+    "    .cols_align(align=\"center\")\n",
+    "    .fmt_number(columns=[*_consent_site_cols, \"total\"], decimals=0)\n",
+    "    .tab_stubhead(label=\"Consented\")\n",
+    "    .tab_style(\n",
+    "        style=[\n",
+    "            style.text(color=\"black\", weight=\"bold\"),\n",
+    "            style.fill(color=\"lightgray\")\n",
+    "        ],\n",
+    "        locations=loc.row_groups()\n",
+    "    )\n",
+    ")\n",
+    "html_data.append(gt.as_raw_html())\n",
+    "gt.show()"
+   ],
+   "id": "8acd2dd7e5a958e9",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "",
+   "id": "45ee71bc4a06f8f7",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# gather raw html\n",
+    "# Wrap each rendered table in a div styled with page-break-inside: avoid so the\n",
+    "# renderer tries not to split a table across pages, then build one HTML document.\n",
+    "raw_html = [f'<div class=\"page-break\">{s}</div>' for s in html_data]\n",
+    "style_css = \"\"\"\n",
+    "<style>\n",
+    "  .page-break {\n",
+    "    page-break-inside: avoid; /* Avoid splitting this element across pages */\n",
+    "  }\n",
+    "  .table-header {\n",
+    "    font-weight: bold;\n",
+    "    font-size: 18px;\n",
+    "    text-align: center;\n",
+    "    border-bottom: None;\n",
+    "  }\n",
+    "</style>\n",
+    "\"\"\"\n",
+    "raw_html = ''.join(raw_html)\n",
+    "# <style> is only valid inside <head>; keep the charset declaration first\n",
+    "raw_html = f'<!DOCTYPE html>\\n<html lang=\"en\">\\n<head>\\n<meta charset=\"utf-8\"/>\\n{style_css}\\n</head>\\n<body>\\n' + document_title + raw_html + '\\n</body>\\n</html>\\n'"
+   ],
+   "id": "a38e9d7ba59d063b",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": [
+    "# render html to PDF\n",
+    "# Options handed to pdfkit; entries whose value is None are\n",
+    "# presumably value-less command-line flags (pdfkit convention) -- confirm.\n",
+    "pdf_options = {\n",
+    "    \"footer-center\": \"Page [page] of [topage]\",\n",
+    "    \"footer-font-size\": \"8\",\n",
+    "    \"footer-spacing\": \"5\",\n",
+    "    \"encoding\": \"UTF-8\",\n",
+    "    \"margin-top\": \"10mm\",\n",
+    "    \"margin-right\": \"15mm\",\n",
+    "    \"margin-bottom\": \"15mm\",\n",
+    "    \"margin-left\": \"15mm\",\n",
+    "    \"header-center\": study_title,\n",
+    "    \"header-font-size\": \"6\",\n",
+    "    \"header-spacing\": \"0\",\n",
+    "    \"disable-javascript\": None,\n",
+    "    \"no-outline\": None,\n",
+    "}\n",
+    "# write the assembled report into the analysis folder\n",
+    "pdfkit.from_string(\n",
+    "    raw_html,\n",
+    "    str(analysis_folder / pdf_filename),\n",
+    "    options=pdf_options,\n",
+    "    verbose=True,\n",
+    ")"
+   ],
+   "id": "792243aad557cc86",
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "source": "",
+   "id": "4cfdfa6f69c3916a",
+   "outputs": [],
+   "execution_count": null
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

meta-edc 1.0.6__py3-none-any.whl → 1.0.7__py3-none-any.whl

meta-edc 1.0.6py3-none-any.whl → 1.0.7py3-none-any.whl