clinicedc 2.0.12__py3-none-any.whl → 2.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of clinicedc might be problematic. Click here for more details.
- {clinicedc-2.0.12.dist-info → clinicedc-2.0.13.dist-info}/METADATA +2 -1
- {clinicedc-2.0.12.dist-info → clinicedc-2.0.13.dist-info}/RECORD +101 -24
- edc_action_item/migrations/0017_auto_20190305_0123.py +1 -1
- edc_action_item/migrations/0030_edcpermissions.py +1 -1
- edc_adverse_event/migrations/0001_initial.py +1 -1
- edc_adverse_event/migrations/0002_auto_20190802_0059.py +1 -1
- edc_adverse_event/migrations/0008_auto_20220825_0451.py +1 -1
- edc_adverse_event/migrations/0009_auto_20220907_0157.py +1 -1
- edc_adverse_event/model_mixins/hospitaization/hospitalization_model_mixin.py +1 -3
- edc_analytics/__init__.py +3 -0
- edc_analytics/apps.py +8 -0
- edc_analytics/constants.py +26 -0
- edc_analytics/custom_tables/__init__.py +11 -0
- edc_analytics/custom_tables/age.py +72 -0
- edc_analytics/custom_tables/art.py +88 -0
- edc_analytics/custom_tables/bmi.py +125 -0
- edc_analytics/custom_tables/bp.py +103 -0
- edc_analytics/custom_tables/fasting.py +126 -0
- edc_analytics/custom_tables/fbg.py +98 -0
- edc_analytics/custom_tables/fbg_ogtt.py +384 -0
- edc_analytics/custom_tables/gender.py +12 -0
- edc_analytics/custom_tables/hba1c.py +87 -0
- edc_analytics/custom_tables/ogtt.py +95 -0
- edc_analytics/custom_tables/waist.py +105 -0
- edc_analytics/data.py +36 -0
- edc_analytics/row/__init__.py +4 -0
- edc_analytics/row/row_definition.py +43 -0
- edc_analytics/row/row_definitions.py +32 -0
- edc_analytics/row/row_statistics.py +88 -0
- edc_analytics/row/row_statistics_with_gender.py +115 -0
- edc_analytics/stata/__init__.py +1 -0
- edc_analytics/stata/get_stata_labels_from_model.py +44 -0
- edc_analytics/styler.py +93 -0
- edc_analytics/table.py +108 -0
- edc_analytics/urls.py +6 -0
- edc_appointment/migrations/0018_auto_20190305_0123.py +1 -1
- edc_auth/migrations/0001_squashed_0033_alter_userprofile_is_multisite_viewer.py +1 -1
- edc_auth/migrations/0012_auto_20191026_0034.py +1 -1
- edc_auth/migrations/0013_auto_20191026_0055.py +1 -1
- edc_auth/migrations/0025_permissions.py +1 -1
- edc_consent/migrations/0001_initial.py +1 -1
- edc_dashboard/migrations/0001_initial.py +1 -1
- edc_data_manager/migrations/0001_initial.py +1 -1
- edc_data_manager/migrations/0025_edcpermissions.py +1 -1
- edc_dx/__init__.py +6 -0
- edc_dx/apps.py +5 -0
- edc_dx/diagnoses.py +250 -0
- edc_dx/form_validators/__init__.py +2 -0
- edc_dx/form_validators/diagnosis_form_validator_mixin.py +54 -0
- edc_dx/form_validators/result_form_validator_mixin.py +65 -0
- edc_dx/utils.py +42 -0
- edc_dx_review/__init__.py +0 -0
- edc_dx_review/apps.py +5 -0
- edc_dx_review/auth_objects.py +13 -0
- edc_dx_review/auths.py +12 -0
- edc_dx_review/choices.py +24 -0
- edc_dx_review/constants.py +7 -0
- edc_dx_review/fieldsets.py +47 -0
- edc_dx_review/form_mixins/__init__.py +3 -0
- edc_dx_review/form_mixins/clinical_review_baseline_required_form_mixin.py +25 -0
- edc_dx_review/form_validator_mixins/__init__.py +6 -0
- edc_dx_review/form_validator_mixins/clinical_review_baseline_form_validator_mixin.py +7 -0
- edc_dx_review/form_validator_mixins/clinical_review_followup_form_validator_mixin.py +25 -0
- edc_dx_review/list_data.py +19 -0
- edc_dx_review/medical_date.py +195 -0
- edc_dx_review/migrations/0001_initial.py +307 -0
- edc_dx_review/migrations/0002_diagnosislocations_extra_value_and_more.py +32 -0
- edc_dx_review/migrations/0003_alter_diagnosislocations_options_and_more.py +148 -0
- edc_dx_review/migrations/0004_remove_diagnosislocations_edc_dx_revi_name_a39b40_idx_and_more.py +20 -0
- edc_dx_review/migrations/__init__.py +0 -0
- edc_dx_review/model_mixins/__init__.py +20 -0
- edc_dx_review/model_mixins/clinical_review_baseline_model_mixin.py +25 -0
- edc_dx_review/model_mixins/clinical_review_followup/__init__.py +5 -0
- edc_dx_review/model_mixins/clinical_review_followup/clinical_review_followup_chol_model_mixin.py +54 -0
- edc_dx_review/model_mixins/clinical_review_followup/clinical_review_followup_dm_model_mixin.py +54 -0
- edc_dx_review/model_mixins/clinical_review_followup/clinical_review_followup_hiv_model_mixin.py +54 -0
- edc_dx_review/model_mixins/clinical_review_followup/clinical_review_followup_htn_model_mixin.py +56 -0
- edc_dx_review/model_mixins/clinical_review_followup/clinical_review_followup_model_mixin.py +25 -0
- edc_dx_review/model_mixins/dx_location_model_mixin.py +17 -0
- edc_dx_review/model_mixins/factory/__init__.py +4 -0
- edc_dx_review/model_mixins/factory/baseline_review_model_mixin_factory.py +55 -0
- edc_dx_review/model_mixins/factory/calculate_date.py +43 -0
- edc_dx_review/model_mixins/factory/dx_initial_review_model_mixin_factory.py +97 -0
- edc_dx_review/model_mixins/factory/followup_review_model_mixin_factory.py +39 -0
- edc_dx_review/model_mixins/factory/rx_initial_review_model_mixin_factory.py +69 -0
- edc_dx_review/model_mixins/followup_review/__init__.py +2 -0
- edc_dx_review/model_mixins/followup_review/followup_review_model_mixin.py +22 -0
- edc_dx_review/model_mixins/followup_review/hiv_followup_review_model_mixin.py +32 -0
- edc_dx_review/model_mixins/initial_review/__init__.py +6 -0
- edc_dx_review/model_mixins/initial_review/chol_initial_review_model_mixin.py +34 -0
- edc_dx_review/model_mixins/initial_review/hiv_initial_model_mixins.py +119 -0
- edc_dx_review/model_mixins/initial_review/ncd_initial_review_model_mixin.py +42 -0
- edc_dx_review/models.py +20 -0
- edc_dx_review/radio_fields.py +30 -0
- edc_dx_review/utils.py +220 -0
- edc_export/migrations/0004_auto_20190305_0123.py +1 -1
- edc_export/migrations/0013_edcpermissions.py +1 -1
- edc_facility/migrations/0005_healthfacility_healthfacilitytypes_and_more.py +1 -1
- edc_vitals/model_mixins/blood_pressure_model_mixin.py +1 -0
- {clinicedc-2.0.12.dist-info → clinicedc-2.0.13.dist-info}/WHEEL +0 -0
- {clinicedc-2.0.12.dist-info → clinicedc-2.0.13.dist-info}/licenses/LICENSE +0 -0
edc_analytics/data.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Data:
|
|
8
|
+
|
|
9
|
+
def __init__(
|
|
10
|
+
self,
|
|
11
|
+
label: str,
|
|
12
|
+
table_df: pd.DataFrame,
|
|
13
|
+
data_df: pd.DataFrame,
|
|
14
|
+
filename_prefix: str,
|
|
15
|
+
folder: str | None = None,
|
|
16
|
+
):
|
|
17
|
+
self.label = label
|
|
18
|
+
self.table_df = table_df
|
|
19
|
+
self.data_df = data_df
|
|
20
|
+
self.filename_prefix = filename_prefix
|
|
21
|
+
self.folder = folder or "~/"
|
|
22
|
+
|
|
23
|
+
def __repr__(self):
|
|
24
|
+
return f"Data({self.label}) <obs={len(self.data_df)}>"
|
|
25
|
+
|
|
26
|
+
def to_csv(
|
|
27
|
+
self, folder: str | None = None, filename: str | None = None, cols: int | None = None
|
|
28
|
+
):
|
|
29
|
+
folder = folder or self.folder
|
|
30
|
+
cols = cols or 5
|
|
31
|
+
datestamp = datetime.now().strftime("%Y%m%d%H%M")
|
|
32
|
+
filename = filename or f"{self.filename_prefix}_table_{self.label}_{datestamp}.csv"
|
|
33
|
+
path = Path(folder) / filename
|
|
34
|
+
self.table_df.iloc[:, :cols].to_csv(
|
|
35
|
+
path_or_buf=path, encoding="utf-8", index=0, sep="|"
|
|
36
|
+
)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
from ..constants import N_WITH_ROW_PROP, STATISTICS
|
|
4
|
+
from ..styler import StylerError
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class RowDefinition:
    def __init__(
        self,
        title: str | None = None,
        label: str = None,
        colname: str | None = None,
        condition: pd.Series = None,
        columns: dict[str, tuple[str, int]] = None,
        drop: bool | None = None,
    ):
        """
        Describe a single table row; Table hands the definition to
        the RowStatistics class.

        :param title: row title; stored as "" when not given.
        :param label: row label.
        :param colname: column name stored for the row.
        :param condition: condition used to filter the dataframe.
        :param columns: dictionary of label: (statistic, places), e.g.
                {
                    FEMALE: (N_WITH_ROW_PROP, 2),
                    MALE: (N_WITH_ROW_PROP, 2),
                    "All": (N_ONLY, 2),
                }
            Defaults to {"All": (N_WITH_ROW_PROP, 2)}.
        :param drop: drops rows from the source dataframe once used by
            the row definition. Defaults to False.
        :raises StylerError: if a column names a statistic that is
            not listed in STATISTICS.
        """
        if not columns:
            columns = {"All": (N_WITH_ROW_PROP, 2)}
        self.columns = columns
        self.drop = drop if drop is not None else False
        self.condition = condition
        self.colname = colname
        self.label = label
        self.title = title if title else ""

        # every column must name a known statistic; places are not checked
        for column_label, (statistic, _) in self.columns.items():
            if statistic in STATISTICS:
                continue
            raise StylerError(
                f"Unknown statistic. Got `{statistic}` for column `{column_label}`."
            )
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from typing import Iterable
|
|
2
|
+
|
|
3
|
+
from .row_definition import RowDefinition
|
|
4
|
+
from .row_statistics import RowStatistics
|
|
5
|
+
from .row_statistics_with_gender import RowStatisticsWithGender
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class RowDefinitions:
    """Ordered collection of RowDefinition objects.

    Also stores the column name, the row statistics class and the
    `reverse_rows` flag given at construction.
    """

    def __init__(
        self,
        colname: str = None,
        row_statistics_cls: RowStatistics | RowStatisticsWithGender = None,
        reverse_rows: bool = False,
    ):
        self.colname = colname
        self.row_statistics_cls = row_statistics_cls
        self.reverse_rows = reverse_rows
        self.definitions: list[RowDefinition] = []

    def __iter__(self) -> Iterable[RowDefinition]:
        """Iterate over the definitions in their current order."""
        return iter(self.definitions)

    def add(self, row_definition: RowDefinition):
        """Append a single row definition."""
        self.definitions.append(row_definition)

    def extend(self, row_definition: list[RowDefinition]):
        """Append every row definition from the given list."""
        self.definitions.extend(row_definition)

    def reverse(self):
        """Reverse the stored definitions in place."""
        self.definitions.reverse()
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from pandas.api.types import is_numeric_dtype
|
|
4
|
+
|
|
5
|
+
from ..constants import COUNT_COLUMN, N_ONLY
|
|
6
|
+
from ..styler import Styler
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RowStatistics:
|
|
10
|
+
"""A class that calculates descriptive statistics for an
|
|
11
|
+
indictor.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
def __init__(
|
|
15
|
+
self,
|
|
16
|
+
colname: str = None,
|
|
17
|
+
df_numerator: pd.DataFrame = None,
|
|
18
|
+
df_denominator: pd.DataFrame = None,
|
|
19
|
+
df_all: pd.DataFrame = None,
|
|
20
|
+
coltotal: float | int | None = None,
|
|
21
|
+
style: str | None = None,
|
|
22
|
+
places: int | None = None,
|
|
23
|
+
):
|
|
24
|
+
self.places = 2 if places is None else places
|
|
25
|
+
self.style = style or N_ONLY
|
|
26
|
+
|
|
27
|
+
# counts (6 columns)
|
|
28
|
+
self.count = 0.0 if df_numerator.empty else len(df_numerator)
|
|
29
|
+
self.total = len(df_all)
|
|
30
|
+
self.coltotal = coltotal or len(df_denominator)
|
|
31
|
+
self.rowtotal = self.count # rowtotal or len(df_denominator)
|
|
32
|
+
self.colprop = self.count / self.coltotal if self.count else 0.0
|
|
33
|
+
self.rowprop = self.count / self.total if self.count else 0.0
|
|
34
|
+
|
|
35
|
+
# numeric stats (9 columns)
|
|
36
|
+
if colname and not df_numerator.empty and is_numeric_dtype(df_numerator[colname]):
|
|
37
|
+
stats = df_numerator[colname].describe()
|
|
38
|
+
self.mean = stats.loc["mean"]
|
|
39
|
+
self.sd = stats.loc["std"]
|
|
40
|
+
self.min = stats.loc["min"]
|
|
41
|
+
self.max = stats.loc["max"]
|
|
42
|
+
self.q25, self.q50, self.q75 = df_numerator[colname].quantile([0.25, 0.50, 0.75])
|
|
43
|
+
stats = df_numerator[colname].agg(["mean", "sem"])
|
|
44
|
+
self.ci95l = stats.loc["mean"] - 1.96 * stats.loc["sem"]
|
|
45
|
+
self.ci95h = stats.loc["mean"] + 1.96 * stats.loc["sem"]
|
|
46
|
+
else:
|
|
47
|
+
(
|
|
48
|
+
self.mean,
|
|
49
|
+
self.sd,
|
|
50
|
+
self.min,
|
|
51
|
+
self.max,
|
|
52
|
+
self.q25,
|
|
53
|
+
self.q50,
|
|
54
|
+
self.q75,
|
|
55
|
+
self.ci95l,
|
|
56
|
+
self.ci95h,
|
|
57
|
+
) = [np.nan] * 9
|
|
58
|
+
|
|
59
|
+
def values_list(self) -> list:
|
|
60
|
+
return list(self.as_dict().values())
|
|
61
|
+
|
|
62
|
+
def labels(self) -> list:
|
|
63
|
+
return list(self.as_dict().keys())
|
|
64
|
+
|
|
65
|
+
def as_dict(self):
|
|
66
|
+
return {
|
|
67
|
+
COUNT_COLUMN: self.count,
|
|
68
|
+
"coltotal": self.coltotal,
|
|
69
|
+
"rowtotal": self.rowtotal,
|
|
70
|
+
"total": self.total,
|
|
71
|
+
"colprop": self.colprop,
|
|
72
|
+
"rowprop": self.rowprop,
|
|
73
|
+
"mean": self.mean,
|
|
74
|
+
"sd": self.sd,
|
|
75
|
+
"min": self.min,
|
|
76
|
+
"max": self.max,
|
|
77
|
+
"q25": self.q25,
|
|
78
|
+
"q50": self.q50,
|
|
79
|
+
"q75": self.q75,
|
|
80
|
+
"ci95l": self.ci95l,
|
|
81
|
+
"ci95h": self.ci95h,
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
def formatted_cell(self) -> str:
|
|
85
|
+
return Styler(style=self.style, statistics=self, places=self.places).value
|
|
86
|
+
|
|
87
|
+
def row(self):
|
|
88
|
+
return [self.formatted_cell()] + self.values_list()
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from edc_constants.constants import FEMALE, MALE
|
|
3
|
+
|
|
4
|
+
from .row_statistics import RowStatistics
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class RowStatisticsError(Exception):
    """Exception type declared for row statistics errors."""
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class RowStatisticsFemale(RowStatistics):
    """RowStatistics over the rows of the numerator where
    `gender` equals FEMALE.
    """

    def __init__(
        self,
        df_numerator: pd.DataFrame = None,
        df_denominator: pd.DataFrame = None,
        **kwargs,
    ):
        # narrow the numerator only; the denominator is passed through
        is_female = df_numerator["gender"] == FEMALE
        super().__init__(
            df_numerator=df_numerator.loc[is_female],
            df_denominator=df_denominator,
            **kwargs,
        )
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class RowStatisticsMale(RowStatistics):
    """RowStatistics over the rows of the numerator where
    `gender` equals MALE.
    """

    def __init__(
        self,
        df_numerator: pd.DataFrame = None,
        df_denominator: pd.DataFrame = None,
        **kwargs,
    ):
        # narrow the numerator only; the denominator is passed through
        is_male = df_numerator["gender"] == MALE
        super().__init__(
            df_numerator=df_numerator.loc[is_male],
            df_denominator=df_denominator,
            **kwargs,
        )
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class RowStatisticsWithGender(RowStatistics):
    def __init__(
        self,
        columns: dict[str, tuple[str, int]] = None,
        df_all: pd.DataFrame = None,
        coltotal: float | int | None = None,
        **kwargs,
    ):
        """
        Custom row for displaying with gender columns: F, M, All.

        The instance itself holds the "All" statistics; `self.f` and
        `self.m` hold the female-only and male-only statistics.

        :param columns: dict of {col: (style name, places)} with the
            keys FEMALE, MALE and "All"
        :param df_all: dataframe of all rows; also used to count the
            rows per gender for the gender column totals
        :param coltotal: column total for the "All" statistics
        :param kwargs: passed on to each RowStatistics (for example
            colname, df_numerator, df_denominator)

        Note: the default df["gender"] is "M" or "F".
        """
        female_style, female_places = columns[FEMALE]
        male_style, male_places = columns[MALE]
        all_style, all_places = columns["All"]

        super().__init__(
            style=all_style,
            places=all_places,
            coltotal=coltotal,
            df_all=df_all,
            **kwargs,
        )

        males = df_all[df_all["gender"] == MALE]
        self.m = RowStatisticsMale(
            style=male_style,
            places=male_places,
            coltotal=len(males),
            df_all=df_all,
            **kwargs,
        )
        females = df_all[df_all["gender"] == FEMALE]
        self.f = RowStatisticsFemale(
            style=female_style,
            places=female_places,
            coltotal=len(females),
            df_all=df_all,
            **kwargs,
        )

    def values_list(self, style: str | None = None, places: int | None = None) -> list:
        """Return formatted cells, then female, male and overall values.

        `style` and `places` are accepted but not used.
        """
        overall = super().values_list()
        combined = list(self.formatted_cells().values())
        combined = combined + self.f.values_list()
        combined = combined + self.m.values_list()
        return combined + overall

    def labels(self) -> list[str]:
        """Return labels matching `values_list`; the female and male
        labels are prefixed with "f" and "m".
        """
        overall = super().labels()
        combined = list(self.formatted_cells().keys())
        combined = combined + [f"f{name}" for name in self.f.labels()]
        combined = combined + [f"m{name}" for name in self.m.labels()]
        return combined + overall

    def row(self):
        # NOTE(review): the first element is the formatted-cells dict and
        # values_list() already starts with the formatted cells -- confirm
        # this is what callers expect.
        return [self.formatted_cells()] + self.values_list()

    def formatted_cells(self) -> dict:
        """Return the formatted cell for each of F, M and All."""
        overall_cell = super().formatted_cell()
        return {
            "F": self.f.formatted_cell(),
            "M": self.m.formatted_cell(),
            "All": overall_cell,
        }
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .get_stata_labels_from_model import get_stata_labels_from_model
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from bs4 import BeautifulSoup
|
|
5
|
+
from django.apps import apps as django_apps
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def strip_html(text: str) -> str:
    """Return `text` with HTML markup removed.

    Missing values (as judged by `pd.isna`) and text that contains no
    tag-like `<...>` sequence are returned unchanged; otherwise the
    text is parsed with BeautifulSoup and its plain text is returned.
    """
    if pd.isna(text):
        return text
    has_tag = re.search(r"<[^>]+>", text) is not None
    if not has_tag:
        return text
    soup = BeautifulSoup(text, "html.parser")
    return soup.get_text()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# def get_stata_labels_from_model(df: pd.DataFrame, model: str, suffix: str) -> dict[str:str]:
|
|
17
|
+
# """Generate STATA labels"""
|
|
18
|
+
# labels = {}
|
|
19
|
+
# _, model_name = model.split(".")
|
|
20
|
+
# model_cls = django_apps.get_model(model)
|
|
21
|
+
# for fld in model_cls._meta.get_fields():
|
|
22
|
+
# if f"{fld.name}_{suffix}" in df.columns:
|
|
23
|
+
# labels.update({f"{fld.name}_{suffix}": strip_html(str(fld.verbose_name)[:80])})
|
|
24
|
+
# return labels
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_stata_labels_from_model(
    df: pd.DataFrame, model: str, suffix: str | None = None
) -> dict[str, str]:
    """Generate STATA labels from the verbose names of a model's fields.

    :param df: dataframe whose columns are matched against the
        model's field names.
    :param model: model label passed to `django_apps.get_model`.
    :param suffix: if given, dataframe columns are expected to be
        named `<field name>_<suffix>`; otherwise just `<field name>`.
    :return: dict of {column name: label} for each field with a
        matching column. The verbose name is truncated to 80
        characters before any HTML is stripped.
    """
    labels = {}
    model_cls = django_apps.get_model(model)
    for fld in model_cls._meta.get_fields():
        # Without a suffix the column is the bare field name; the previous
        # code looked for "<name>_None" here and so never matched.
        column = f"{fld.name}_{suffix}" if suffix else fld.name
        if column not in df.columns:
            continue
        try:
            verbose_name = fld.verbose_name
        except AttributeError:
            # not every object returned by get_fields() has a verbose_name
            continue
        labels[column] = strip_html(str(verbose_name)[:80])
    return labels
|
edc_analytics/styler.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from .constants import (
|
|
6
|
+
MEAN_95CI,
|
|
7
|
+
MEAN_RANGE,
|
|
8
|
+
MEAN_SD,
|
|
9
|
+
MEDIAN_IQR,
|
|
10
|
+
MEDIAN_RANGE,
|
|
11
|
+
N_MEAN,
|
|
12
|
+
N_ONLY,
|
|
13
|
+
N_WITH_COL_PROP,
|
|
14
|
+
N_WITH_ROW_PROP,
|
|
15
|
+
STATISTICS,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from .row import RowStatistics
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class StylerError(Exception):
    """Raised when a style / statistic name is not recognised."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Styler:
    """A class to format statistics per the format label given.

    :param style: name of the statistic format; must be listed in
        STATISTICS.
    :param statistics: object providing the statistics as attributes
        (count, rowprop, colprop, mean, sd, min, max, q25, q50, q75,
        ci95l, ci95h).
    :param places: decimal places for formatted numbers; defaults to 2.
    :raises StylerError: if `style` is not in STATISTICS.
    """

    def __init__(
        self,
        style: str = None,
        statistics: RowStatistics = None,
        places: int | None = None,
    ):
        self.style = style
        self.row = statistics
        self.places = places if places is not None else 2
        if style not in STATISTICS:
            raise StylerError(f"Unknown style. Got `{style}`.")

    def _fmt(self, number) -> str:
        """Round `number` to `places` and format with fixed decimals."""
        return f"{round(number, self.places):.{self.places}f}"

    @property
    def value(self):
        """Return the statistics formatted as a string for the style.

        Make sure values are numerics first!

        For example, when preparing the dataframe convert values to
        numerics:
            df[cols] = df[cols].apply(pd.to_numeric)

        Returns "no style" if the style is listed in STATISTICS but
        is not handled below.
        """
        row = self.row
        col_value = "no style"
        if self.style == N_WITH_ROW_PROP:
            col_value = f"{row.count} ({self._fmt(row.rowprop * 100)}%)"
        elif self.style == N_ONLY:
            col_value = f"{row.count}"
        elif self.style == N_WITH_COL_PROP:
            # NOTE(review): unlike N_WITH_ROW_PROP the count is formatted
            # with decimals here -- kept as is; confirm it is intended.
            col_value = (
                f"{row.count:.{self.places}f} "
                f"({self._fmt(row.colprop * 100)}%)"
            )
        elif self.style == N_MEAN:
            col_value = self._fmt(row.mean)
        elif self.style == MEDIAN_IQR:
            # q25 now uses the same fixed-places format as q75; the
            # separator is left unchanged (no space after the comma).
            col_value = (
                f"{self._fmt(row.q50)} "
                f"({self._fmt(row.q25)},"
                f"{self._fmt(row.q75)})"
            )
        elif self.style == MEDIAN_RANGE:
            col_value = (
                f"{self._fmt(row.q50)} "
                f"({self._fmt(row.min)}, "
                f"{self._fmt(row.max)})"
            )
        elif self.style == MEAN_RANGE:
            col_value = (
                f"{self._fmt(row.mean)} "
                f"({self._fmt(row.min)}, "
                f"{self._fmt(row.max)})"
            )
        elif self.style == MEAN_SD:
            col_value = f"{self._fmt(row.mean)} ({self._fmt(row.sd)})"
        elif self.style == MEAN_95CI:
            col_value = (
                f"{self._fmt(row.mean)} "
                f"({self._fmt(row.ci95l)}, "
                f"{self._fmt(row.ci95h)})"
            )
        return col_value
|
edc_analytics/table.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from edc_constants.constants import FEMALE, MALE
|
|
3
|
+
|
|
4
|
+
from .constants import COUNT_COLUMN, N_ONLY, N_WITH_ROW_PROP, TITLE_COLUMN
|
|
5
|
+
from .row import RowDefinition, RowDefinitions, RowStatisticsWithGender
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Table:
    """Build a table dataframe (`table_df`) from row definitions.

    Each row definition filters `main_df`; the statistics class turns
    the filtered rows into one row of `table_df`. Override
    `row_definitions` to define the rows and `reorder_df` to reorder
    them.

    :param colname: column name passed to the row definitions.
    :param main_df: source dataframe.
    :param title: table title; also written to the TITLE_COLUMN of
        every row.
    :param include_zero_counts: keep rows whose count is 0; by
        default they are dropped.
    """

    title_column = "Characteristics"
    label_column = "Statistic"
    default_sublabel = "n"
    gender_column = "gender"
    row_statistics_cls: RowStatisticsWithGender = RowStatisticsWithGender

    def __init__(
        self,
        colname: str | None = None,
        main_df: pd.DataFrame = None,
        title: str | None = None,
        include_zero_counts: bool | None = None,
    ):

        self.colname = colname
        self.main_df = main_df
        self.title = title
        self.include_zero_counts = include_zero_counts
        self.table_df: pd.DataFrame = pd.DataFrame()

        self.build_table_df()
        if self.title:
            # add a redundant column to hold title name for each
            # row in this table.
            self.table_df[TITLE_COLUMN] = self.title

    @property
    def row_definitions(self) -> RowDefinitions:
        """Override with your RowDefs

        The default adds a first row with gender breakdown.
        """
        row_defs = RowDefinitions(
            colname=self.colname, row_statistics_cls=self.row_statistics_cls
        )
        row_defs.add(
            RowDefinition(
                title=self.title,
                label=self.default_sublabel,
                colname=None,
                condition=(self.main_df[self.gender_column].notna()),
                columns={
                    FEMALE: (N_WITH_ROW_PROP, 2),
                    MALE: (N_WITH_ROW_PROP, 2),
                    "All": (N_ONLY, 2),
                },
                drop=False,
            )
        )
        return row_defs

    def reorder_df(self):
        """Override to reorder the rows in `table_df`."""
        pass

    def build_table_df(self) -> None:
        """Build the table_df using the row definitions."""
        # The default property builds a new collection on every access,
        # so read it once and reuse it.
        row_defs = self.row_definitions
        df_denominator = self.main_df.copy()
        rows = []
        for index, rd in enumerate(row_defs.definitions):
            # RowDefinition.condition defaults to None; treat a missing
            # condition like an empty one instead of failing on `.empty`.
            if rd.condition is not None and not rd.condition.empty:
                df_numerator = df_denominator.loc[rd.condition]
            else:
                # default to first col non-null values
                df_numerator = df_denominator.loc[
                    df_denominator[df_denominator.columns[0]].notna()
                ]
            row_stats = self.row_statistics_cls(
                colname=rd.colname,
                df_numerator=df_numerator,
                df_denominator=df_denominator,
                df_all=self.main_df,
                columns=rd.columns,
            )
            if index == 0:
                columns = (
                    [self.title_column, self.label_column]
                    + row_stats.labels()
                    + [TITLE_COLUMN]
                )
                # reset table_df
                self.table_df = pd.DataFrame(columns=columns)
            rows.append([rd.title, rd.label] + row_stats.values_list() + [self.title])
            if rd.drop and not df_numerator.empty:
                # rows used by this definition are excluded from the
                # denominator of the definitions that follow
                df_denominator.drop(df_numerator.index, inplace=True)
        if row_defs.reverse_rows:
            rows.reverse()
        for index, values_list in enumerate(rows):
            self.table_df.loc[index] = values_list
        if not self.include_zero_counts:
            self.table_df.drop(
                self.table_df[self.table_df[COUNT_COLUMN] == 0].index, inplace=True
            )
        self.reorder_df()

    @property
    def formatted_df(self) -> pd.DataFrame:
        """Return DF with first 5 columns"""
        return self.table_df.iloc[:, :5]
|
edc_analytics/urls.py
ADDED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import django_audit_fields.fields.hostname_modification_field
|
|
4
4
|
import django_audit_fields.fields.userfield
|
|
5
|
-
import
|
|
5
|
+
import django.utils.timezone
|
|
6
6
|
from django.db import migrations, models
|
|
7
7
|
import django.utils.timezone
|
|
8
8
|
|
|
@@ -6,7 +6,7 @@ import django.db.models.deletion
|
|
|
6
6
|
import django_audit_fields.fields.hostname_modification_field
|
|
7
7
|
import django_audit_fields.fields.userfield
|
|
8
8
|
import django_audit_fields.fields.uuid_auto_field
|
|
9
|
-
import
|
|
9
|
+
import django.utils.timezone
|
|
10
10
|
import django_revision.revision_field
|
|
11
11
|
from django.conf import settings
|
|
12
12
|
from django.db import migrations, models
|
|
@@ -4,7 +4,7 @@ import _socket
|
|
|
4
4
|
import django_audit_fields.fields.hostname_modification_field
|
|
5
5
|
import django_audit_fields.fields.userfield
|
|
6
6
|
import django_audit_fields.fields.uuid_auto_field
|
|
7
|
-
import
|
|
7
|
+
import django.utils.timezone
|
|
8
8
|
import django_revision.revision_field
|
|
9
9
|
from django.db import migrations, models
|
|
10
10
|
import django.utils.timezone
|
|
@@ -4,7 +4,7 @@ import _socket
|
|
|
4
4
|
import django_audit_fields.fields.hostname_modification_field
|
|
5
5
|
import django_audit_fields.fields.userfield
|
|
6
6
|
import django_audit_fields.fields.uuid_auto_field
|
|
7
|
-
import
|
|
7
|
+
import django.utils.timezone
|
|
8
8
|
import django_revision.revision_field
|
|
9
9
|
from django.db import migrations, models
|
|
10
10
|
import django.utils.timezone
|
|
@@ -4,7 +4,7 @@ import _socket
|
|
|
4
4
|
import django_audit_fields.fields.hostname_modification_field
|
|
5
5
|
import django_audit_fields.fields.userfield
|
|
6
6
|
import django_audit_fields.fields.uuid_auto_field
|
|
7
|
-
import
|
|
7
|
+
import django.utils.timezone
|
|
8
8
|
import django_revision.revision_field
|
|
9
9
|
from django.db import migrations, models
|
|
10
10
|
import django.utils.timezone
|
|
@@ -4,7 +4,7 @@ import _socket
|
|
|
4
4
|
import django_audit_fields.fields.hostname_modification_field
|
|
5
5
|
import django_audit_fields.fields.userfield
|
|
6
6
|
import django_audit_fields.fields.uuid_auto_field
|
|
7
|
-
import
|
|
7
|
+
import django.utils.timezone
|
|
8
8
|
import django_revision.revision_field
|
|
9
9
|
from django.db import migrations, models
|
|
10
10
|
import django.utils.timezone
|
|
@@ -4,7 +4,7 @@ import _socket
|
|
|
4
4
|
import django_audit_fields.fields.hostname_modification_field
|
|
5
5
|
import django_audit_fields.fields.userfield
|
|
6
6
|
import django_audit_fields.fields.uuid_auto_field
|
|
7
|
-
import
|
|
7
|
+
import django.utils.timezone
|
|
8
8
|
import django_revision.revision_field
|
|
9
9
|
from django.db import migrations, models
|
|
10
10
|
import django.utils.timezone
|
|
@@ -10,7 +10,7 @@ import django.db.models.manager
|
|
|
10
10
|
import django_audit_fields.fields.hostname_modification_field
|
|
11
11
|
import django_audit_fields.fields.userfield
|
|
12
12
|
import django_audit_fields.fields.uuid_auto_field
|
|
13
|
-
import
|
|
13
|
+
import django.utils.timezone
|
|
14
14
|
import django_revision.revision_field
|
|
15
15
|
import simple_history.models
|
|
16
16
|
from django.conf import settings
|
|
@@ -4,7 +4,7 @@ import _socket
|
|
|
4
4
|
import django_audit_fields.fields.hostname_modification_field
|
|
5
5
|
import django_audit_fields.fields.userfield
|
|
6
6
|
import django_audit_fields.fields.uuid_auto_field
|
|
7
|
-
import
|
|
7
|
+
import django.utils.timezone
|
|
8
8
|
import django_revision.revision_field
|
|
9
9
|
from django.db import migrations, models
|
|
10
10
|
import django.utils.timezone
|