clinicedc 2.0.11__py3-none-any.whl → 2.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of clinicedc might be problematic. Click here for more details.

Files changed (137) hide show
  1. {clinicedc-2.0.11.dist-info → clinicedc-2.0.13.dist-info}/METADATA +2 -1
  2. {clinicedc-2.0.11.dist-info → clinicedc-2.0.13.dist-info}/RECORD +137 -24
  3. edc_action_item/migrations/0017_auto_20190305_0123.py +1 -1
  4. edc_action_item/migrations/0030_edcpermissions.py +1 -1
  5. edc_action_item/migrations/0041_alter_actionitem_revision_alter_actiontype_revision_and_more.py +86 -0
  6. edc_adverse_event/migrations/0001_initial.py +1 -1
  7. edc_adverse_event/migrations/0002_auto_20190802_0059.py +1 -1
  8. edc_adverse_event/migrations/0008_auto_20220825_0451.py +1 -1
  9. edc_adverse_event/migrations/0009_auto_20220907_0157.py +1 -1
  10. edc_adverse_event/migrations/0017_alter_aeactionclassification_revision_and_more.py +77 -0
  11. edc_adverse_event/model_mixins/hospitaization/hospitalization_model_mixin.py +1 -3
  12. edc_analytics/__init__.py +3 -0
  13. edc_analytics/apps.py +8 -0
  14. edc_analytics/constants.py +26 -0
  15. edc_analytics/custom_tables/__init__.py +11 -0
  16. edc_analytics/custom_tables/age.py +72 -0
  17. edc_analytics/custom_tables/art.py +88 -0
  18. edc_analytics/custom_tables/bmi.py +125 -0
  19. edc_analytics/custom_tables/bp.py +103 -0
  20. edc_analytics/custom_tables/fasting.py +126 -0
  21. edc_analytics/custom_tables/fbg.py +98 -0
  22. edc_analytics/custom_tables/fbg_ogtt.py +384 -0
  23. edc_analytics/custom_tables/gender.py +12 -0
  24. edc_analytics/custom_tables/hba1c.py +87 -0
  25. edc_analytics/custom_tables/ogtt.py +95 -0
  26. edc_analytics/custom_tables/waist.py +105 -0
  27. edc_analytics/data.py +36 -0
  28. edc_analytics/row/__init__.py +4 -0
  29. edc_analytics/row/row_definition.py +43 -0
  30. edc_analytics/row/row_definitions.py +32 -0
  31. edc_analytics/row/row_statistics.py +88 -0
  32. edc_analytics/row/row_statistics_with_gender.py +115 -0
  33. edc_analytics/stata/__init__.py +1 -0
  34. edc_analytics/stata/get_stata_labels_from_model.py +44 -0
  35. edc_analytics/styler.py +93 -0
  36. edc_analytics/table.py +108 -0
  37. edc_analytics/urls.py +6 -0
  38. edc_appointment/migrations/0018_auto_20190305_0123.py +1 -1
  39. edc_appointment/migrations/0051_alter_appointment_revision_and_more.py +38 -0
  40. edc_auth/migrations/0001_squashed_0033_alter_userprofile_is_multisite_viewer.py +1 -1
  41. edc_auth/migrations/0012_auto_20191026_0034.py +1 -1
  42. edc_auth/migrations/0013_auto_20191026_0055.py +1 -1
  43. edc_auth/migrations/0025_permissions.py +1 -1
  44. edc_auth/migrations/0037_alter_edcpermissions_revision_alter_role_revision.py +38 -0
  45. edc_consent/migrations/0001_initial.py +1 -1
  46. edc_consent/migrations/0007_alter_edcpermissions_revision.py +26 -0
  47. edc_crf/migrations/0010_alter_crfstatus_revision.py +26 -0
  48. edc_dashboard/migrations/0001_initial.py +1 -1
  49. edc_dashboard/migrations/0007_alter_edcpermissions_revision.py +26 -0
  50. edc_data_manager/migrations/0001_initial.py +1 -1
  51. edc_data_manager/migrations/0025_edcpermissions.py +1 -1
  52. edc_data_manager/migrations/0042_alter_datadictionary_revision_and_more.py +98 -0
  53. edc_dx/__init__.py +6 -0
  54. edc_dx/apps.py +5 -0
  55. edc_dx/diagnoses.py +250 -0
  56. edc_dx/form_validators/__init__.py +2 -0
  57. edc_dx/form_validators/diagnosis_form_validator_mixin.py +54 -0
  58. edc_dx/form_validators/result_form_validator_mixin.py +65 -0
  59. edc_dx/utils.py +42 -0
  60. edc_dx_review/__init__.py +0 -0
  61. edc_dx_review/apps.py +5 -0
  62. edc_dx_review/auth_objects.py +13 -0
  63. edc_dx_review/auths.py +12 -0
  64. edc_dx_review/choices.py +24 -0
  65. edc_dx_review/constants.py +7 -0
  66. edc_dx_review/fieldsets.py +47 -0
  67. edc_dx_review/form_mixins/__init__.py +3 -0
  68. edc_dx_review/form_mixins/clinical_review_baseline_required_form_mixin.py +25 -0
  69. edc_dx_review/form_validator_mixins/__init__.py +6 -0
  70. edc_dx_review/form_validator_mixins/clinical_review_baseline_form_validator_mixin.py +7 -0
  71. edc_dx_review/form_validator_mixins/clinical_review_followup_form_validator_mixin.py +25 -0
  72. edc_dx_review/list_data.py +19 -0
  73. edc_dx_review/medical_date.py +195 -0
  74. edc_dx_review/migrations/0001_initial.py +307 -0
  75. edc_dx_review/migrations/0002_diagnosislocations_extra_value_and_more.py +32 -0
  76. edc_dx_review/migrations/0003_alter_diagnosislocations_options_and_more.py +148 -0
  77. edc_dx_review/migrations/0004_remove_diagnosislocations_edc_dx_revi_name_a39b40_idx_and_more.py +20 -0
  78. edc_dx_review/migrations/__init__.py +0 -0
  79. edc_dx_review/model_mixins/__init__.py +20 -0
  80. edc_dx_review/model_mixins/clinical_review_baseline_model_mixin.py +25 -0
  81. edc_dx_review/model_mixins/clinical_review_followup/__init__.py +5 -0
  82. edc_dx_review/model_mixins/clinical_review_followup/clinical_review_followup_chol_model_mixin.py +54 -0
  83. edc_dx_review/model_mixins/clinical_review_followup/clinical_review_followup_dm_model_mixin.py +54 -0
  84. edc_dx_review/model_mixins/clinical_review_followup/clinical_review_followup_hiv_model_mixin.py +54 -0
  85. edc_dx_review/model_mixins/clinical_review_followup/clinical_review_followup_htn_model_mixin.py +56 -0
  86. edc_dx_review/model_mixins/clinical_review_followup/clinical_review_followup_model_mixin.py +25 -0
  87. edc_dx_review/model_mixins/dx_location_model_mixin.py +17 -0
  88. edc_dx_review/model_mixins/factory/__init__.py +4 -0
  89. edc_dx_review/model_mixins/factory/baseline_review_model_mixin_factory.py +55 -0
  90. edc_dx_review/model_mixins/factory/calculate_date.py +43 -0
  91. edc_dx_review/model_mixins/factory/dx_initial_review_model_mixin_factory.py +97 -0
  92. edc_dx_review/model_mixins/factory/followup_review_model_mixin_factory.py +39 -0
  93. edc_dx_review/model_mixins/factory/rx_initial_review_model_mixin_factory.py +69 -0
  94. edc_dx_review/model_mixins/followup_review/__init__.py +2 -0
  95. edc_dx_review/model_mixins/followup_review/followup_review_model_mixin.py +22 -0
  96. edc_dx_review/model_mixins/followup_review/hiv_followup_review_model_mixin.py +32 -0
  97. edc_dx_review/model_mixins/initial_review/__init__.py +6 -0
  98. edc_dx_review/model_mixins/initial_review/chol_initial_review_model_mixin.py +34 -0
  99. edc_dx_review/model_mixins/initial_review/hiv_initial_model_mixins.py +119 -0
  100. edc_dx_review/model_mixins/initial_review/ncd_initial_review_model_mixin.py +42 -0
  101. edc_dx_review/models.py +20 -0
  102. edc_dx_review/radio_fields.py +30 -0
  103. edc_dx_review/utils.py +220 -0
  104. edc_export/migrations/0004_auto_20190305_0123.py +1 -1
  105. edc_export/migrations/0013_edcpermissions.py +1 -1
  106. edc_export/migrations/0024_alter_datarequest_revision_and_more.py +170 -0
  107. edc_facility/migrations/0005_healthfacility_healthfacilitytypes_and_more.py +1 -1
  108. edc_facility/migrations/0018_alter_healthfacility_revision_and_more.py +38 -0
  109. edc_form_runners/migrations/0006_alter_issue_revision.py +26 -0
  110. edc_identifier/migrations/0012_alter_identifiermodel_revision.py +26 -0
  111. edc_lab/migrations/0039_alter_aliquot_revision_alter_box_revision_and_more.py +269 -0
  112. edc_lab_dashboard/migrations/0006_alter_edcpermissions_revision.py +26 -0
  113. edc_label/migrations/0008_alter_zpllabeltemplates_revision.py +26 -0
  114. edc_listboard/migrations/0008_alter_listboard_revision.py +26 -0
  115. edc_locator/migrations/0042_alter_historicalsubjectlocator_revision_and_more.py +38 -0
  116. edc_metadata/migrations/0032_alter_crfmetadata_revision_and_more.py +38 -0
  117. edc_navbar/migrations/0010_alter_edcpermissions_revision.py +26 -0
  118. edc_notification/migrations/0012_alter_notification_revision.py +26 -0
  119. edc_offstudy/migrations/0025_alter_historicalsubjectoffstudy_revision_and_more.py +41 -0
  120. edc_pharmacy/migrations/0091_alter_allocation_revision_alter_assignment_revision_and_more.py +794 -0
  121. edc_protocol_incident/migrations/0026_alter_historicalprotocoldeviationviolation_revision_and_more.py +65 -0
  122. edc_pylabels/migrations/0014_alter_labelconfiguration_revision.py +26 -0
  123. edc_qareports/migrations/0021_alter_edcpermissions_revision_alter_note_revision.py +38 -0
  124. edc_randomization/migrations/0015_alter_edcpermissions_revision_and_more.py +50 -0
  125. edc_refusal/migrations/0014_alter_historicalsubjectrefusal_revision_and_more.py +38 -0
  126. edc_registration/migrations/0034_alter_historicalregisteredsubject_revision_and_more.py +41 -0
  127. edc_reportable/migrations/0008_alter_gradingdata_revision_and_more.py +110 -0
  128. edc_review_dashboard/migrations/0007_alter_edcpermissions_revision.py +26 -0
  129. edc_screening/migrations/0006_alter_edcpermissions_revision.py +26 -0
  130. edc_sites/migrations/0011_alter_edcpermissions_revision.py +26 -0
  131. edc_subject_dashboard/migrations/0006_alter_edcpermissions_revision.py +26 -0
  132. edc_unblinding/migrations/0016_alter_historicalunblindingrequest_revision_and_more.py +65 -0
  133. edc_visit_schedule/migrations/0021_alter_historicalonschedule_revision_and_more.py +89 -0
  134. edc_visit_tracking/migrations/0011_alter_historicalsubjectvisit_revision_and_more.py +65 -0
  135. edc_vitals/model_mixins/blood_pressure_model_mixin.py +1 -0
  136. {clinicedc-2.0.11.dist-info → clinicedc-2.0.13.dist-info}/WHEEL +0 -0
  137. {clinicedc-2.0.11.dist-info → clinicedc-2.0.13.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,95 @@
1
+ import pandas as pd
2
+ from edc_constants.constants import FEMALE, MALE
3
+
4
+ from ..constants import MEAN_95CI, N_ONLY, N_WITH_COL_PROP, N_WITH_ROW_PROP
5
+ from ..row import RowDefinition, RowDefinitions
6
+ from ..table import Table
7
+
8
+
9
class OgttTable(Table):
    """Table definition for the ``ogtt`` column, titled "OGTT (mmol/L) categories".

    Rows are reported per gender column (FEMALE, MALE) plus an "All" column.
    """

    def __init__(self, main_df: pd.DataFrame = None):
        super().__init__(
            colname="ogtt",
            main_df=main_df,
            title="OGTT (mmol/L) categories",
        )

    @property
    def row_definitions(self) -> RowDefinitions:
        """Build the row definitions for this table.

        Order of rows: header count row, "Not fasted" (the only row that
        sets ``drop=True``), three OGTT ranges, "not measured", and a
        final mean with 95% CI row.
        """
        gender_source = self.main_df.copy()
        definitions = RowDefinitions(reverse_rows=False)

        # Header row: plain counts of records with a known gender.
        header = RowDefinition(
            title=self.title,
            label=self.default_sublabel,
            condition=(gender_source["gender"].notna()),
            columns={FEMALE: (N_ONLY, 2), MALE: (N_ONLY, 2), "All": (N_ONLY, 2)},
            drop=False,
        )
        definitions.add(header)

        # The same columns dict is shared by every category row.
        category_columns = {
            FEMALE: (N_WITH_COL_PROP, 2),
            MALE: (N_WITH_COL_PROP, 2),
            "All": (N_WITH_ROW_PROP, 2),
        }
        not_fasted = self.main_df["fasting_ogtt_hrs"] < 8.0
        ogtt = self.main_df[self.colname]
        categories = [
            # (label, condition, drop)
            ("Not fasted", not_fasted, True),
            ("<7.8", ogtt < 7.8, False),
            ("7.8-11.1", (ogtt >= 7.8) & (ogtt < 11.1), False),
            ("11.1 and above", ogtt >= 11.1, False),
            ("not measured", ogtt.isna(), False),
        ]
        for label, condition, drop in categories:
            definitions.add(
                RowDefinition(
                    colname=self.colname,
                    label=label,
                    condition=condition,
                    columns=category_columns,
                    drop=drop,
                )
            )

        # Summary row over all measured values.
        summary_columns = {
            FEMALE: (MEAN_95CI, 2),
            MALE: (MEAN_95CI, 2),
            "All": (MEAN_95CI, 2),
        }
        definitions.add(
            RowDefinition(
                colname=self.colname,
                label="Mean (95% CI)",
                condition=(self.main_df[self.colname].notna()),
                columns=summary_columns,
                drop=False,
            )
        )

        return definitions
@@ -0,0 +1,105 @@
1
+ import pandas as pd
2
+ from edc_constants.constants import FEMALE, MALE
3
+
4
+ from ..constants import (
5
+ MEDIAN_IQR,
6
+ MEDIAN_RANGE,
7
+ N_ONLY,
8
+ N_WITH_COL_PROP,
9
+ N_WITH_ROW_PROP,
10
+ )
11
+ from ..row import RowDefinition, RowDefinitions
12
+ from ..table import Table
13
+
14
+
15
class WaistCircumferenceTable(Table):
    """Table definition for the ``waist_circumference`` column (cm).

    Rows are reported per gender column (FEMALE, MALE) plus an "All" column.
    """

    def __init__(self, main_df: pd.DataFrame = None):
        super().__init__(
            colname="waist_circumference",
            main_df=main_df,
            title="Waist circumference (cm)",
        )

    @property
    def row_definitions(self) -> RowDefinitions:
        """Build the row definitions for this table.

        Order of rows: header count row, below-threshold row,
        at-or-above-threshold row, "not measured", median (range) and
        median (IQR). Thresholds are 88.0 for women and 102.0 for men.
        """
        waist = self.main_df[self.colname]
        gender = self.main_df["gender"]
        # Compare against the shared FEMALE / MALE constants, the same
        # values the gender-specific row statistics filter on. Literal
        # "Male" / "Female" strings would never match that coding.
        is_male = gender == MALE
        is_female = gender == FEMALE

        row_defs = RowDefinitions(reverse_rows=False)

        # Header row: plain counts of records with a known gender.
        row_defs.add(
            RowDefinition(
                title=self.title,
                label=self.default_sublabel,
                condition=gender.notna(),
                columns={FEMALE: (N_ONLY, 2), MALE: (N_ONLY, 2), "All": (N_ONLY, 2)},
                drop=False,
            )
        )

        columns = {
            FEMALE: (N_WITH_COL_PROP, 2),
            MALE: (N_WITH_COL_PROP, 2),
            "All": (N_WITH_ROW_PROP, 2),
        }

        cond_below = ((waist < 102.0) & is_male) | ((waist < 88.0) & is_female)
        row_defs.add(
            RowDefinition(
                colname=self.colname,
                label="Women<88 / Men<102",
                condition=cond_below,
                columns=columns,
                drop=False,
            )
        )

        cond_at_or_above = ((waist >= 102.0) & is_male) | ((waist >= 88.0) & is_female)
        row_defs.add(
            RowDefinition(
                colname=self.colname,
                label="Women>=88 / Men>=102",
                condition=cond_at_or_above,
                columns=columns,
                drop=False,
            )
        )

        row_defs.add(
            RowDefinition(
                colname=self.colname,
                label="not measured",
                condition=waist.isna(),
                columns=columns,
                drop=False,
            )
        )

        columns = {
            FEMALE: (MEDIAN_RANGE, 2),
            MALE: (MEDIAN_RANGE, 2),
            "All": (MEDIAN_RANGE, 2),
        }
        row_defs.add(
            RowDefinition(
                colname=self.colname,
                label="Median (range)",
                condition=waist.notna(),
                columns=columns,
            )
        )

        columns = {
            FEMALE: (MEDIAN_IQR, 2),
            MALE: (MEDIAN_IQR, 2),
            "All": (MEDIAN_IQR, 2),
        }
        row_defs.add(
            RowDefinition(
                colname=self.colname,
                label="Median (IQR)",
                condition=waist.notna(),
                columns=columns,
            )
        )
        return row_defs
edc_analytics/data.py ADDED
@@ -0,0 +1,36 @@
1
+ from datetime import datetime
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+
6
+
7
+ class Data:
8
+
9
+ def __init__(
10
+ self,
11
+ label: str,
12
+ table_df: pd.DataFrame,
13
+ data_df: pd.DataFrame,
14
+ filename_prefix: str,
15
+ folder: str | None = None,
16
+ ):
17
+ self.label = label
18
+ self.table_df = table_df
19
+ self.data_df = data_df
20
+ self.filename_prefix = filename_prefix
21
+ self.folder = folder or "~/"
22
+
23
+ def __repr__(self):
24
+ return f"Data({self.label}) <obs={len(self.data_df)}>"
25
+
26
+ def to_csv(
27
+ self, folder: str | None = None, filename: str | None = None, cols: int | None = None
28
+ ):
29
+ folder = folder or self.folder
30
+ cols = cols or 5
31
+ datestamp = datetime.now().strftime("%Y%m%d%H%M")
32
+ filename = filename or f"{self.filename_prefix}_table_{self.label}_{datestamp}.csv"
33
+ path = Path(folder) / filename
34
+ self.table_df.iloc[:, :cols].to_csv(
35
+ path_or_buf=path, encoding="utf-8", index=0, sep="|"
36
+ )
@@ -0,0 +1,4 @@
1
+ from .row_definition import RowDefinition
2
+ from .row_definitions import RowDefinitions
3
+ from .row_statistics import RowStatistics
4
+ from .row_statistics_with_gender import RowStatisticsWithGender
@@ -0,0 +1,43 @@
1
+ import pandas as pd
2
+
3
+ from ..constants import N_WITH_ROW_PROP, STATISTICS
4
+ from ..styler import StylerError
5
+
6
+
7
class RowDefinition:
    """Specification of a single table row.

    Holds the row's title/label, the source column name, the boolean
    condition used to filter the dataframe, and the statistic to show
    in each output column.
    """

    def __init__(
        self,
        title: str | None = None,
        label: str = None,
        colname: str | None = None,
        condition: pd.Series = None,
        columns: dict[str, tuple[str, int]] = None,
        drop: bool | None = None,
    ):
        """
        :param title: row title; stored as "" when not given.
        :param label: row label.
        :param colname: name of the source column.
        :param condition: condition used to filter the dataframe.
        :param columns: mapping of column label to (statistic, places),
            for example::

                {
                    FEMALE: (N_WITH_ROW_PROP, 2),
                    MALE: (N_WITH_ROW_PROP, 2),
                    "All": (N_ONLY, 2),
                }

            Defaults to ``{"All": (N_WITH_ROW_PROP, 2)}``.
        :param drop: drops rows from the source dataframe once used by
            the row definition; ``None`` is treated as ``False``.
        :raises StylerError: if any statistic is not in STATISTICS.
        """
        self.title = title if title else ""
        self.label = label
        self.colname = colname
        self.condition = condition
        self.drop = drop if drop is not None else False
        self.columns = columns if columns else {"All": (N_WITH_ROW_PROP, 2)}

        # Fail early on an unknown statistic name.
        for col, (style, _) in self.columns.items():
            if style not in STATISTICS:
                raise StylerError(f"Unknown statistic. Got `{style}` for column `{col}`.")
@@ -0,0 +1,32 @@
1
+ from typing import Iterable
2
+
3
+ from .row_definition import RowDefinition
4
+ from .row_statistics import RowStatistics
5
+ from .row_statistics_with_gender import RowStatisticsWithGender
6
+
7
+
8
class RowDefinitions:
    """Ordered, iterable collection of RowDefinition objects."""

    def __init__(
        self,
        colname: str = None,
        row_statistics_cls: RowStatistics | RowStatisticsWithGender = None,
        reverse_rows: bool = False,
    ):
        # These settings are only stored here; no method of this class
        # reads them.
        self.colname = colname
        self.row_statistics_cls = row_statistics_cls
        self.reverse_rows = reverse_rows
        self.definitions: list[RowDefinition] = []

    def add(self, row_definition: RowDefinition):
        """Append one row definition to the end of the collection."""
        self.definitions.append(row_definition)

    def extend(self, row_definition: list[RowDefinition]):
        """Append every row definition in the given list, in order."""
        self.definitions.extend(row_definition)

    def reverse(self):
        """Reverse the stored row definitions in place."""
        self.definitions.reverse()

    def __iter__(self) -> Iterable[RowDefinition]:
        """Iterate over the stored row definitions in their current order."""
        return iter(self.definitions)
@@ -0,0 +1,88 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from pandas.api.types import is_numeric_dtype
4
+
5
+ from ..constants import COUNT_COLUMN, N_ONLY
6
+ from ..styler import Styler
7
+
8
+
9
+ class RowStatistics:
10
+ """A class that calculates descriptive statistics for an
11
+ indictor.
12
+ """
13
+
14
+ def __init__(
15
+ self,
16
+ colname: str = None,
17
+ df_numerator: pd.DataFrame = None,
18
+ df_denominator: pd.DataFrame = None,
19
+ df_all: pd.DataFrame = None,
20
+ coltotal: float | int | None = None,
21
+ style: str | None = None,
22
+ places: int | None = None,
23
+ ):
24
+ self.places = 2 if places is None else places
25
+ self.style = style or N_ONLY
26
+
27
+ # counts (6 columns)
28
+ self.count = 0.0 if df_numerator.empty else len(df_numerator)
29
+ self.total = len(df_all)
30
+ self.coltotal = coltotal or len(df_denominator)
31
+ self.rowtotal = self.count # rowtotal or len(df_denominator)
32
+ self.colprop = self.count / self.coltotal if self.count else 0.0
33
+ self.rowprop = self.count / self.total if self.count else 0.0
34
+
35
+ # numeric stats (9 columns)
36
+ if colname and not df_numerator.empty and is_numeric_dtype(df_numerator[colname]):
37
+ stats = df_numerator[colname].describe()
38
+ self.mean = stats.loc["mean"]
39
+ self.sd = stats.loc["std"]
40
+ self.min = stats.loc["min"]
41
+ self.max = stats.loc["max"]
42
+ self.q25, self.q50, self.q75 = df_numerator[colname].quantile([0.25, 0.50, 0.75])
43
+ stats = df_numerator[colname].agg(["mean", "sem"])
44
+ self.ci95l = stats.loc["mean"] - 1.96 * stats.loc["sem"]
45
+ self.ci95h = stats.loc["mean"] + 1.96 * stats.loc["sem"]
46
+ else:
47
+ (
48
+ self.mean,
49
+ self.sd,
50
+ self.min,
51
+ self.max,
52
+ self.q25,
53
+ self.q50,
54
+ self.q75,
55
+ self.ci95l,
56
+ self.ci95h,
57
+ ) = [np.nan] * 9
58
+
59
+ def values_list(self) -> list:
60
+ return list(self.as_dict().values())
61
+
62
+ def labels(self) -> list:
63
+ return list(self.as_dict().keys())
64
+
65
+ def as_dict(self):
66
+ return {
67
+ COUNT_COLUMN: self.count,
68
+ "coltotal": self.coltotal,
69
+ "rowtotal": self.rowtotal,
70
+ "total": self.total,
71
+ "colprop": self.colprop,
72
+ "rowprop": self.rowprop,
73
+ "mean": self.mean,
74
+ "sd": self.sd,
75
+ "min": self.min,
76
+ "max": self.max,
77
+ "q25": self.q25,
78
+ "q50": self.q50,
79
+ "q75": self.q75,
80
+ "ci95l": self.ci95l,
81
+ "ci95h": self.ci95h,
82
+ }
83
+
84
+ def formatted_cell(self) -> str:
85
+ return Styler(style=self.style, statistics=self, places=self.places).value
86
+
87
+ def row(self):
88
+ return [self.formatted_cell()] + self.values_list()
@@ -0,0 +1,115 @@
1
+ import pandas as pd
2
+ from edc_constants.constants import FEMALE, MALE
3
+
4
+ from .row_statistics import RowStatistics
5
+
6
+
7
class RowStatisticsError(Exception):
    """Exception type declared for row statistics errors."""
9
+
10
+
11
class RowStatisticsFemale(RowStatistics):
    """RowStatistics computed on the numerator rows where gender is FEMALE."""

    def __init__(
        self,
        df_numerator: pd.DataFrame = None,
        df_denominator: pd.DataFrame = None,
        **kwargs,
    ):
        # Only the numerator is narrowed; the denominator is passed through.
        is_female = df_numerator["gender"] == FEMALE
        super().__init__(
            df_numerator=df_numerator.loc[is_female],
            df_denominator=df_denominator,
            **kwargs,
        )
24
+
25
+
26
class RowStatisticsMale(RowStatistics):
    """RowStatistics computed on the numerator rows where gender is MALE."""

    def __init__(
        self,
        df_numerator: pd.DataFrame = None,
        df_denominator: pd.DataFrame = None,
        **kwargs,
    ):
        # Only the numerator is narrowed; the denominator is passed through.
        is_male = df_numerator["gender"] == MALE
        super().__init__(
            df_numerator=df_numerator.loc[is_male],
            df_denominator=df_denominator,
            **kwargs,
        )
39
+
40
+
41
class RowStatisticsWithGender(RowStatistics):
    """Row statistics shown in three columns: female, male and all.

    The instance itself holds the "All" statistics; ``self.f`` and
    ``self.m`` hold the gender-specific statistics.
    """

    def __init__(
        self,
        columns: dict[str, tuple[str, int]] = None,
        df_all: pd.DataFrame = None,
        coltotal: float | int | None = None,
        **kwargs,
    ):
        """
        custom row for displaying with gender columns: F, M, All

        :param columns: dict of {col: (style name, places)} with an
            entry for each of FEMALE, MALE and "All"
        :param df_all: dataframe of all records; the per-gender column
            totals are counted from it.
        :param coltotal: column total used for the "All" statistics.
        :param kwargs: passed unchanged to every RowStatistics
            constructor (colname, df_numerator, df_denominator).

        Note: the default df["gender"] is "M" or "F".
        """
        style_for_female, places_for_female = columns[FEMALE]
        style_for_male, places_for_male = columns[MALE]
        style_for_all, places_for_all = columns["All"]

        super().__init__(
            places=places_for_all,
            style=style_for_all,
            df_all=df_all,
            coltotal=coltotal,
            **kwargs,
        )

        # Each gender column uses that gender's count in df_all as its
        # column total.
        male_total = len(df_all[df_all["gender"] == MALE])
        self.m = RowStatisticsMale(
            places=places_for_male,
            style=style_for_male,
            coltotal=male_total,
            df_all=df_all,
            **kwargs,
        )
        female_total = len(df_all[df_all["gender"] == FEMALE])
        self.f = RowStatisticsFemale(
            places=places_for_female,
            style=style_for_female,
            coltotal=female_total,
            df_all=df_all,
            **kwargs,
        )

    def values_list(self, style: str | None = None, places: int | None = None) -> list:
        """Return formatted cells, then female, male and overall values.

        ``style`` and ``places`` are accepted but not used.
        """
        overall = super().values_list()
        return [
            *self.formatted_cells().values(),
            *self.f.values_list(),
            *self.m.values_list(),
            *overall,
        ]

    def labels(self) -> list[str]:
        """Return labels matching ``values_list``; gender labels get an f/m prefix."""
        overall = super().labels()
        female = [f"f{name}" for name in self.f.labels()]
        male = [f"m{name}" for name in self.m.labels()]
        return [*self.formatted_cells().keys(), *female, *male, *overall]

    def row(self):
        """Return the formatted-cells dict followed by ``values_list()``."""
        return [self.formatted_cells(), *self.values_list()]

    def formatted_cells(self) -> dict:
        """Return the formatted cell for each column, keyed "F", "M" and "All"."""
        overall_cell = super().formatted_cell()
        return {
            "F": self.f.formatted_cell(),
            "M": self.m.formatted_cell(),
            "All": overall_cell,
        }
@@ -0,0 +1 @@
1
+ from .get_stata_labels_from_model import get_stata_labels_from_model
@@ -0,0 +1,44 @@
1
+ import re
2
+
3
+ import pandas as pd
4
+ from bs4 import BeautifulSoup
5
+ from django.apps import apps as django_apps
6
+
7
+
8
def strip_html(text: str) -> str:
    """Return ``text`` with HTML markup removed.

    Missing values (per ``pd.isna``) and text without anything that
    looks like a tag are returned unchanged.
    """
    looks_like_html = not pd.isna(text) and re.search(r"<[^>]+>", text) is not None
    if looks_like_html:
        return BeautifulSoup(text, "html.parser").get_text()
    return text
14
+
15
+
16
+ # def get_stata_labels_from_model(df: pd.DataFrame, model: str, suffix: str) -> dict[str:str]:
17
+ # """Generate STATA labels"""
18
+ # labels = {}
19
+ # _, model_name = model.split(".")
20
+ # model_cls = django_apps.get_model(model)
21
+ # for fld in model_cls._meta.get_fields():
22
+ # if f"{fld.name}_{suffix}" in df.columns:
23
+ # labels.update({f"{fld.name}_{suffix}": strip_html(str(fld.verbose_name)[:80])})
24
+ # return labels
25
+
26
+
27
def get_stata_labels_from_model(
    df: pd.DataFrame, model: str, suffix: str | None = None
) -> dict[str, str]:
    """Generate STATA labels from the verbose names of a model's fields.

    :param df: dataframe whose columns are matched against field names.
    :param model: model label passed to ``django_apps.get_model``.
    :param suffix: if given, columns are expected to be named
        ``<field name>_<suffix>``; otherwise columns are expected to
        match the field name exactly.
    :return: mapping of matching column name to label text, with HTML
        removed and limited to 80 characters.
    """
    labels: dict[str, str] = {}
    model_cls = django_apps.get_model(model)
    for fld in model_cls._meta.get_fields():
        # Without a suffix the column is the bare field name, not
        # "<name>_None".
        column = f"{fld.name}_{suffix}" if suffix else fld.name
        if column not in df.columns:
            continue
        try:
            verbose_name = fld.verbose_name
        except AttributeError:
            # Skip field objects that have no verbose_name.
            continue
        # Strip markup before truncating so that a tag is never cut in half.
        labels[column] = strip_html(str(verbose_name))[:80]
    return labels
@@ -0,0 +1,93 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ from .constants import (
6
+ MEAN_95CI,
7
+ MEAN_RANGE,
8
+ MEAN_SD,
9
+ MEDIAN_IQR,
10
+ MEDIAN_RANGE,
11
+ N_MEAN,
12
+ N_ONLY,
13
+ N_WITH_COL_PROP,
14
+ N_WITH_ROW_PROP,
15
+ STATISTICS,
16
+ )
17
+
18
+ if TYPE_CHECKING:
19
+ from .row import RowStatistics
20
+
21
+
22
class StylerError(Exception):
    """Raised when an unknown style / statistic name is given."""
24
+
25
+
26
class Styler:
    """A class to format statistics per the format label given.

    :param style: name of the statistic style; must be in STATISTICS.
    :param statistics: object providing the statistic attributes
        (count, rowprop, colprop, mean, sd, min, max, q25, q50, q75,
        ci95l, ci95h).
    :param places: number of decimal places; defaults to 2.
    :raises StylerError: if ``style`` is not in STATISTICS.
    """

    def __init__(
        self,
        style: str = None,
        statistics: RowStatistics = None,
        places: int | None = None,
    ):
        self.style = style
        self.row = statistics
        self.places = places if places is not None else 2
        if style not in STATISTICS:
            raise StylerError(f"Unknown style. Got `{style}`.")

    def _fmt(self, number) -> str:
        """Round ``number`` and render it with exactly ``self.places`` decimals."""
        return f"{round(number, self.places):.{self.places}f}"

    @property
    def value(self):
        """Return the formatted cell text for the configured style.

        Make sure values are numerics first!

        For example, when preparing the dataframe convert values to
        numerics:
            df[cols] = df[cols].apply(pd.to_numeric)

        Returns "no style" if the style has no formatting branch here.
        """
        row = self.row
        col_value = "no style"
        if self.style == N_WITH_ROW_PROP:
            col_value = f"{row.count} ({self._fmt(row.rowprop * 100)}%)"
        elif self.style == N_ONLY:
            col_value = f"{row.count}"
        elif self.style == N_WITH_COL_PROP:
            col_value = f"{row.count:.{self.places}f} ({self._fmt(row.colprop * 100)}%)"
        elif self.style == N_MEAN:
            col_value = self._fmt(row.mean)
        elif self.style == MEDIAN_IQR:
            # q25 now gets the same fixed-decimal formatting and ", "
            # separator as every other bracketed pair.
            col_value = f"{self._fmt(row.q50)} ({self._fmt(row.q25)}, {self._fmt(row.q75)})"
        elif self.style == MEDIAN_RANGE:
            col_value = f"{self._fmt(row.q50)} ({self._fmt(row.min)}, {self._fmt(row.max)})"
        elif self.style == MEAN_RANGE:
            col_value = f"{self._fmt(row.mean)} ({self._fmt(row.min)}, {self._fmt(row.max)})"
        elif self.style == MEAN_SD:
            col_value = f"{self._fmt(row.mean)} ({self._fmt(row.sd)})"
        elif self.style == MEAN_95CI:
            col_value = f"{self._fmt(row.mean)} ({self._fmt(row.ci95l)}, {self._fmt(row.ci95h)})"
        return col_value