OpenREM 1.0.0b2__py3-none-any.whl → 1.0.0b3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (279) hide show
  1. openrem/locale/de/LC_MESSAGES/django.po +1060 -1059
  2. openrem/locale/django.pot +973 -972
  3. openrem/locale/es_MX/LC_MESSAGES/django.po +1049 -1048
  4. openrem/locale/it/LC_MESSAGES/django.po +1044 -1043
  5. openrem/locale/lt/LC_MESSAGES/django.po +989 -988
  6. openrem/locale/nb_NO/LC_MESSAGES/django.po +985 -984
  7. openrem/locale/pt_BR/LC_MESSAGES/django.po +1003 -1002
  8. openrem/manage.py +10 -10
  9. openrem/openremproject/__init__.py +1 -1
  10. openrem/openremproject/local_settings.py.linux +128 -128
  11. openrem/openremproject/local_settings.py.windows +144 -144
  12. openrem/openremproject/local_settings.py.windows-sqlite3 +129 -129
  13. openrem/openremproject/settings.py +278 -278
  14. openrem/openremproject/urls.py +32 -32
  15. openrem/openremproject/wsgi.py.example +28 -28
  16. openrem/remapp/__init__.py +2 -2
  17. openrem/remapp/admin.py +31 -31
  18. openrem/remapp/exports/ct_export.py +780 -753
  19. openrem/remapp/exports/dx_export.py +817 -805
  20. openrem/remapp/exports/export_common.py +931 -951
  21. openrem/remapp/exports/export_common_pandas.py +2422 -0
  22. openrem/remapp/exports/exportviews.py +815 -860
  23. openrem/remapp/exports/mg_csv_nhsbsp.py +292 -292
  24. openrem/remapp/exports/mg_export.py +673 -510
  25. openrem/remapp/exports/nm_export.py +796 -575
  26. openrem/remapp/exports/rf_export.py +1418 -1431
  27. openrem/remapp/extractors/ct_philips.py +424 -414
  28. openrem/remapp/extractors/ct_toshiba.py +2116 -2108
  29. openrem/remapp/extractors/dx.py +1033 -952
  30. openrem/remapp/extractors/extract_common.py +817 -817
  31. openrem/remapp/extractors/import_views.py +426 -426
  32. openrem/remapp/extractors/mam.py +685 -672
  33. openrem/remapp/extractors/nm_image.py +439 -431
  34. openrem/remapp/extractors/ptsizecsv2db.py +368 -368
  35. openrem/remapp/extractors/rdsr.py +667 -654
  36. openrem/remapp/extractors/rdsr_methods.py +1771 -1768
  37. openrem/remapp/extractors/rrdsr_methods.py +630 -622
  38. openrem/remapp/fixtures/openskin_safelist.json +11 -11
  39. openrem/remapp/forms.py +2286 -2277
  40. openrem/remapp/interface/chart_functions.py +2412 -2393
  41. openrem/remapp/interface/mod_filters.py +1241 -1243
  42. openrem/remapp/migrations/0001_initial.py.1-0-upgrade +1043 -1043
  43. openrem/remapp/models.py +3418 -3407
  44. openrem/remapp/netdicom/dicomviews.py +681 -683
  45. openrem/remapp/netdicom/qrscu.py +2646 -2646
  46. openrem/remapp/netdicom/tools.py +134 -134
  47. openrem/remapp/static/css/bootstrap-theme.css +587 -587
  48. openrem/remapp/static/css/bootstrap-theme.min.css +4 -4
  49. openrem/remapp/static/css/bootstrap.css +6800 -6800
  50. openrem/remapp/static/css/bootstrap.min.css +4 -4
  51. openrem/remapp/static/css/datepicker3.css +790 -790
  52. openrem/remapp/static/css/jquery.qtip.min.css +2 -2
  53. openrem/remapp/static/css/openrem-extra.css +442 -442
  54. openrem/remapp/static/css/openrem.css +96 -96
  55. openrem/remapp/static/css/registration.css +34 -34
  56. openrem/remapp/static/fonts/glyphicons-halflings-regular.svg +287 -287
  57. openrem/remapp/static/js/bootstrap-datepicker.js +1671 -1671
  58. openrem/remapp/static/js/bootstrap.js +2363 -2363
  59. openrem/remapp/static/js/bootstrap.min.js +6 -6
  60. openrem/remapp/static/js/charts/chartCommonFunctions.js +75 -75
  61. openrem/remapp/static/js/charts/chartFullScreen.js +41 -41
  62. openrem/remapp/static/js/charts/ctChartAjax.js +331 -331
  63. openrem/remapp/static/js/charts/dxChartAjax.js +290 -290
  64. openrem/remapp/static/js/charts/mgChartAjax.js +144 -144
  65. openrem/remapp/static/js/charts/nmChartAjax.js +64 -64
  66. openrem/remapp/static/js/charts/plotly-2.35.2.min.js +8 -0
  67. openrem/remapp/static/js/charts/rfChartAjax.js +128 -128
  68. openrem/remapp/static/js/chroma.min.js +32 -32
  69. openrem/remapp/static/js/datepicker.js +5 -5
  70. openrem/remapp/static/js/dicom.js +115 -115
  71. openrem/remapp/static/js/django_reverse/reverse.js +13 -13
  72. openrem/remapp/static/js/formatDate.js +7 -7
  73. openrem/remapp/static/js/html5shiv.min.js +8 -8
  74. openrem/remapp/static/js/jquery-1.11.0.min.js +4 -4
  75. openrem/remapp/static/js/npm.js +12 -12
  76. openrem/remapp/static/js/respond.min.js +4 -4
  77. openrem/remapp/static/js/skin-dose-maps/jquery.qtip.min.js +4 -4
  78. openrem/remapp/static/js/skin-dose-maps/rfSkinDoseMap3dHUDObject.js +112 -112
  79. openrem/remapp/static/js/skin-dose-maps/rfSkinDoseMap3dObject.js +367 -367
  80. openrem/remapp/static/js/skin-dose-maps/rfSkinDoseMap3dPersonObject.js +158 -158
  81. openrem/remapp/static/js/skin-dose-maps/rfSkinDoseMapColourScaleObject.js +153 -153
  82. openrem/remapp/static/js/skin-dose-maps/rfSkinDoseMapObject.js +367 -367
  83. openrem/remapp/static/js/skin-dose-maps/rfSkinDoseMapping.js +584 -584
  84. openrem/remapp/static/js/skin-dose-maps/rfSkinDoseMapping3d.js +255 -255
  85. openrem/remapp/static/js/skin-dose-maps/rfSkinDoseMappingAjax.js +267 -212
  86. openrem/remapp/static/js/skin-dose-maps/three.min.js +835 -835
  87. openrem/remapp/static/js/sorttable.js +495 -495
  88. openrem/remapp/templates/base.html +253 -253
  89. openrem/remapp/templates/registration/changepassword.html +25 -25
  90. openrem/remapp/templates/registration/changepassworddone.html +12 -12
  91. openrem/remapp/templates/registration/login.html +42 -42
  92. openrem/remapp/templates/remapp/backgroundtaskmaximumrows_form.html +29 -29
  93. openrem/remapp/templates/remapp/base.html +1 -1
  94. openrem/remapp/templates/remapp/ctdetail.html +235 -235
  95. openrem/remapp/templates/remapp/ctfiltered.html +310 -310
  96. openrem/remapp/templates/remapp/dicomdeletesettings_form.html +31 -31
  97. openrem/remapp/templates/remapp/dicomqr.html +147 -147
  98. openrem/remapp/templates/remapp/dicomquerydetails.html +83 -83
  99. openrem/remapp/templates/remapp/dicomqueryimages.html +49 -49
  100. openrem/remapp/templates/remapp/dicomqueryseries.html +109 -109
  101. openrem/remapp/templates/remapp/dicomquerysummary.html +48 -48
  102. openrem/remapp/templates/remapp/dicomremoteqr_confirm_delete.html +60 -60
  103. openrem/remapp/templates/remapp/dicomremoteqr_form.html +32 -32
  104. openrem/remapp/templates/remapp/dicomstorescp_confirm_delete.html +53 -53
  105. openrem/remapp/templates/remapp/dicomstorescp_form.html +48 -48
  106. openrem/remapp/templates/remapp/dicomsummary.html +257 -257
  107. openrem/remapp/templates/remapp/displaychartoptions.html +184 -184
  108. openrem/remapp/templates/remapp/displayhomepageoptions.html +57 -57
  109. openrem/remapp/templates/remapp/displayname-count.html +6 -6
  110. openrem/remapp/templates/remapp/displayname-last-date.html +3 -3
  111. openrem/remapp/templates/remapp/displayname-modality.html +86 -105
  112. openrem/remapp/templates/remapp/displayname-skinmap.html +18 -18
  113. openrem/remapp/templates/remapp/displaynameupdate.html +100 -100
  114. openrem/remapp/templates/remapp/displaynameview.html +222 -219
  115. openrem/remapp/templates/remapp/dxdetail.html +176 -176
  116. openrem/remapp/templates/remapp/dxfiltered.html +324 -324
  117. openrem/remapp/templates/remapp/exports-active.html +25 -25
  118. openrem/remapp/templates/remapp/exports-complete.html +35 -35
  119. openrem/remapp/templates/remapp/exports-error.html +26 -26
  120. openrem/remapp/templates/remapp/exports-queue.html +18 -18
  121. openrem/remapp/templates/remapp/exports.html +191 -191
  122. openrem/remapp/templates/remapp/failed_summary_list.html +27 -27
  123. openrem/remapp/templates/remapp/filteredbase.html +162 -162
  124. openrem/remapp/templates/remapp/highdosemetricalertsettings_form.html +76 -76
  125. openrem/remapp/templates/remapp/home-list-modalities.html +94 -94
  126. openrem/remapp/templates/remapp/home.html +202 -202
  127. openrem/remapp/templates/remapp/list_filters.html +24 -24
  128. openrem/remapp/templates/remapp/mgdetail.html +160 -138
  129. openrem/remapp/templates/remapp/mgfiltered.html +311 -311
  130. openrem/remapp/templates/remapp/nmdetail.html +300 -300
  131. openrem/remapp/templates/remapp/nmfiltered.html +255 -255
  132. openrem/remapp/templates/remapp/notpatient.html +190 -190
  133. openrem/remapp/templates/remapp/notpatientindicators_form_base.html +81 -81
  134. openrem/remapp/templates/remapp/notpatientindicatorsid_confirm_delete.html +54 -54
  135. openrem/remapp/templates/remapp/notpatientindicatorsid_form.html +23 -23
  136. openrem/remapp/templates/remapp/notpatientindicatorsname_confirm_delete.html +54 -54
  137. openrem/remapp/templates/remapp/notpatientindicatorsname_form.html +23 -23
  138. openrem/remapp/templates/remapp/notpatientindicatorsname_form_base.html +85 -85
  139. openrem/remapp/templates/remapp/openskinsafelist_add.html +130 -130
  140. openrem/remapp/templates/remapp/openskinsafelist_confirm_delete.html +100 -100
  141. openrem/remapp/templates/remapp/openskinsafelist_form.html +207 -207
  142. openrem/remapp/templates/remapp/patientidsettings_form.html +83 -83
  143. openrem/remapp/templates/remapp/populate_summary_progress.html +83 -83
  144. openrem/remapp/templates/remapp/populate_summary_progress_error.html +36 -36
  145. openrem/remapp/templates/remapp/review_failed_imports.html +157 -157
  146. openrem/remapp/templates/remapp/review_failed_study.html +41 -41
  147. openrem/remapp/templates/remapp/review_studies_delete_button.html +20 -20
  148. openrem/remapp/templates/remapp/review_study.html +19 -19
  149. openrem/remapp/templates/remapp/review_summary_list.html +245 -245
  150. openrem/remapp/templates/remapp/rf_dose_alert_email_template.html +14 -1
  151. openrem/remapp/templates/remapp/rfalertnotificationsview.html +59 -59
  152. openrem/remapp/templates/remapp/rfdetail.html +547 -543
  153. openrem/remapp/templates/remapp/rfdetailbase.html +18 -18
  154. openrem/remapp/templates/remapp/rffiltered.html +404 -404
  155. openrem/remapp/templates/remapp/sizeimports.html +119 -119
  156. openrem/remapp/templates/remapp/sizeprocess.html +96 -96
  157. openrem/remapp/templates/remapp/sizeupload.html +110 -110
  158. openrem/remapp/templates/remapp/skindosemapcalcsettings_form.html +28 -28
  159. openrem/remapp/templates/remapp/standardname-modality.html +69 -69
  160. openrem/remapp/templates/remapp/standardnames_confirm_delete.html +71 -71
  161. openrem/remapp/templates/remapp/standardnames_form.html +87 -87
  162. openrem/remapp/templates/remapp/standardnamesettings_form.html +41 -41
  163. openrem/remapp/templates/remapp/standardnamesrefreshall.html +92 -92
  164. openrem/remapp/templates/remapp/standardnameview.html +103 -103
  165. openrem/remapp/templates/remapp/study_confirm_delete.html +147 -147
  166. openrem/remapp/templates/remapp/task_admin.html +265 -265
  167. openrem/remapp/templates/remapp/tasks.html +76 -76
  168. openrem/remapp/templatetags/formfilters.py +13 -13
  169. openrem/remapp/templatetags/proper_paginate.py +38 -38
  170. openrem/remapp/templatetags/remappduration.py +36 -36
  171. openrem/remapp/templatetags/sigdig.py +38 -38
  172. openrem/remapp/templatetags/sort_class_property_value.py +15 -15
  173. openrem/remapp/templatetags/update_variable.py +20 -20
  174. openrem/remapp/templatetags/url_replace.py +25 -25
  175. openrem/remapp/tests/test_charts_common.py +202 -202
  176. openrem/remapp/tests/test_charts_ct.py +7111 -7111
  177. openrem/remapp/tests/test_charts_dx.py +3513 -3513
  178. openrem/remapp/tests/test_charts_mg.py +1116 -1115
  179. openrem/remapp/tests/test_dcmdatetime.py +189 -189
  180. openrem/remapp/tests/test_dicom_qr.py +2580 -2580
  181. openrem/remapp/tests/test_display_name.py +274 -274
  182. openrem/remapp/tests/test_export_ct_xlsx.py +272 -248
  183. openrem/remapp/tests/test_export_dx_xlsx.py +137 -134
  184. openrem/remapp/tests/test_export_mammo_csv.py +242 -242
  185. openrem/remapp/tests/test_export_rf_xlsx.py +246 -246
  186. openrem/remapp/tests/test_files/DX-Im-DRGEM.dcm +0 -0
  187. openrem/remapp/tests/test_files/MG-RDSR-GEPristina-2D.dcm +0 -0
  188. openrem/remapp/tests/test_files/MG-RDSR-GEPristina-DBT.dcm +0 -0
  189. openrem/remapp/tests/test_files/MG-RDSR-Giotto-DBT.dcm +0 -0
  190. openrem/remapp/tests/test_files/skin_map_alphenix.py +590 -590
  191. openrem/remapp/tests/test_files/skin_map_zee.py +354 -354
  192. openrem/remapp/tests/test_filters_ct.py +321 -321
  193. openrem/remapp/tests/test_filters_dx.py +92 -92
  194. openrem/remapp/tests/test_filters_mammo.py +183 -183
  195. openrem/remapp/tests/test_filters_rf.py +118 -118
  196. openrem/remapp/tests/test_get_values.py +72 -72
  197. openrem/remapp/tests/test_hash_id.py +65 -65
  198. openrem/remapp/tests/test_import_ct_esr_ge.py +3034 -3034
  199. openrem/remapp/tests/test_import_ct_philips_rdsr.py +42 -42
  200. openrem/remapp/tests/test_import_ct_rdsr_multiple.py +256 -256
  201. openrem/remapp/tests/test_import_ct_rdsr_siemens.py +827 -827
  202. openrem/remapp/tests/test_import_ct_rdsr_spectrumdynamics.py +91 -91
  203. openrem/remapp/tests/test_import_ct_rdsr_toshiba_dosecheck.py +67 -67
  204. openrem/remapp/tests/test_import_ct_rdsr_toshiba_multivaluesd.py +33 -33
  205. openrem/remapp/tests/test_import_ct_rdsr_toshiba_pixelmed.py +118 -118
  206. openrem/remapp/tests/test_import_ct_sc_philips.py +44 -44
  207. openrem/remapp/tests/test_import_dual_rdsr.py +110 -110
  208. openrem/remapp/tests/test_import_dx.py +1267 -1191
  209. openrem/remapp/tests/test_import_dx_rdsr.py +1250 -1253
  210. openrem/remapp/tests/test_import_mam.py +438 -438
  211. openrem/remapp/tests/test_import_mg_im_hol_proj.py +46 -46
  212. openrem/remapp/tests/test_import_mg_rdsr.py +586 -586
  213. openrem/remapp/tests/test_import_nm_image.py +420 -420
  214. openrem/remapp/tests/test_import_nm_siemens_rdsr.py +396 -396
  215. openrem/remapp/tests/test_import_px.py +161 -161
  216. openrem/remapp/tests/test_import_rf_rdsr.py +420 -418
  217. openrem/remapp/tests/test_missing_date.py +42 -42
  218. openrem/remapp/tests/test_not_patient.py +60 -60
  219. openrem/remapp/tests/test_openskin.py +272 -272
  220. openrem/remapp/tests/test_patient_id_settings.py +72 -72
  221. openrem/remapp/tests/test_pt_size_import.py +232 -232
  222. openrem/remapp/tests/test_rf_detail.py +113 -113
  223. openrem/remapp/tests/test_rf_high_dose_alert.py +361 -361
  224. openrem/remapp/tools/background.py +361 -361
  225. openrem/remapp/tools/check_standard_name_status.py +47 -0
  226. openrem/remapp/tools/check_uid.py +70 -70
  227. openrem/remapp/tools/dcmdatetime.py +248 -248
  228. openrem/remapp/tools/default_import.py +44 -47
  229. openrem/remapp/tools/get_values.py +230 -230
  230. openrem/remapp/tools/hash_id.py +58 -58
  231. openrem/remapp/tools/make_skin_map.py +448 -406
  232. openrem/remapp/tools/not_patient_indicators.py +72 -72
  233. openrem/remapp/tools/openskin/calc_exp_map.py +173 -173
  234. openrem/remapp/tools/openskin/geomclass.py +475 -475
  235. openrem/remapp/tools/openskin/geomfunc.py +433 -432
  236. openrem/remapp/tools/openskin/skinmap.py +417 -417
  237. openrem/remapp/tools/populate_summary.py +185 -193
  238. openrem/remapp/tools/save_skin_map_structure.py +73 -73
  239. openrem/remapp/tools/send_high_dose_alert_emails.py +238 -207
  240. openrem/remapp/urls.py +456 -448
  241. openrem/remapp/version.py +11 -11
  242. openrem/remapp/views.py +1147 -1052
  243. openrem/remapp/views_admin.py +3876 -3936
  244. openrem/remapp/views_charts_ct.py +2110 -2058
  245. openrem/remapp/views_charts_dx.py +1906 -1836
  246. openrem/remapp/views_charts_mg.py +1349 -1196
  247. openrem/remapp/views_charts_nm.py +535 -535
  248. openrem/remapp/views_charts_rf.py +1219 -1241
  249. openrem/remapp/views_openskin.py +379 -384
  250. openrem/sample-config/openrem-consumer.service +12 -12
  251. openrem/sample-config/openrem-gunicorn.service +13 -13
  252. openrem/sample-config/openrem-server +14 -13
  253. openrem/sample-config/openrem_orthanc_config_linux.lua +454 -454
  254. openrem/sample-config/openrem_orthanc_config_windows.lua +455 -455
  255. openrem/sample-config/queue-init.bat +73 -73
  256. openrem/scripts/openrem_ctphilips.py +25 -25
  257. openrem/scripts/openrem_cttoshiba.py +28 -28
  258. openrem/scripts/openrem_dx.py +22 -22
  259. openrem/scripts/openrem_mg.py +22 -22
  260. openrem/scripts/openrem_nm.py +22 -22
  261. openrem/scripts/openrem_ptsizecsv.py +17 -17
  262. openrem/scripts/openrem_qr.py +12 -12
  263. openrem/scripts/openrem_rdsr.py +25 -25
  264. {OpenREM-1.0.0b2.dist-info → openrem-1.0.0b3.dist-info}/METADATA +39 -29
  265. openrem-1.0.0b3.dist-info/RECORD +379 -0
  266. {OpenREM-1.0.0b2.dist-info → openrem-1.0.0b3.dist-info}/WHEEL +1 -1
  267. {OpenREM-1.0.0b2.dist-info → openrem-1.0.0b3.dist-info/licenses}/COPYING-GPLv3 +674 -674
  268. {OpenREM-1.0.0b2.dist-info → openrem-1.0.0b3.dist-info/licenses}/LICENSE +22 -22
  269. OpenREM-1.0.0b2.dist-info/RECORD +0 -373
  270. openrem/remapp/static/js/charts/plotly-2.17.1.min.js +0 -8
  271. {OpenREM-1.0.0b2.data → openrem-1.0.0b3.data}/scripts/openrem_ctphilips.py +0 -0
  272. {OpenREM-1.0.0b2.data → openrem-1.0.0b3.data}/scripts/openrem_cttoshiba.py +0 -0
  273. {OpenREM-1.0.0b2.data → openrem-1.0.0b3.data}/scripts/openrem_dx.py +0 -0
  274. {OpenREM-1.0.0b2.data → openrem-1.0.0b3.data}/scripts/openrem_mg.py +0 -0
  275. {OpenREM-1.0.0b2.data → openrem-1.0.0b3.data}/scripts/openrem_nm.py +0 -0
  276. {OpenREM-1.0.0b2.data → openrem-1.0.0b3.data}/scripts/openrem_ptsizecsv.py +0 -0
  277. {OpenREM-1.0.0b2.data → openrem-1.0.0b3.data}/scripts/openrem_qr.py +0 -0
  278. {OpenREM-1.0.0b2.data → openrem-1.0.0b3.data}/scripts/openrem_rdsr.py +0 -0
  279. {OpenREM-1.0.0b2.dist-info → openrem-1.0.0b3.dist-info}/top_level.txt +0 -0
@@ -1,2393 +1,2412 @@
1
- # pylint: disable=too-many-lines
2
- # This Python file uses the following encoding: utf-8
3
- # OpenREM - Radiation Exposure Monitoring tools for the physicist
4
- # Copyright (C) 2017 The Royal Marsden NHS Foundation Trust
5
- #
6
- # This program is free software: you can redistribute it and/or modify
7
- # it under the terms of the GNU General Public License as published by
8
- # the Free Software Foundation, either version 3 of the License, or
9
- # (at your option) any later version.
10
- #
11
- # This program is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- # GNU General Public License for more details.
15
- #
16
- # Additional permission under section 7 of GPLv3:
17
- # You shall not make any use of the name of The Royal Marsden NHS
18
- # Foundation trust in connection with this Program in any press or
19
- # other public announcement without the prior written consent of
20
- # The Royal Marsden NHS Foundation Trust.
21
- #
22
- # You should have received a copy of the GNU General Public License
23
- # along with this program. If not, see <http://www.gnu.org/licenses/>.
24
-
25
- """
26
- .. module:: chart_functions
27
- :synopsis: Helper functions for calculating chart data
28
-
29
- .. moduleauthor:: David Platten
30
-
31
- """
32
-
33
- import os
34
- import math
35
- import base64
36
- from builtins import range # pylint: disable=redefined-builtin
37
- from datetime import datetime
38
- import textwrap
39
- import warnings
40
-
41
- from django.conf import settings
42
- from django.utils.translation import gettext as _
43
- import numpy as np
44
- import pandas as pd
45
- import matplotlib.cm
46
- import matplotlib.colors
47
- import plotly.express as px
48
- import plotly.io as pio
49
- import plotly.graph_objects as go
50
- from plotly.offline import plot
51
- from plotly.subplots import make_subplots
52
- from scipy import stats
53
-
54
-
55
- def global_config(
56
- filename,
57
- height_multiplier=1.0,
58
- height=1080,
59
- width=1920,
60
- ):
61
- """
62
- Creates a Plotly global configuration dictionary. The parameters all relate
63
- to the chart bitmap that can be saved by the user.
64
-
65
- :param filename: string containing the file name to use if the user saves the chart as a graphic file
66
- :param height_multiplier: floating point value used to scale the chart height
67
- :param height: int value for the height of the chart graphic file
68
- :param width: int value for the width of the chart graphic file
69
- :return: a dictionary of Plotly options
70
- """
71
- return {
72
- "toImageButtonOptions": {
73
- "format": "png",
74
- "filename": filename,
75
- "height": height * height_multiplier,
76
- "width": width,
77
- "scale": 1,
78
- },
79
- "displaylogo": False,
80
- "scrollZoom": True,
81
- }
82
-
83
-
84
- def create_dataframe(
85
- database_events,
86
- field_dict,
87
- data_point_name_lowercase=None,
88
- data_point_name_remove_whitespace_padding=None,
89
- data_point_value_multipliers=None,
90
- char_wrap=500,
91
- uid=None,
92
- ):
93
- """
94
- Creates a Pandas DataFrame from the supplied database records.
95
- names fields are made categorical to save system memory
96
- Any missing (na) values in names fields are set to Blank
97
-
98
- :param database_events: the database events
99
- :param field_dict: a dictionary of lists, each containing database field names to include in the DataFrame. The
100
- dictionary should include "names", "values", "dates", "times" and optionally "system" items
101
- :param data_point_name_lowercase: boolean flag to determine whether to make all "names" field values lower case
102
- :param data_point_name_remove_whitespace_padding: boolean flag to determine whether to strip whitespace
103
- :param data_point_value_multipliers: list of float valuse to multiply each "values" field value by
104
- :param uid: string containing database field name which contains a unique identifier for each record
105
- :return: a Pandas DataFrame with a column per required field
106
- """
107
- start = None
108
- if settings.DEBUG:
109
- start = datetime.now()
110
-
111
- fields_to_include = set()
112
- if uid:
113
- fields_to_include.add(uid)
114
-
115
- fields_to_include.update(field_dict["names"])
116
- fields_to_include.update(field_dict["values"])
117
- fields_to_include.update(field_dict["dates"])
118
- fields_to_include.update(field_dict["times"])
119
- fields_to_include.update(field_dict["system"])
120
-
121
- start = None
122
- if settings.DEBUG:
123
- start = datetime.now()
124
-
125
- # NOTE: I am not excluding zero-value events from the calculations (zero DLP or zero CTDI)
126
-
127
- # The "order_by()" in the command below removes the custom ordering on the query set that is used to order things
128
- # correctly on the filtered page tables. This ordering isn't required for the DataFrame; removing it speeds up
129
- # the DataFrame.from_records command.
130
- df = pd.DataFrame.from_records(
131
- data=database_events.order_by().values_list(
132
- *fields_to_include
133
- ), # values_list uses less memory than values
134
- columns=fields_to_include, # need to specify the column names as we're now using values_list
135
- coerce_float=True, # force Decimal to float - saves doing a type conversion later
136
- )
137
-
138
- if settings.DEBUG:
139
- print(f"Initial Dataframe created from records in {datetime.now() - start}")
140
- start = datetime.now()
141
- print("Initial DataFrame info, including memory use, is:")
142
- df.info()
143
-
144
- if uid:
145
- df[uid] = df[uid].astype("UInt32")
146
-
147
- # Replace any NaN values in the names columns with "Blank"
148
- df[field_dict["names"]] = df[field_dict["names"]].apply(lambda x: x.fillna("Blank"))
149
-
150
- # Make names column values lowercase if required
151
- if data_point_name_lowercase:
152
- df[field_dict["names"]] = df[field_dict["names"]].apply(lambda x: x.str.lower())
153
-
154
- # Strip whitespace from the beginning and end of any names column values
155
- # Also replace multiple spaces with a single space
156
- if data_point_name_remove_whitespace_padding:
157
- df[field_dict["names"]] = df[field_dict["names"]].apply(
158
- lambda x: x.str.strip().replace("\s+", " ", regex=True)
159
- )
160
-
161
- # Make the names columns all "category" type - this saves memory. Must be done after the above, as the string
162
- # replacement lines revert the columns back to "object"
163
- df[field_dict["names"]] = df[field_dict["names"]].astype("category")
164
-
165
- # Rename the "system" column to "x_ray_system_name" if it is present
166
- if field_dict["system"]:
167
- df.rename(columns={field_dict["system"][0]: "x_ray_system_name"}, inplace=True)
168
- df["x_ray_system_name"] = df["x_ray_system_name"].astype("category")
169
- df.sort_values(by="x_ray_system_name", inplace=True)
170
- # Else create the "x_ray_system_name" column populated with a single "All systems" category
171
- else:
172
- df["x_ray_system_name"] = pd.Categorical(np.full(len(df.index), "All systems"))
173
-
174
- # Loop through each value field, multiplying the values by the corresponding multiplier
175
- for idx, value_field in enumerate(field_dict["values"]):
176
- if data_point_value_multipliers:
177
- df[value_field] *= data_point_value_multipliers[idx]
178
- df[value_field] = df[value_field].astype("float32")
179
-
180
- # Convert each date field to a pd datetime using a specific date format
181
- for date_field in field_dict["dates"]:
182
- df[date_field] = pd.to_datetime(df[date_field], format="%Y-%m-%d")
183
-
184
- # Character wrap the system and name fields
185
- with warnings.catch_warnings():
186
- warnings.filterwarnings("ignore", category=FutureWarning)
187
- df.update(
188
- df["x_ray_system_name"].apply(
189
- lambda x: (textwrap.fill(x, char_wrap)).replace("\n", "<br>")
190
- )
191
- )
192
-
193
- df["x_ray_system_name"] = df["x_ray_system_name"].astype("category")
194
- for field in field_dict["names"]:
195
- with warnings.catch_warnings():
196
- warnings.filterwarnings("ignore", category=FutureWarning)
197
- df.update(
198
- df[field].apply(
199
- lambda x: (textwrap.fill(x, char_wrap)).replace("\n", "<br>")
200
- )
201
- )
202
- df[field] = df[field].astype("category")
203
-
204
- if settings.DEBUG:
205
- print(
206
- f"Dataframe fillna, lower case, whitespace stripping etc took {datetime.now() - start}"
207
- )
208
- print("DataFrame info after processing, including memory use, is:")
209
- df.info()
210
-
211
- return df
212
-
213
-
214
- def create_dataframe_time_series(
215
- df,
216
- df_name_col,
217
- df_value_col,
218
- df_date_col="study_date",
219
- time_period="M",
220
- average_choices=None,
221
- group_by_physician=None,
222
- ):
223
- """
224
- Creates a Pandas DataFrame time series of average values grouped by x_ray_system_name and df_name_col
225
-
226
- :param df: the Pandas DataFrame containing the raw data
227
- :param df_name_col: string containing the DataFrame columnn name used to group the data
228
- :param df_value_col: string containing the DataFrame column containing the values to be averaged
229
- :param df_date_col: string containing the DataFrame column containing the dates
230
- :param time_period: string containing the time period to average over; "A" (years), "Q" (quarters), "M" (months),
231
- "W" (weeks), "D" (days)
232
- :param average_choices: list of strings containing one or both of "mean" and "median"
233
- :param group_by_physician: boolean flag to set whether to group by physician
234
- :return: Pandas DataFrame containing the time series of average values grouped by system and name
235
- """
236
- if average_choices is None:
237
- average_choices = ["mean"]
238
-
239
- group_by_column = "x_ray_system_name"
240
- if group_by_physician:
241
- group_by_column = "performing_physician_name"
242
-
243
- df_time_series = (
244
- df.set_index(df_date_col)
245
- .groupby([group_by_column, df_name_col, pd.Grouper(freq=time_period)])
246
- .agg({df_value_col: average_choices})
247
- )
248
- df_time_series.columns = [s + df_value_col for s in average_choices]
249
- df_time_series = df_time_series.reset_index()
250
- return df_time_series
251
-
252
-
253
- def create_dataframe_weekdays(df, df_name_col, df_date_col="study_date"):
254
- """
255
- Creates a Pandas DataFrame of the number of events in each day of the
256
- week, and in hour of that day.
257
-
258
- :param df: Pandas DataFrame containing the raw data; it must have a "study_time" and "x_ray_system_name" column
259
- :param df_name_col: string containing the df column name to group the results by
260
- :param df_date_col: string containing the df column name containing dates
261
- :return: Pandas DataFrame containing the number of studies per day and hour grouped by name
262
- """
263
- start = None
264
- if settings.DEBUG:
265
- start = datetime.now()
266
-
267
- df["weekday"] = pd.Categorical(pd.DatetimeIndex(df[df_date_col]).day_name())
268
- df["hour"] = df["study_time"].apply(lambda row: row.hour).astype("int8")
269
-
270
- df_time_series = (
271
- df.groupby(["x_ray_system_name", "weekday", "hour"])
272
- .agg({df_name_col: "count"})
273
- .reset_index()
274
- )
275
-
276
- if settings.DEBUG:
277
- print(f"Weekday and hour dataframe created in {datetime.now() - start}")
278
-
279
- return df_time_series
280
-
281
-
282
- def create_dataframe_aggregates(df, df_name_cols, df_agg_col, stats_to_use=None):
283
- """
284
- Creates a Pandas DataFrame with the specified statistics (mean, median, count, for example) grouped by
285
- x-ray system name and by the list of provided df_name_cols.
286
-
287
- :param df: Pandas DataFrame containing the raw data; it must have an "x_ray_system_name" column
288
- :param df_name_cols: list of strings representing the DataFrame column names to group by
289
- :param df_agg_col: string containing the DataFrame column over which to calculate the statistics
290
- :param stats_to_use: list of strings containing the statistics to calculate, such as "mean", "median", "count"
291
- :return: Pandas DataFrame containing the grouped aggregate data
292
- """
293
- start = None
294
- if settings.DEBUG:
295
- start = datetime.now()
296
-
297
- # Make it possible to have multiple value cols (DLP, CTDI, for example)
298
- if stats_to_use is None:
299
- stats_to_use = ["count"]
300
-
301
- groupby_cols = ["x_ray_system_name"] + df_name_cols
302
- grouped_df = df.groupby(groupby_cols).agg({df_agg_col: stats_to_use})
303
- grouped_df.columns = grouped_df.columns.droplevel(level=0)
304
- grouped_df = grouped_df.reset_index()
305
-
306
- if settings.DEBUG:
307
- print(f"Aggregated dataframe created in {datetime.now() - start}")
308
-
309
- return grouped_df
310
-
311
-
312
- def plotly_set_default_theme(theme_name):
313
- """
314
- A short method to set the plotly chart theme
315
-
316
- :param theme_name: the name of the theme
317
- :return:
318
- """
319
- pio.templates.default = theme_name
320
-
321
-
322
- def calculate_colour_sequence(scale_name="RdYlBu", n_colours=10):
323
- """
324
- Calculates a sequence of n_colours from the matplotlib colourmap scale_name
325
-
326
- :param scale_name: string containing the name of the matplotlib colour scale to use
327
- :param n_colours: int representing the number of colours required
328
- :return: list of hexadecimal colours from a matplotlib colormap
329
- """
330
- colour_seq = []
331
- cmap = matplotlib.cm.get_cmap(scale_name)
332
- if n_colours > 1:
333
- for i in range(n_colours):
334
- c = cmap(i / (n_colours - 1))
335
- colour_seq.append(matplotlib.colors.rgb2hex(c))
336
- else:
337
- c = cmap(0)
338
- colour_seq.append(matplotlib.colors.rgb2hex(c))
339
-
340
- return colour_seq
341
-
342
-
343
def empty_dataframe_msg(params=None):
    """
    Build the HTML warning shown when there is no data to plot

    :param params: optional dictionary; if it contains "custom_msg_line" that text is
                   appended after the standard message
    :return: string containing an HTML DIV with the empty DataFrame message
    """
    if params and "custom_msg_line" in params:
        extra_line = params["custom_msg_line"]
    else:
        extra_line = ""

    # Built with "+" rather than str.join so a lazily translated string is still accepted
    return (
        "<div class='alert alert-warning' role='alert'>"
        + _("No data left after excluding missing values.")
        + extra_line
        + "</div>"
    )
361
-
362
-
363
def failed_chart_message_div(custom_msg_line, e):
    """
    Returns a string containing an HTML DIV with a failed chart message

    The custom message is always included. When settings.DEBUG is true the text of
    the error is appended in a PRE element, with newlines converted to <br>.

    :param custom_msg_line: string containing a custom line to add to the message
    :param e: Python error object
    :return: string containing the message in an HTML DIV
    """
    msg = "<div class='alert alert-warning' role='alert'>"
    msg += custom_msg_line
    if settings.DEBUG:
        # An exception may carry no args, or a first arg that is not a string;
        # fall back to str() so building the message can never itself fail.
        error_text = str(e.args[0]) if e.args else str(e)
        msg += "<p>Error is:</p>"
        msg += "<pre>" + error_text.replace("\n", "<br>") + "</pre>"
    msg += "</div>"
    return msg
380
-
381
-
382
def csv_data_barchart(fig, params):
    """
    Build a Pandas DataFrame of the bar chart data, suitable for csv download

    The first column holds the category names taken from the first trace. Each trace
    then contributes a value column and a "Frequency" column. When the chart is split
    by performing physician the value column heading also includes the facet value,
    which is recovered from the trace hovertemplate.

    :param fig: Plotly figure containing the data to extract
    :param params: a dictionary of parameters
    :param params["df_name_col"]: (string) DataFrame column containing categories
    :param params["name_axis_title"]: (string) title for the name data
    :param params["value_axis_title"]: (string) title for the value data
    :param params["facet_col"]: (string) DataFrame column used to split data into subgroups
    :return: DataFrame containing the data for download
    """
    traces = fig.to_dict()["data"]
    by_physician = params["df_name_col"] == "performing_physician_name"

    names_df = pd.DataFrame(data=traces[0]["x"], columns=[params["name_axis_title"]])
    # Category labels may contain HTML line breaks; flatten them for the csv
    frames = [names_df.replace("<br>", " ", regex=True)]

    for trace in traces:
        heading_parts = [trace["name"]]
        if by_physician:
            # The facet value sits between "<facet_col>=" and "<br>Performing" in the hover text
            facet_value = (
                trace["hovertemplate"]
                .split(params["facet_col"] + "=")[1]
                .split("<br>Performing")[0]
            ).replace("<br>", " ")
            heading_parts.append(facet_value)

        frequencies = [item[1] for item in trace["customdata"]]
        rows = list(zip(trace["y"], frequencies))

        heading_parts.append(
            params["value_axis_title"]
            .replace("<sup>2</sup>", "^2")
            .replace("<sub>vol</sub>", "vol")
        )
        frames.append(
            pd.DataFrame(data=rows, columns=[" ".join(heading_parts), "Frequency"])
        )

    return pd.concat(frames, axis=1)
445
-
446
-
447
def csv_data_frequency(fig, params):
    """
    Build a Pandas DataFrame of the frequency chart data, suitable for csv download

    The first column holds the x values of the first trace; each trace then adds one
    column of its y values. When the chart is split by performing physician the
    column heading is extended with the second customdata entry of the trace's first
    point and the word "frequency".

    :param fig: Plotly figure containing the data to extract
    :param params: a dictionary of parameters; must include "x_axis_title" and "df_name_col"
    :return: DataFrame containing the data for download
    """
    traces = fig.to_dict()["data"]
    by_physician = params["df_name_col"] == "performing_physician_name"

    frames = [pd.DataFrame(data=traces[0]["x"], columns=[params["x_axis_title"]])]

    for trace in traces:
        if by_physician:
            series_name = trace["customdata"][0][1]
            heading = trace["name"] + " " + series_name + " frequency"
        else:
            heading = trace["name"]
        frames.append(pd.DataFrame(data=trace["y"], columns=[heading]))

    return pd.concat(frames, axis=1)
479
-
480
-
481
def calc_facet_rows_and_height(df, facet_col_name, facet_col_wrap):
    """
    Work out how many rows of subplots are needed and the total chart height. Each row
    is given a hard-coded height of 750 pixels, and the chart is never shorter than
    one row.

    :param df: Pandas DataFrame containing the data
    :param facet_col_name: string containing the DataFrame column name containing the facet names
    :param facet_col_wrap: int representing the number of subplots to have on each row
    :return: two-element tuple containing the chart height in pixels (int) and the number of facet rows (int)
    """
    row_height = 750
    n_facets = len(df[facet_col_name].unique())
    n_facet_rows = math.ceil(n_facets / facet_col_wrap)
    return max(n_facet_rows * row_height, row_height), n_facet_rows
496
-
497
-
498
def save_fig_as_html_div(fig, filename, active=settings.SAVE_CHARTS_AS_HTML):
    """
    Saves the Plotly figure as an HTML file containing a single DIV. The file is saved on the OpenREM server in
    MEDIA_ROOT/charts/yyyy/mm/dd/. Viewing the saved file requires an active internet connection as the Plotly
    JavaScript library is not included in the file.

    This method is not currently accessible to an OpenREM user or administrator - it is present to assist developers
    when producing example charts for the OpenREM documentation. Nothing is written unless active is true; the
    default is taken from settings.SAVE_CHARTS_AS_HTML when this module is loaded.

    Args:
        fig: a Plotly figure
        filename: (string) the filename to use, without the ".html" extension
        active: (boolean) to set whether to save the figure
    """
    if not active:
        return

    datestamp = datetime.now()
    # Build the dated directory once and reuse it for both makedirs and the file path
    path = os.path.join(settings.MEDIA_ROOT, "charts", datestamp.strftime("%Y/%m/%d"))
    os.makedirs(path, exist_ok=True)
    fig.write_html(
        os.path.join(path, filename + ".html"),
        include_plotlyjs="cdn",
        full_html=False,
    )
527
-
528
-
529
def _boxplot_sorted_categories(df, column, sort_field, value_col, ascending):
    """Return the distinct values of column ordered by name, by row count or by mean of value_col."""
    if sort_field == "name":
        return (
            df.sort_values(by=column, ascending=ascending)[column].unique().tolist()
        )

    if sort_field == "frequency":
        ranked = df.groupby(column).agg(freq=(column, "count"))
        rank_col = "freq"
    else:
        ranked = df.groupby(column).agg(mean=(value_col, "mean"))
        rank_col = "mean"

    return (
        ranked.sort_values(by=rank_col, ascending=ascending)
        .reset_index()[column]
        .tolist()
    )


def plotly_boxplot(
    df,
    params,
):
    """
    Produce a plotly boxplot

    Rows with a missing value in params["df_value_col"] are dropped first. One box is
    drawn per category, coloured by the "x_ray_system_name" column of df.

    :param df: Pandas DataFrame containing the data; it must have an "x_ray_system_name" column
    :param params: a dictionary of parameters
    :param params["df_value_col"]: (string) DataFrame column containing values
    :param params["value_axis_title"]: (string) title for the value data
    :param params["df_name_col"]: (string) DataFrame column containing categories
    :param params["name_axis_title"]: (string) title for the category data
    :param params["facet_col"]: (string) DataFrame column used to create subplots
    :param params["facet_col_wrap"]: (int) number of subplots per row
    :param params["sorting_choice"]: 2-element list. [0] sets sort direction (0 is descending), [1] selects the
        field to sort on: "name", "frequency", or anything else to sort by the mean value
    :param params["colourmap"]: (string) colourmap to use
    :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :param params["filename"]: (string) default filename to use for plot bitmap export
    :return: Plotly figure embedded in an HTML DIV; or Plotly figure as a dictionary (if params["return_as_dict"] is
        True); or an error message embedded in an HTML DIV if there was a ValueError when calculating the figure;
        or the empty DataFrame message if no rows remain after dropping missing values
    """
    chart_height = 500
    n_facet_rows = 1

    try:
        # Drop any rows with nan values in the df_value column
        df = df.dropna(subset=[params["df_value_col"]])
        if df.empty:
            return empty_dataframe_msg(params)

        if params["facet_col"]:
            chart_height, n_facet_rows = calc_facet_rows_and_height(
                df, params["facet_col"], params["facet_col_wrap"]
            )

        sort_ascending = params["sorting_choice"][0] != 0
        sort_field = params["sorting_choice"][1].lower()

        # The same ordering rule is applied to the categories, the systems and,
        # when subplots are in use, the facets.
        columns_to_order = [params["df_name_col"], "x_ray_system_name"]
        if params["facet_col"]:
            columns_to_order.append(params["facet_col"])

        sorting_categories = {
            column: _boxplot_sorted_categories(
                df, column, sort_field, params["df_value_col"], sort_ascending
            )
            for column in columns_to_order
        }

        n_colours = len(df.x_ray_system_name.unique())
        colour_sequence = calculate_colour_sequence(params["colourmap"], n_colours)

        fig = px.box(
            df,
            x=params["df_name_col"],
            y=params["df_value_col"],
            facet_col=params["facet_col"],
            facet_col_wrap=params["facet_col_wrap"],
            facet_row_spacing=0.50 / n_facet_rows,
            color=df["x_ray_system_name"],
            labels={
                params["df_value_col"]: params["value_axis_title"],
                params["df_name_col"]: params["name_axis_title"],
                "x_ray_system_name": "System",
            },
            color_discrete_sequence=colour_sequence,
            category_orders=sorting_categories,
            height=chart_height,
        )

        fig.update_traces(quartilemethod="exclusive")

        fig.update_xaxes(
            tickson="boundaries",
            ticks="outside",
            ticklen=5,
            showticklabels=True,
            title=params["name_axis_title"],
        )
        fig.update_yaxes(showticklabels=True, matches=None)

        fig.update_layout(legend_title_text="System")

        # Subplot titles arrive as "column=value"; keep only the value
        fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))

        save_fig_as_html_div(fig, params["filename"])

        if params["return_as_dict"]:
            return fig.to_dict()

        return plot(
            fig,
            output_type="div",
            include_plotlyjs=False,
            config=global_config(
                params["filename"], height_multiplier=chart_height / 500.0
            ),
        )

    except ValueError as e:
        return failed_chart_message_div(
            "Could not resolve chart. Try filtering the data to reduce the number of systems.",
            e,
        )
700
-
701
-
702
def create_freq_sorted_category_list(df, df_name_col, sorting):
    """
    Build the ordered category list used by frequency charts. The data is grouped by df_name_col
    and the non-null entries of every other column counted; the groups are then ordered with
    Pandas DataFrame sort_values
    (https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_values.html).

    sorting[0] is passed to sort_values as the ascending flag

    sorting[1] selects the field to sort on: exactly "name" sorts by df_name_col; anything else sorts by the
    per-group count held in the "x_ray_system_name" column

    :param df: Pandas DataFrame containing the data; it must have an "x_ray_system_name" column
    :param df_name_col: DataFrame column containing the category names
    :param sorting: 2-element list. [0] sets sort direction, [1] used to determine which field to sort on
    :return: dictionary with key df_name_col and a list of sorted categories as the value
    """
    counts_df = df.groupby(df_name_col).count().reset_index()

    # NOTE(review): the comparison is case-sensitive - confirm callers pass lower-case "name"
    sort_column = df_name_col if sorting[1] == "name" else "x_ray_system_name"

    ordered = counts_df.sort_values(by=sort_column, ascending=sorting[0])
    return {df_name_col: list(ordered[df_name_col])}
731
-
732
-
733
def create_sorted_category_list(df, df_name_col, df_value_col, sorting):
    """
    Build the ordered category list used by scatter and over-time charts. Unless sorting by name, the data is
    grouped by df_name_col and the count, mean or median of df_value_col calculated for each group; the result
    is then ordered with Pandas DataFrame sort_values
    (https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_values.html).

    sorting[0] sets sort direction: 0 gives descending order, anything else ascending

    sorting[1] (case-insensitive) selects the sort order: "name" sorts the ungrouped rows by df_name_col;
    "frequency" sorts by count; "mean" sorts by mean; anything else sorts by median

    :param df: Pandas DataFrame containing the data
    :param df_name_col: DataFrame column containing the category names. Used to group the data
    :param df_value_col: DataFrame column containing values to count or average
    :param sorting: 2-element list. [0] sets sort direction, [1] used to determine which field to sort on
    :return: dictionary with key df_name_col and a list of sorted categories as the value; when sorting by name
        the list has one entry per row of df rather than one per category
    """
    grouped = df.groupby(df_name_col)
    sort_field = sorting[1].lower()

    if sort_field == "name":
        sort_by = df_name_col
        frame = df
    else:
        # Any unrecognised choice falls back to the median
        sort_by = {"frequency": "count", "mean": "mean"}.get(sort_field, "median")
        frame = grouped

    if sort_by in ["count", "mean", "median"]:
        # Aggregate, then flatten the (value column, statistic) header to the statistic name
        frame = frame.agg({df_value_col: [sort_by]})
        frame.columns = frame.columns.droplevel(level=0)

    frame = frame.reset_index()

    ascending_order = sorting[0] != 0

    ordered = frame.sort_values(by=sort_by, ascending=ascending_order)
    return {df_name_col: list(ordered[df_name_col])}
780
-
781
-
782
def _barchart_sorted_categories(df, column, sort_field, average_col, ascending):
    """Return the distinct values of column ordered by name, by summed count or by mean of average_col."""
    if sort_field == "name":
        return (
            df.sort_values(by=column, ascending=ascending)[column].unique().tolist()
        )

    if sort_field == "frequency":
        rank_col, how = "count", "sum"
    else:
        rank_col, how = average_col, "mean"

    return (
        df.groupby(column)
        .agg({rank_col: how})
        .sort_values(by=rank_col, ascending=ascending)
        .reset_index()[column]
        .tolist()
    )


def plotly_barchart(
    df,
    params,
    csv_name="OpenREM chart data.csv",
):
    """
    Create a plotly bar chart

    Bars are grouped by category and coloured by the "x_ray_system_name" column of df. The DataFrame
    is expected to hold already aggregated data: a column named by params["average_choice"] and a
    "count" column.

    :param df: Pandas DataFrame containing the data
    :param params: a dictionary of parameters
    :param params["average_choice"]: (string) DataFrame column containing values ("mean" or "median")
    :param params["value_axis_title"]: (string) y-axis title
    :param params["df_name_col"]: (string) DataFrame column containing categories
    :param params["name_axis_title"]: (string) x-axis title
    :param params["facet_col"]: (string) DataFrame column used to create subplots
    :param params["facet_col_wrap"]: (int) number of subplots per row
    :param params["sorting_choice"]: 2-element list. [0] sets sort direction (0 is descending), [1] selects the
        field to sort on: "name", "frequency", or anything else to sort by the average value
    :param params["colourmap"]: (string) colourmap to use
    :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :param params["filename"]: (string) default filename to use for plot bitmap export
    :param csv_name: (string) default filename to use for plot csv export
    :return: two-element tuple. The first element is the Plotly figure embedded in an HTML DIV, or the Plotly
        figure as a dictionary (if params["return_as_dict"] is True), or the empty DataFrame message if df is
        empty. The second element is the csv download link, or None when a dictionary or the empty message is
        returned
    """
    if df.empty:
        return empty_dataframe_msg(params), None

    chart_height = 500
    n_facet_rows = 1

    if params["facet_col"]:
        chart_height, n_facet_rows = calc_facet_rows_and_height(
            df, params["facet_col"], params["facet_col_wrap"]
        )

    sort_ascending = params["sorting_choice"][0] != 0
    sort_field = params["sorting_choice"][1].lower()

    # The same ordering rule is applied to the categories, the systems and,
    # when subplots are in use, the facets.
    columns_to_order = [params["df_name_col"], "x_ray_system_name"]
    if params["facet_col"]:
        columns_to_order.append(params["facet_col"])

    sorting_categories = {
        column: _barchart_sorted_categories(
            df, column, sort_field, params["average_choice"], sort_ascending
        )
        for column in columns_to_order
    }

    n_colours = len(df.x_ray_system_name.unique())
    colour_sequence = calculate_colour_sequence(params["colourmap"], n_colours)

    fig = px.bar(
        df,
        x=params["df_name_col"],
        y=params["average_choice"],
        color="x_ray_system_name",
        barmode="group",
        facet_col=params["facet_col"],
        facet_col_wrap=params["facet_col_wrap"],
        facet_row_spacing=0.50 / n_facet_rows,
        labels={
            params["average_choice"]: params["value_axis_title"],
            params["df_name_col"]: params["name_axis_title"],
            "x_ray_system_name": "System",
            "count": "Frequency",
        },
        category_orders=sorting_categories,
        color_discrete_sequence=colour_sequence,
        hover_name="x_ray_system_name",
        hover_data={
            "x_ray_system_name": False,
            params["average_choice"]: ":.2f",
            "count": ":.0d",
        },
        height=chart_height,
    )

    fig.update_xaxes(
        tickson="boundaries", ticks="outside", ticklen=5, showticklabels=True
    )
    fig.update_yaxes(showticklabels=True, matches=None)

    # Subplot titles arrive as "column=value"; keep only the value
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))

    save_fig_as_html_div(fig, params["filename"])

    if params["return_as_dict"]:
        return fig.to_dict(), None

    csv_data = download_link(
        csv_data_barchart(fig, params),
        csv_name,
    )

    return (
        plot(
            fig,
            output_type="div",
            include_plotlyjs=False,
            config=global_config(
                params["filename"], height_multiplier=chart_height / 500.0
            ),
        ),
        csv_data,
    )
955
-
956
-
957
def _histogram_sorted_values(df, column, params, ascending):
    """Return the distinct values of column ordered by name, by row count or by mean of the value column."""
    sort_field = params["sorting_choice"][1].lower()

    if sort_field == "name":
        return (
            df.sort_values(by=column, ascending=ascending)[column].unique().tolist()
        )

    if sort_field == "frequency":
        ranked = df.groupby(column).agg(freq=(column, "count"))
        rank_col = "freq"
    else:
        ranked = df.groupby(column).agg(mean=(params["df_value_col"], "mean"))
        rank_col = "mean"

    return (
        ranked.sort_values(by=rank_col, ascending=ascending)
        .reset_index()[column]
        .tolist()
    )


def plotly_histogram_barchart(
    df,
    params,
):
    """
    Create a plotly histogram bar chart

    One subplot is produced per facet value; within each subplot one bar trace is added per category.
    Facets and categories with no non-missing values are skipped.

    :param df: Pandas DataFrame containing the data
    :param params: a dictionary of parameters
    :param params["df_value_col"]: (string) DataFrame column containing values
    :param params["value_axis_title"]: (string) text appended to the facet name to form each x-axis title
    :param params["df_facet_col"]: (string) DataFrame column used to create subplots
    :param params["df_category_col"]: (string) DataFrame column containing the categories plotted in each subplot
    :param params["facet_col_wrap"]: (int) number of subplots per row
    :param params["n_bins"]: (int) number of histogram bins to use
    :param params["colourmap"]: (string) colourmap to use
    :param params["sorting_choice"]: 2-element list. [0] sets sort direction (0 is descending), [1] selects the
        field to sort on: "name", "frequency", or anything else to sort by the mean value
    :param params["global_max_min"]: (boolean) flag to calculate global max and min or per-subplot max and min
    :param params["legend_title"]: (string) legend title
    :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :param params["filename"]: (string) default filename to use for plot bitmap export
    :return: Plotly figure embedded in an HTML DIV; or Plotly figure as a dictionary (if params["return_as_dict"] is
        True); or an error message embedded in an HTML DIV if there was a ValueError when calculating the figure;
        or the empty DataFrame message if df is empty
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-statements
    if df.empty:
        return empty_dataframe_msg(params)

    sort_ascending = params["sorting_choice"][0] != 0

    df_facet_list = _histogram_sorted_values(
        df, params["df_facet_col"], params, sort_ascending
    )
    df_category_list = _histogram_sorted_values(
        df, params["df_category_col"], params, sort_ascending
    )

    chart_height, n_facet_rows = calc_facet_rows_and_height(
        df, params["df_facet_col"], params["facet_col_wrap"]
    )

    n_colours = len(df[params["df_category_col"]].unique())
    colour_sequence = calculate_colour_sequence(params["colourmap"], n_colours)

    bins = None
    mid_bins = None
    bin_labels = None
    if params["global_max_min"]:
        # One set of bins shared by every subplot
        bin_labels, bins, mid_bins = calc_histogram_bin_data(
            df, params["df_value_col"], n_bins=params["n_bins"]
        )

    try:
        # Never ask for more columns than there are facets
        n_cols = params["facet_col_wrap"]
        if len(df_facet_list) < n_cols:
            n_cols = len(df_facet_list)

        fig = make_subplots(
            rows=n_facet_rows,
            cols=n_cols,
            vertical_spacing=0.40 / n_facet_rows,
        )

        current_row = 1
        current_col = 1
        # Categories in first-seen order; the position fixes the colour and legend group
        category_names = []

        for facet_name in df_facet_list:
            facet_subset = df[df[params["df_facet_col"]] == facet_name].dropna(
                subset=[params["df_value_col"]]
            )

            # If the subset is empty then skip to the next facet
            if facet_subset.empty:
                continue

            if not params["global_max_min"]:
                # Bins are recalculated for each subplot from that subplot's own range
                bin_labels, bins, mid_bins = calc_histogram_bin_data(
                    facet_subset, params["df_value_col"], n_bins=params["n_bins"]
                )

            for category_name in df_category_list:
                category_subset = facet_subset[
                    facet_subset[params["df_category_col"]] == category_name
                ].dropna(subset=[params["df_value_col"]])

                # If the subset is empty then skip to the next category
                if category_subset.empty:
                    continue

                # Show each category in the legend only the first time it is drawn
                if category_name in category_names:
                    show_legend = False
                else:
                    show_legend = True
                    category_names.append(category_name)

                category_idx = category_names.index(category_name)

                histogram_data = np.histogram(
                    category_subset[params["df_value_col"]].values, bins=bins
                )

                trace = go.Bar(
                    x=mid_bins,
                    y=histogram_data[0],
                    name=category_name,
                    marker_color=colour_sequence[category_idx],
                    legendgroup=category_idx,
                    showlegend=show_legend,
                    customdata=bin_labels,
                    hovertemplate=f"<b>{facet_name}</b><br>"
                    + f"{category_name}<br>"
                    + "Frequency: %{y:.0d}<br>"
                    + "Bin range: %{customdata}<br>"
                    + "Mid-bin: %{x:.2f}<br>"
                    + "<extra></extra>",
                )

                fig.append_trace(trace, row=current_row, col=current_col)

            fig.update_xaxes(
                title_text=facet_name + " " + params["value_axis_title"],
                tickvals=bins,
                ticks="outside",
                ticklen=5,
                row=current_row,
                col=current_col,
            )

            # Only the left-most subplot of each row gets a y-axis title
            if current_col == 1:
                fig.update_yaxes(
                    title_text="Frequency", row=current_row, col=current_col
                )

            # Move to the next subplot position, wrapping to a new row when needed
            current_col += 1
            if current_col > n_cols:
                current_row += 1
                current_col = 1

        layout = go.Layout(height=chart_height)

        fig.update_layout(layout)
        fig.update_layout(legend_title_text=params["legend_title"])

        save_fig_as_html_div(fig, params["filename"])

        if params["return_as_dict"]:
            return fig.to_dict()

        return plot(
            fig,
            output_type="div",
            include_plotlyjs=False,
            config=global_config(
                params["filename"], height_multiplier=chart_height / 500.0
            ),
        )

    except ValueError as e:
        return failed_chart_message_div(
            "Could not resolve chart. Try filtering the data to reduce the number of categories or systems.",
            e,
        )
1171
-
1172
-
1173
def calc_histogram_bin_data(df, value_col_name, n_bins=10):
    """
    Calculates histogram bin label text, bin boundaries and bin mid-points

    The bins are equal-width and span the minimum to the maximum of the value column.

    :param df: the Pandas DataFrame containing the data
    :param value_col_name: (string) name of the DataFrame column that contains the values
    :param n_bins: (int) the number of bins to use
    :return: a three element tuple containing the bin labels (numpy array of strings), the n_bins + 1 bin
        boundaries and the n_bins bin mid-points
    """
    # Statistic names are passed as strings: handing the builtin min/max callables
    # to agg is deprecated in recent pandas and gives the same result.
    min_bin_value, max_bin_value = df[value_col_name].agg(["min", "max"])
    bins = np.linspace(min_bin_value, max_bin_value, n_bins + 1)
    mid_bins = 0.5 * (bins[:-1] + bins[1:])
    bin_labels = np.array(
        [f"{lower:.2f}≤x<{upper:.2f}" for lower, upper in zip(bins[:-1], bins[1:])]
    )
    return bin_labels, bins, mid_bins
1189
-
1190
-
1191
- def plotly_binned_statistic_barchart(
1192
- df,
1193
- params,
1194
- ):
1195
- """
1196
- Create a plotly binned statistic bar chart
1197
-
1198
- :param df: Pandas DataFrame containing the data
1199
- :param params: a dictionary of parameters
1200
- :param params["df_category_col"]: (string) DataFrame column containing categories
1201
- :param params["df_facet_col"]: (string) DataFrame column used to create subplots
1202
- :param params["facet_title"]: (string) Subplot title
1203
- :param params["facet_col_wrap"]: (int) number of subplots per row
1204
- :param params["user_bins"]: list of ints containing bin edges for binning
1205
- :param params["df_category_col"]: (string) DataFrame column containing categories
1206
- :param params["df_x_value_col"]: (string) DataFrame column containing x data
1207
- :param params["df_y_value_col"]: (string) DataFrame column containing y data
1208
- :param params["x_axis_title"]: (string) Title for x-axis
1209
- :param params["y_axis_title"]: (string) Title for y-axis
1210
- :param params["stat_name"]: (string) "mean" or "median"
1211
- :param params["sorting_choice"]: 2-element list. [0] sets sort direction, [1] used to determine which field to sort
1212
- :param params["colourmap"]: (string) colourmap to use
1213
- :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
1214
- :param params["filename"]: (string) default filename to use for plot bitmap export
1215
- :return: Plotly figure embedded in an HTML DIV; or Plotly figure as a dictionary (if params["return_as_dict"] is
1216
- True); or an error message embedded in an HTML DIV if there was a ValueError when calculating the figure
1217
- """
1218
- # pylint: disable=too-many-locals
1219
- # pylint: disable=too-many-branches
1220
- # pylint: disable=too-many-statements
1221
- if df.empty:
1222
- return empty_dataframe_msg(params)
1223
-
1224
- chart_height, n_facet_rows = calc_facet_rows_and_height(
1225
- df, params["df_facet_col"], params["facet_col_wrap"]
1226
- )
1227
-
1228
- n_colours = len(df[params["df_category_col"]].unique())
1229
- colour_sequence = calculate_colour_sequence(params["colourmap"], n_colours)
1230
-
1231
- sort_ascending = True
1232
- if params["sorting_choice"][0] == 0:
1233
- sort_ascending = False
1234
-
1235
- if params["sorting_choice"][1].lower() == "name":
1236
- df_category_name_list = (
1237
- (
1238
- df.sort_values(by=params["df_category_col"], ascending=sort_ascending)[
1239
- params["df_category_col"]
1240
- ]
1241
- )
1242
- .unique()
1243
- .tolist()
1244
- )
1245
- df_facet_category_list = (
1246
- (
1247
- df.sort_values(by=params["df_facet_col"], ascending=sort_ascending)[
1248
- params["df_facet_col"]
1249
- ]
1250
- )
1251
- .unique()
1252
- .tolist()
1253
- )
1254
- elif params["sorting_choice"][1].lower() == "frequency":
1255
- df_category_name_list = (
1256
- df.groupby(params["df_category_col"])
1257
- .agg(freq=(params["df_category_col"], "count"))
1258
- .sort_values(by="freq", ascending=sort_ascending)
1259
- .reset_index()[params["df_category_col"]]
1260
- .tolist()
1261
- )
1262
- df_facet_category_list = (
1263
- df.groupby(params["df_facet_col"])
1264
- .agg(freq=(params["df_facet_col"], "count"))
1265
- .sort_values(by="freq", ascending=sort_ascending)
1266
- .reset_index()[params["df_facet_col"]]
1267
- .tolist()
1268
- )
1269
- else:
1270
- df_category_name_list = (
1271
- df.groupby(params["df_category_col"])
1272
- .agg(avg=(params["df_y_value_col"], params["stat_name"]))
1273
- .sort_values(by="avg", ascending=sort_ascending)
1274
- .reset_index()[params["df_category_col"]]
1275
- .tolist()
1276
- )
1277
- df_facet_category_list = (
1278
- df.groupby(params["df_facet_col"])
1279
- .agg(avg=(params["df_y_value_col"], params["stat_name"]))
1280
- .sort_values(by="avg", ascending=sort_ascending)
1281
- .reset_index()[params["df_facet_col"]]
1282
- .tolist()
1283
- )
1284
-
1285
- try:
1286
- n_cols = params["facet_col_wrap"]
1287
- if len(df_facet_category_list) < n_cols:
1288
- n_cols = len(df_facet_category_list)
1289
-
1290
- fig = make_subplots(
1291
- rows=n_facet_rows,
1292
- cols=n_cols,
1293
- vertical_spacing=0.40 / n_facet_rows,
1294
- )
1295
-
1296
- current_row = 1
1297
- current_col = 1
1298
- current_facet = 0
1299
- category_names = []
1300
-
1301
- bins = np.sort(np.array(params["user_bins"]))
1302
-
1303
- for facet_name in df_facet_category_list:
1304
- facet_subset = df[df[params["df_facet_col"]] == facet_name].dropna(
1305
- subset=[params["df_x_value_col"], params["df_y_value_col"]]
1306
- )
1307
-
1308
- # Skip to the next facet if the subset is empty
1309
- if facet_subset.empty:
1310
- continue
1311
-
1312
- facet_x_min = facet_subset[params["df_x_value_col"]].min()
1313
- facet_x_max = facet_subset[params["df_x_value_col"]].max()
1314
-
1315
- if np.isfinite(facet_x_min):
1316
- if facet_x_min < np.amin(bins):
1317
- bins = np.concatenate([[facet_x_min], bins])
1318
- if np.isfinite(facet_x_max):
1319
- if facet_x_max > np.amax(bins):
1320
- bins = np.concatenate([bins, [facet_x_max]])
1321
-
1322
- bin_labels = np.array(
1323
- ["{:.0f}≤x<{:.0f}".format(i, j) for i, j in zip(bins[:-1], bins[1:])]
1324
- )
1325
-
1326
- for category_name in df_category_name_list:
1327
- category_subset = facet_subset[
1328
- facet_subset[params["df_category_col"]] == category_name
1329
- ].dropna(subset=[params["df_x_value_col"], params["df_y_value_col"]])
1330
-
1331
- # Skip to the next category name if the subset is empty
1332
- if category_subset.empty:
1333
- continue
1334
-
1335
- if len(category_subset.index) > 0:
1336
- if category_name in category_names:
1337
- show_legend = False
1338
- else:
1339
- show_legend = True
1340
- category_names.append(category_name)
1341
-
1342
- category_idx = category_names.index(category_name)
1343
-
1344
- binned_stats = stats.binned_statistic(
1345
- category_subset[params["df_x_value_col"]].values,
1346
- category_subset[params["df_y_value_col"]].values,
1347
- statistic=params["stat_name"],
1348
- bins=bins,
1349
- )
1350
- bin_counts = np.bincount(binned_stats[2])
1351
- trace_labels = np.array(
1352
- [
1353
- "Frequency: {}<br>Bin range: {}".format(i, j)
1354
- for i, j in zip(bin_counts[1:], bin_labels)
1355
- ]
1356
- )
1357
-
1358
- trace = go.Bar(
1359
- x=bin_labels,
1360
- y=binned_stats[0],
1361
- name=category_name,
1362
- marker_color=colour_sequence[category_idx],
1363
- legendgroup=category_idx,
1364
- showlegend=show_legend,
1365
- customdata=trace_labels,
1366
- hovertemplate=f"<b>{facet_name}</b><br>"
1367
- + f"{category_name}<br>"
1368
- + f"{params['stat_name'].capitalize()}: "
1369
- + "%{y:.2f}<br>"
1370
- + "%{customdata}<br>"
1371
- + "<extra></extra>",
1372
- )
1373
-
1374
- fig.append_trace(trace, row=current_row, col=current_col)
1375
-
1376
- fig.update_xaxes(
1377
- title_text=facet_name + " " + params["x_axis_title"],
1378
- tickson="boundaries",
1379
- ticks="outside",
1380
- ticklen=5,
1381
- row=current_row,
1382
- col=current_col,
1383
- )
1384
-
1385
- if current_col == 1:
1386
- fig.update_yaxes(
1387
- title_text=params["stat_name"].capitalize()
1388
- + " "
1389
- + params["y_axis_title"],
1390
- row=current_row,
1391
- col=current_col,
1392
- )
1393
-
1394
- current_facet += 1
1395
- current_col += 1
1396
- if current_col > n_cols:
1397
- current_row += 1
1398
- current_col = 1
1399
-
1400
- layout = go.Layout(height=chart_height)
1401
-
1402
- fig.update_layout(layout)
1403
- fig.update_layout(legend_title_text=params["facet_title"])
1404
-
1405
- save_fig_as_html_div(fig, params["filename"])
1406
-
1407
- if params["return_as_dict"]:
1408
- return fig.to_dict()
1409
- else:
1410
- return plot(
1411
- fig,
1412
- output_type="div",
1413
- include_plotlyjs=False,
1414
- config=global_config(
1415
- params["filename"], height_multiplier=chart_height / 500.0
1416
- ),
1417
- )
1418
-
1419
- except ValueError as e:
1420
- return failed_chart_message_div(
1421
- "Could not resolve chart. Try filtering the data to reduce the number of categories or systems.",
1422
- e,
1423
- )
1424
-
1425
-
1426
def plotly_timeseries_linechart(
    df,
    params,
):
    """
    Build a faceted Plotly chart of values over time, drawn as markers joined by lines

    :param df: Pandas DataFrame containing the data
    :param params: a dictionary of parameters
    :param params["facet_col"]: (string) DataFrame column used to create subplots
    :param params["facet_col_wrap"]: (int) number of subplots per row
    :param params["facet_title"]: (string) subplot title
    :param params["df_value_col"]: (string) DataFrame column containing values
    :param params["value_axis_title"]: (string) y-axis title
    :param params["colourmap"]: (string) colourmap to use
    :param params["df_date_col"]: (string) DataFrame column containing dates
    :param params["df_count_col"]: (string) DataFrame column containing frequency data
    :param params["df_name_col"]: (string) DataFrame column containing categories
    :param params["legend_title"]: (string) legend title
    :param params["name_axis_title"]: (string) x-axis title
    :param params["sorted_category_list"]: category ordering, handed to Plotly Express as category_orders
    :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :param params["filename"]: (string) default filename to use for plot bitmap export
    :return: Plotly figure embedded in an HTML DIV; or Plotly figure as a dictionary (if "return_as_dict" is True);
        or an error message embedded in an HTML DIV if there was a ValueError when calculating the figure
    """
    if df.empty:
        return empty_dataframe_msg(params)

    chart_height, n_facet_rows = calc_facet_rows_and_height(
        df, params["facet_col"], params["facet_col_wrap"]
    )

    # One colour per distinct category in the name column
    category_values = df[params["df_name_col"]].unique()
    colour_sequence = calculate_colour_sequence(
        params["colourmap"], len(category_values)
    )

    def _keep_text_after_equals(annotation):
        # Facet annotations are changed to the text following the last "="
        return annotation.update(text=annotation.text.split("=")[-1])

    try:
        axis_labels = {
            params["facet_col"]: params["facet_title"],
            params["df_value_col"]: params["value_axis_title"],
            params["df_count_col"]: "Frequency",
            params["df_name_col"]: params["legend_title"],
            params["df_date_col"]: params["name_axis_title"],
            "x_ray_system_name": "System",
        }
        hover_fields = {
            params["df_name_col"]: False,
            params["df_value_col"]: ":.2f",
            params["df_count_col"]: ":.0f",
        }

        fig = px.scatter(
            df,
            x=params["df_date_col"],
            y=params["df_value_col"],
            color=params["df_name_col"],
            facet_col=params["facet_col"],
            facet_col_wrap=params["facet_col_wrap"],
            facet_row_spacing=0.40 / n_facet_rows,
            labels=axis_labels,
            hover_name=params["df_name_col"],
            hover_data=hover_fields,
            color_discrete_sequence=colour_sequence,
            category_orders=params["sorted_category_list"],
            height=chart_height,
            render_mode="svg",
        )

        # Turn each scatter trace into a line chart with visible markers
        for trace in fig.data:
            trace.update(mode="markers+lines")

        fig.update_xaxes(showticklabels=True, ticks="outside", ticklen=5)
        fig.update_yaxes(showticklabels=True, matches=None)
        fig.update_layout(legend_title_text=params["legend_title"])
        fig.for_each_annotation(_keep_text_after_equals)

        save_fig_as_html_div(fig, params["filename"])

        if params["return_as_dict"]:
            return fig.to_dict()

        return plot(
            fig,
            output_type="div",
            include_plotlyjs=False,
            config=global_config(
                params["filename"], height_multiplier=chart_height / 500.0
            ),
        )

    except ValueError as e:
        return failed_chart_message_div(
            "Could not resolve chart. Try filtering the data to reduce the number of categories or systems.",
            e,
        )
-
1525
-
1526
def plotly_scatter(
    df,
    params,
):
    """
    Build a faceted Plotly scatter chart of one DataFrame column against another

    The params dictionary is updated in place: "df_category_name_col" and "df_group_col" are added, and
    "legend_title" is replaced with "System" when params["grouping_choice"] is "series".

    :param df: Pandas DataFrame containing the data; the code also reads its "x_ray_system_name" column
    :param params: a dictionary of parameters
    :param params["df_name_col"]: (string) DataFrame column containing categories
    :param params["df_x_col"]: (string) DataFrame column containing x values
    :param params["df_y_col"]: (string) DataFrame column containing y values
    :param params["sorting_choice"]: 2-element list. [0] sets sort direction, [1] used to determine which field to sort
    :param params["grouping_choice"]: (string) "series" or "system"
    :param params["legend_title"]: (string) legend title
    :param params["facet_col_wrap"]: (int) number of subplots per row
    :param params["colourmap"]: (string) colourmap to use
    :param params["x_axis_title"]: (string) x-axis title
    :param params["y_axis_title"]: (string) y-axis title
    :param params["filename"]: (string) default filename to use for plot bitmap export
    :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :return: Plotly figure embedded in an HTML DIV; or Plotly figure as a dictionary (if "return_as_dict" is True);
        or an error message embedded in an HTML DIV if there was a ValueError when calculating the figure
    """
    if df.empty:
        return empty_dataframe_msg(params)

    system_col = "x_ray_system_name"
    name_col = params["df_name_col"]

    # "series" grouping swaps the roles: systems are coloured, names become the facets
    if params["grouping_choice"] == "series":
        params["df_category_name_col"] = system_col
        params["df_group_col"] = name_col
        params["legend_title"] = "System"
    else:
        params["df_category_name_col"] = name_col
        params["df_group_col"] = system_col

    category_col = params["df_category_name_col"]
    group_col = params["df_group_col"]

    ascending = params["sorting_choice"][0] != 0
    sort_field = params["sorting_choice"][1].lower()

    def _ordered_values(column):
        # Distinct values of `column`, ordered by name, by frequency, or (any other
        # sort field) by the mean of the y column
        if sort_field == "name":
            by_name = df.sort_values(by=column, ascending=ascending)
            return by_name[column].unique().tolist()
        if sort_field == "frequency":
            counts = df.groupby(column).agg(freq=(column, "count"))
            counts = counts.sort_values(by="freq", ascending=ascending)
            return counts.reset_index()[column].tolist()
        means = df.groupby(column).agg(mean=(params["df_y_col"], "mean"))
        means = means.sort_values(by="mean", ascending=ascending)
        return means.reset_index()[column].tolist()

    # The ordering is calculated from the full DataFrame, before rows with missing
    # x or y values are dropped
    sorting_categories = {
        category_col: _ordered_values(category_col),
        system_col: _ordered_values(system_col),
        group_col: _ordered_values(group_col),
    }

    try:
        # Drop any rows with nan values in x or y
        plot_df = df.dropna(subset=[params["df_x_col"], params["df_y_col"]])
        if plot_df.empty:
            return empty_dataframe_msg(params)

        chart_height, n_facet_rows = calc_facet_rows_and_height(
            plot_df, group_col, params["facet_col_wrap"]
        )

        distinct_categories = plot_df[category_col].unique()
        colour_sequence = calculate_colour_sequence(
            params["colourmap"], len(distinct_categories)
        )

        axis_labels = {
            params["df_x_col"]: params["x_axis_title"],
            params["df_y_col"]: params["y_axis_title"],
            category_col: params["legend_title"],
        }

        fig = px.scatter(
            plot_df,
            x=params["df_x_col"],
            y=params["df_y_col"],
            color=category_col,
            facet_col=group_col,
            facet_col_wrap=params["facet_col_wrap"],
            facet_row_spacing=0.40 / n_facet_rows,
            labels=axis_labels,
            color_discrete_sequence=colour_sequence,
            category_orders=sorting_categories,
            opacity=0.6,
            height=chart_height,
            render_mode="svg",  # "webgl",
        )

        fig.update_traces(marker_line=dict(width=1, color="LightSlateGray"))

        # matches=None: each subplot keeps its own axis ranges
        fig.update_xaxes(showticklabels=True, matches=None)
        fig.update_yaxes(showticklabels=True, matches=None)

        fig.update_layout(legend_title_text=params["legend_title"])

        # Facet annotations are changed to the text following the last "="
        fig.for_each_annotation(
            lambda annotation: annotation.update(text=annotation.text.split("=")[-1])
        )

        save_fig_as_html_div(fig, params["filename"])

        if params["return_as_dict"]:
            return fig.to_dict()

        return plot(
            fig,
            output_type="div",
            include_plotlyjs=False,
            config=global_config(
                params["filename"], height_multiplier=chart_height / 500.0
            ),
        )

    except ValueError as e:
        return failed_chart_message_div(
            "Could not resolve chart. Try filtering the data to reduce the number of categories or systems.",
            e,
        )
-
1700
-
1701
def plotly_barchart_weekdays(
    df,
    df_name_col,
    df_value_col,
    name_axis_title="",
    value_axis_title="",
    colourmap="RdYlBu",
    filename="OpenREM_workload_chart",
    facet_col_wrap=3,
    sorting_choice=None,
    return_as_dict=False,
):
    """
    Build a Plotly bar chart of event workload with one subplot per x-ray system

    The DataFrame is faceted on its "x_ray_system_name" column, and its "weekday" and "hour" columns are
    included in the hover data.

    :param df: Pandas DataFrame containing the data
    :param df_name_col: (string) DataFrame column containing categories
    :param df_value_col: (string) DataFrame column containing values
    :param name_axis_title: (string) x-axis title
    :param value_axis_title: (string) y-axis title
    :param colourmap: (string) colourmap to use
    :param filename: (string) default filename to use for plot bitmap export
    :param facet_col_wrap: (int) number of subplots per row
    :param sorting_choice: 2-element list. [0] sets sort direction, [1] used to determine which field to sort;
        None is treated as [1, "name"], i.e. ascending name order
    :param return_as_dict: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :return: Plotly figure embedded in an HTML DIV; or Plotly figure as a dictionary (if "return_as_dict" is True);
        or an error message embedded in an HTML DIV if there was a ValueError when calculating the figure
    """
    # pylint: disable=too-many-locals
    if df.empty:
        return empty_dataframe_msg()

    if sorting_choice is None:
        # Set default sorting to be ascending name order
        sorting_choice = [1, "name"]

    ascending = sorting_choice[0] != 0
    system_col = "x_ray_system_name"

    if sorting_choice[1].lower() == "name":
        systems_by_name = df.sort_values(by=system_col, ascending=ascending)
        system_order = systems_by_name[system_col].unique().tolist()
    else:
        # Any other sort field orders the systems by their summed values
        system_totals = df.groupby(system_col).agg(freq=(df_value_col, "sum"))
        system_totals = system_totals.sort_values(by="freq", ascending=ascending)
        system_order = system_totals.reset_index()[system_col].tolist()

    sorting_categories = {system_col: system_order}

    chart_height, n_facet_rows = calc_facet_rows_and_height(
        df, system_col, facet_col_wrap
    )

    weekday_names = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]

    try:
        axis_labels = {
            df_name_col: name_axis_title,
            df_value_col: value_axis_title,
            "x_ray_system_name": "System",
            "hour": "Hour",
        }
        hover_fields = {
            "x_ray_system_name": False,
            "weekday": True,
            "hour": ":.2f",
            df_value_col: True,
        }

        fig = px.bar(
            df,
            x=df_name_col,
            y=df_value_col,
            facet_col=system_col,
            facet_col_wrap=facet_col_wrap,
            facet_row_spacing=0.40 / n_facet_rows,
            color=df_value_col,
            labels=axis_labels,
            color_continuous_scale=colourmap,
            category_orders=sorting_categories,
            hover_name=system_col,
            hover_data=hover_fields,
            height=chart_height,
        )

        fig.update_xaxes(
            categoryarray=weekday_names,
            tickson="boundaries",
            showticklabels=True,
        )

        # Facet annotations are changed to the text following the last "="
        fig.for_each_annotation(
            lambda annotation: annotation.update(text=annotation.text.split("=")[-1])
        )

        save_fig_as_html_div(fig, filename)

        if return_as_dict:
            return fig.to_dict()

        return plot(
            fig,
            output_type="div",
            include_plotlyjs=False,
            config=global_config(filename, height_multiplier=chart_height / 500.0),
        )

    except ValueError as e:
        return failed_chart_message_div(
            "Could not resolve chart. Try filtering the data to reduce the number of systems.",
            e,
        )
-
1825
-
1826
def plotly_frequency_barchart(
    df,
    params,
    csv_name="OpenREM chart data.csv",
):
    """
    Create a plotly bar chart of event frequency

    The params dictionary is updated in place: "groupby_cols" is filled in when it is None and, when
    params["grouping_choice"] is "series", "x_axis_title", "legend_title" and "df_x_axis_col" are replaced.

    :param df: Pandas DataFrame containing the data
    :param params: a dictionary of parameters
    :param params["df_name_col"]: (string) DataFrame column containing the names that are counted
    :param params["df_x_axis_col"]: (string) DataFrame column containing categories
    :param params["x_axis_title"]: (string) x-axis title
    :param params["groupby_cols"]: list of strings with DataFrame columns to group data by; None means group
        by params["df_name_col"] only
    :param params["grouping_choice"]: (string) "series" or "system"
    :param params["sorting_choice"]: 2-element list. [0] sets sort direction, [1] used to determine which field to sort
    :param params["legend_title"]: (string) legend title
    :param params["facet_col"]: (string) DataFrame column used to create subplots; a falsy value (None or an
        empty string) means no subplots
    :param params["facet_col_wrap"]: (int) number of subplots per row
    :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :param params["colourmap"]: (string) colourmap to use
    :param params["filename"]: (string) default filename to use for plot bitmap export
    :param csv_name: (string) default filename to use for plot csv export
    :return: a 2-element tuple. [0] is the Plotly figure embedded in an HTML DIV; or the Plotly figure as a
        dictionary (if "return_as_dict" is True); or a message embedded in an HTML DIV if the DataFrame was
        empty or there was a ValueError when calculating the figure. [1] is an HTML link for downloading the
        chart data as csv, or None whenever [0] is not a chart embedded in an HTML DIV
    """
    if df.empty:
        return empty_dataframe_msg(params), None

    if params["groupby_cols"] is None:
        params["groupby_cols"] = [params["df_name_col"]]

    df_aggregated = create_dataframe_aggregates(
        df, params["groupby_cols"], params["df_name_col"], ["count"]
    )

    df_legend_col = params["df_name_col"]
    if params["grouping_choice"] == "series":
        # Systems go in the legend and the names move to the x-axis
        df_legend_col = "x_ray_system_name"
        params["x_axis_title"] = params["legend_title"]
        params["legend_title"] = "System"
        params["df_x_axis_col"] = params["df_name_col"]

    # Normalise once so that every later check agrees on whether subplots are in use
    facet_col = params["facet_col"] or None

    chart_height = 500
    n_facet_rows = 1
    if facet_col is not None:
        chart_height, n_facet_rows = calc_facet_rows_and_height(
            df, facet_col, params["facet_col_wrap"]
        )

    sort_ascending = params["sorting_choice"][0] != 0
    sort_by_name = params["sorting_choice"][1].lower() == "name"

    # Columns whose category order is passed to Plotly Express
    ordered_cols = [params["df_x_axis_col"], df_legend_col]
    if facet_col is not None:
        ordered_cols.append(facet_col)

    sorting_categories = {}
    for col in ordered_cols:
        if sort_by_name:
            sorting_categories[col] = (
                df_aggregated.sort_values(by=col, ascending=sort_ascending)[col]
                .unique()
                .tolist()
            )
        else:
            # Any other sort field orders the categories by their total count
            sorting_categories[col] = (
                df_aggregated.groupby(col)
                .agg({"count": "sum"})
                .sort_values(by="count", ascending=sort_ascending)
                .reset_index()[col]
                .tolist()
            )

    n_colours = len(df_aggregated[df_legend_col].unique())
    colour_sequence = calculate_colour_sequence(params["colourmap"], n_colours)

    custom_data_fields = [df_legend_col]
    if facet_col is not None:
        custom_data_fields.append(facet_col)

    try:
        fig = px.bar(
            df_aggregated,
            x=params["df_x_axis_col"],
            y="count",
            color=df_legend_col,
            facet_col=facet_col,
            facet_col_wrap=params["facet_col_wrap"],
            facet_row_spacing=0.50 / n_facet_rows,
            color_discrete_sequence=colour_sequence,
            height=chart_height,
            custom_data=custom_data_fields,
            category_orders=sorting_categories,
        )

        fig.update_xaxes(
            tickson="boundaries",
            ticks="outside",
            ticklen=5,
            showticklabels=True,
            title=params["x_axis_title"],
        )
        fig.update_yaxes(showticklabels=True, matches=None)

        fig.update_layout(
            legend_title_text=params["legend_title"],
        )

        fig.update_traces(
            hovertemplate="<b>"
            + params["legend_title"]
            + ": %{customdata[0]}</b>"
            + "<br>"
            + params["x_axis_title"]
            + ": %{x}"
            + "<br>Frequency: %{y:.0d}"
            + "<extra></extra>",
        )

        # Facet annotations are changed to the text following the last "="
        fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))

        save_fig_as_html_div(fig, params["filename"])

        if params["return_as_dict"]:
            return fig.to_dict(), None

        csv_data = download_link(
            csv_data_frequency(fig, params),
            csv_name,
        )

        return (
            plot(
                fig,
                output_type="div",
                include_plotlyjs=False,
                config=global_config(
                    params["filename"], height_multiplier=chart_height / 500.0
                ),
            ),
            csv_data,
        )

    except ValueError as e:
        # Same handling as the other chart builders in this module, keeping the
        # 2-tuple return shape
        return (
            failed_chart_message_div(
                "Could not resolve chart. Try filtering the data to reduce the number of categories or systems.",
                e,
            ),
            None,
        )
-
2003
-
2004
def construct_over_time_charts(
    df,
    params,
    group_by_physician=None,
):
    """
    Construct Plotly line charts of average values over time, optionally grouped by performing physician name.
    For "boxplot" a Plotly boxplot of values over time is returned instead of a Plotly line chart.

    params["name_title"] is replaced with "Physician" when group_by_physician is set and
    params["grouping_choice"] is "series".

    :param df: the Pandas DataFrame containing the data
    :param params: a dictionary of processing parameters

    :param params["df_name_col"]: (string) DataFrame column containing categories
    :param params["name_title"]: (string) name title
    :param params["df_value_col"]: (string) DataFrame column containing values
    :param params["value_title"]: (string) y-axis title
    :param params["df_date_col"]: (string) DataFrame column containing dates
    :param params["date_title"]: (string) date title
    :param params["facet_title"]: (string) subplot title
    :param params["sorting_choice"]: 2-element list. [0] sets sort direction, [1] used to determine which field to sort
    :param params["average_choices"]: list of strings containing required averages ("mean", "median", "boxplot")
    :param params["time_period"]: string containing the time period to average over; "A" (years), "Q" (quarters),
        "M" (months), "W" (weeks), "D" (days)
    :param params["grouping_choice"]: (string) "series" or "system"
    :param params["colourmap"]: (string) colourmap to use
    :param params["filename"]: (string) default filename to use for plot bitmap export
    :param params["facet_col_wrap"]: (int) number of subplots per row
    :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :param group_by_physician: boolean flag to set whether to group by physician name
    :return: a dictionary containing a combination of ["mean"], ["median"] and ["boxplot"] entries,
        each of which contains a Plotly figure embedded in an HTML DIV; or Plotly figure as a
        dictionary (if params["return_as_dict"] is True); or an error message embedded in an HTML DIV
        if there was a ValueError when calculating the figure
    """
    name_col = params["df_name_col"]
    value_col = params["df_value_col"]

    # The category ordering is calculated before rows with missing values are dropped
    sorted_categories = create_sorted_category_list(
        df, name_col, value_col, params["sorting_choice"]
    )

    df = df.dropna(subset=[value_col])
    requested = params["average_choices"]

    if df.empty:
        # Nothing left to plot: one "empty" message per requested chart type
        return {
            choice: empty_dataframe_msg(params)
            for choice in ("mean", "median", "boxplot")
            if choice in requested
        }

    if "mean" in requested or "median" in requested:
        df_time_series = create_dataframe_time_series(
            df,
            name_col,
            value_col,
            df_date_col=params["df_date_col"],
            time_period=params["time_period"],
            average_choices=list(
                set(requested).intersection(["mean", "median", "count"])
            ),
            group_by_physician=group_by_physician,
        )

    # Column that is paired with the name column: physicians or x-ray systems
    if group_by_physician:
        partner_col = "performing_physician_name"
    else:
        partner_col = "x_ray_system_name"

    if params["grouping_choice"] == "series":
        category_names_col, group_by_col = partner_col, name_col
        if group_by_physician:
            params["name_title"] = "Physician"
    else:
        category_names_col, group_by_col = name_col, partner_col

    parameter_dict = {
        "df_count_col": "count" + value_col,
        "df_name_col": category_names_col,
        "df_date_col": params["df_date_col"],
        "facet_col": group_by_col,
        "facet_title": params["facet_title"],
        "value_axis_title": params["value_title"],
        "name_axis_title": params["date_title"],
        "legend_title": params["name_title"],
        "colourmap": params["colourmap"],
        "filename": params["filename"],
        "facet_col_wrap": params["facet_col_wrap"],
        "sorted_category_list": sorted_categories,
        "return_as_dict": params["return_as_dict"],
    }

    return_value = {}

    # The line charts read the "mean<value column>" / "median<value column>" columns
    # of the time series DataFrame
    for average in ("mean", "median"):
        if average in requested:
            parameter_dict["df_value_col"] = average + value_col
            return_value[average] = plotly_timeseries_linechart(
                df_time_series,
                parameter_dict,
            )

    if "boxplot" in requested:
        date_col = params["df_date_col"]

        # Convert each date to its time period, then to the start of that period
        df = (
            df.set_index(date_col)
            .to_period(freq=params["time_period"], copy=False)
            .reset_index()
        )
        df[date_col] = df[date_col].map(lambda period: period.start_time)
        df = df.sort_values(date_col)

        parameter_dict["df_name_col"] = date_col
        parameter_dict["df_value_col"] = value_col
        parameter_dict["sorting_choice"] = params["sorting_choice"]

        return_value["boxplot"] = plotly_boxplot(
            df,
            parameter_dict,
        )

    return return_value
-
2129
-
2130
def download_link(
    object_to_download, download_filename, download_link_text="Download csv"
):
    """
    Generate an HTML link that downloads the given object as a base64-encoded data URI.

    Adapted from:
    https://discuss.streamlit.io/t/heres-a-download-function-that-works-for-dataframes-and-txt/4052

    A DataFrame is converted to csv text (without its index) after every "<br>" in its column names and
    values has been replaced with a space. The DataFrame passed in is left unmodified. A string is
    embedded as it is.

    Examples:

    ``download_link(YOUR_DF, 'YOUR_DF.csv', 'Click here to download data!')``

    ``download_link(YOUR_STRING, 'YOUR_STRING.txt', 'Click here to download your text!')``

    :param object_to_download: (str, pd.DataFrame) the object to be downloaded
    :param download_filename: (str) filename and extension of file. e.g. mydata.csv, some_txt_output.txt
    :param download_link_text: (str) text to display for download link
    :return: (str) an HTML anchor element containing the encoded data
    """
    if isinstance(object_to_download, pd.DataFrame):
        # DataFrame.replace returns a new DataFrame, so renaming the columns of that
        # result leaves the caller's DataFrame untouched
        csv_frame = object_to_download.replace("<br>", " ", regex=True)
        csv_frame.columns = csv_frame.columns.str.replace("<br>", " ", regex=True)
        object_to_download = csv_frame.to_csv(index=False)

    # some strings <-> bytes conversions necessary here
    b64 = base64.b64encode(object_to_download.encode()).decode()

    return f'<a class="btn btn-default btn-sm" role="button" href="data:file/txt;base64,{b64}" download="{download_filename}">{download_link_text}</a>'  # pylint: disable=line-too-long
-
2163
-
2164
def _average_table_html(
    df_aggregated, name_field, name_text, stat, value_title, table_id
):
    """
    Build the HTML table shown alongside an average bar chart: one row per
    name, with a value column and a count column for every x-ray system.

    :param df_aggregated: DataFrame with "x_ray_system_name", name_field, stat and "count" columns
    :param name_field: string containing the DataFrame column holding the names
    :param name_text: user-friendly title for the name column
    :param stat: the statistic column to tabulate ("mean" or "median")
    :param value_title: user-friendly title for the statistic column
    :param table_id: id attribute to give the HTML table
    :return: string containing the HTML table
    """
    table_df = df_aggregated[["x_ray_system_name", name_field, stat, "count"]].round(
        {stat: 2}
    )

    # Rename the data frame columns to have user-friendly names
    table_df.columns = ["X-ray system name", name_text, value_title, "Count"]

    # Pivot the table so that there is a column per system for the statistic and count
    table_df = table_df.pivot(index=name_text, columns="X-ray system name")
    table_df.columns = ["<br>".join((col[1], str(col[0]))) for col in table_df.columns]
    table_df = table_df.reset_index()

    return table_df.to_html(
        classes="table table-bordered table-sm small sortable chart-data-table-contents",
        table_id=table_id,
        index=False,
        na_rep="-",
        escape=False,
    )


def generate_average_chart_group(
    average_choices,
    chart_message,
    df,
    modality_text,
    name_field,
    name_text,
    return_as_dict,
    return_structure,
    units_text,
    user_profile,
    value_field,
    value_text,
    variable_name_start,
    variable_value_name,
    sorting_choice,
):
    """
    Adds the mean and median bar charts (each with csv data and an HTML data
    table), the boxplot and the histogram for one name/value pairing to
    return_structure, according to the options enabled in user_profile.

    :param average_choices: list of statistics to aggregate; "median" is appended when boxplots are enabled
    :param chart_message: custom message line passed to each chart as "custom_msg_line"
    :param df: Pandas DataFrame containing the raw data
    :param modality_text: modality text used in the chart file names
    :param name_field: string containing the DataFrame column holding the names
    :param name_text: user-friendly title for the names
    :param return_as_dict: flag passed to each chart as "return_as_dict"
    :param return_structure: dictionary to which the results are added
    :param units_text: units text appended to the value axis titles
    :param user_profile: object providing the plotMean, plotMedian, plotBoxplots, plotHistograms and other
        plot option attributes read below
    :param value_field: string containing the DataFrame column holding the values
    :param value_text: user-friendly title for the values
    :param variable_name_start: prefix for the keys added to return_structure
    :param variable_value_name: value name used in the keys added to return_structure
    :param sorting_choice: second element of the "sorting_choice" list passed to each chart
    :return: return_structure, with the requested chart entries added
    """
    # pylint: disable=too-many-locals
    if user_profile.plotBoxplots and "median" not in average_choices:
        average_choices = average_choices + ["median"]

    if user_profile.plotMean or user_profile.plotMedian:

        df_aggregated = create_dataframe_aggregates(
            df,
            [name_field],
            value_field,
            stats_to_use=average_choices + ["count"],
        )

        parameter_dict = {
            "df_name_col": name_field,
            "name_axis_title": name_text,
            "colourmap": user_profile.plotColourMapChoice,
            "facet_col": None,
            "facet_col_wrap": user_profile.plotFacetColWrapVal,
            "return_as_dict": return_as_dict,
            "sorting_choice": [
                user_profile.plotInitialSortingDirection,
                sorting_choice,
            ],
            "custom_msg_line": chart_message,
        }

        # The mean and median charts differ only in the statistic name, so
        # both are produced by the same code; mean is handled first.
        requested_stats = (
            ("mean", user_profile.plotMean),
            ("median", user_profile.plotMedian),
        )
        for stat, wanted in requested_stats:
            if not wanted:
                continue

            stat_title = stat.capitalize()  # "Mean" or "Median"
            key_stem = variable_name_start + stat_title + variable_value_name
            value_title = stat_title + " " + value_text + " " + units_text

            parameter_dict["value_axis_title"] = value_title
            parameter_dict["filename"] = (
                "OpenREM "
                + modality_text
                + " "
                + name_text
                + " "
                + value_text
                + " "
                + stat
            )
            parameter_dict["average_choice"] = stat
            (
                return_structure[key_stem + "Data"],
                return_structure[key_stem + "DataCSV"],
            ) = plotly_barchart(
                df_aggregated,
                parameter_dict,
                csv_name=variable_name_start + stat_title + value_text + "Data.csv",
            )

            # Add a html table version of the data frame to the return structure
            table_name = key_stem + "DataTable"
            return_structure[table_name] = _average_table_html(
                df_aggregated,
                name_field,
                name_text,
                stat,
                value_title,
                table_name,
            )

    if user_profile.plotBoxplots:
        parameter_dict = {
            "df_name_col": name_field,
            "df_value_col": value_field,
            "value_axis_title": value_text + " " + units_text,
            "name_axis_title": name_text,
            "colourmap": user_profile.plotColourMapChoice,
            "filename": "OpenREM "
            + modality_text
            + " "
            + name_text
            + " "
            + variable_value_name
            + " boxplot",
            "facet_col": None,
            "sorting_choice": [
                user_profile.plotInitialSortingDirection,
                sorting_choice,
            ],
            "facet_col_wrap": user_profile.plotFacetColWrapVal,
            "return_as_dict": return_as_dict,
            "custom_msg_line": chart_message,
        }

        return_structure[
            variable_name_start + "Boxplot" + variable_value_name + "Data"
        ] = plotly_boxplot(
            df,
            parameter_dict,
        )

    if user_profile.plotHistograms:
        category_names_col = name_field
        group_by_col = "x_ray_system_name"
        legend_title = name_text

        # Grouping by series swaps the roles of the system and name columns
        if user_profile.plotGroupingChoice == "series":
            category_names_col = "x_ray_system_name"
            group_by_col = name_field
            legend_title = "System"

        parameter_dict = {
            "df_facet_col": group_by_col,
            "df_category_col": category_names_col,
            "df_value_col": value_field,
            "value_axis_title": value_text + " " + units_text,
            "legend_title": legend_title,
            "n_bins": user_profile.plotHistogramBins,
            "colourmap": user_profile.plotColourMapChoice,
            "filename": "OpenREM "
            + modality_text
            + " "
            + name_text
            + " "
            + variable_value_name
            + " histogram",
            "facet_col_wrap": user_profile.plotFacetColWrapVal,
            "sorting_choice": [
                user_profile.plotInitialSortingDirection,
                sorting_choice,
            ],
            "global_max_min": user_profile.plotHistogramGlobalBins,
            "return_as_dict": return_as_dict,
            "custom_msg_line": chart_message,
        }
        return_structure[
            variable_name_start + "Histogram" + variable_value_name + "Data"
        ] = plotly_histogram_barchart(
            df,
            parameter_dict,
        )
    return return_structure
1
+ # pylint: disable=too-many-lines
2
+ # This Python file uses the following encoding: utf-8
3
+ # OpenREM - Radiation Exposure Monitoring tools for the physicist
4
+ # Copyright (C) 2017 The Royal Marsden NHS Foundation Trust
5
+ #
6
+ # This program is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License as published by
8
+ # the Free Software Foundation, either version 3 of the License, or
9
+ # (at your option) any later version.
10
+ #
11
+ # This program is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License for more details.
15
+ #
16
+ # Additional permission under section 7 of GPLv3:
17
+ # You shall not make any use of the name of The Royal Marsden NHS
18
+ # Foundation trust in connection with this Program in any press or
19
+ # other public announcement without the prior written consent of
20
+ # The Royal Marsden NHS Foundation Trust.
21
+ #
22
+ # You should have received a copy of the GNU General Public License
23
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
24
+
25
+ """
26
+ .. module:: chart_functions
27
+ :synopsis: Helper functions for calculating chart data
28
+
29
+ .. moduleauthor:: David Platten
30
+
31
+ """
32
+
33
+ import os
34
+ import math
35
+ import base64
36
+ from builtins import range # pylint: disable=redefined-builtin
37
+ from datetime import datetime
38
+ import textwrap
39
+
40
+ from django.conf import settings
41
+ from django.utils.translation import gettext as _
42
+ import numpy as np
43
+ import pandas as pd
44
+ import matplotlib.cm
45
+ import matplotlib.colors
46
+ import plotly.express as px
47
+ import plotly.io as pio
48
+ import plotly.graph_objects as go
49
+ from plotly.offline import plot
50
+ from plotly.subplots import make_subplots
51
+ from scipy import stats
52
+
53
+
54
def global_config(
    filename,
    height_multiplier=1.0,
    height=1080,
    width=1920,
):
    """
    Builds the Plotly global configuration dictionary. Every parameter
    controls the bitmap produced when the user saves the chart as an image.

    :param filename: string containing the file name to use if the user saves the chart as a graphic file
    :param height_multiplier: floating point value used to scale the chart height
    :param height: int value for the height of the chart graphic file
    :param width: int value for the width of the chart graphic file
    :return: a dictionary of Plotly options
    """
    image_button_options = dict(
        format="png",
        filename=filename,
        height=height * height_multiplier,
        width=width,
        scale=1,
    )
    return dict(
        toImageButtonOptions=image_button_options,
        displaylogo=False,
        scrollZoom=True,
    )
81
+
82
+
83
def create_dataframe(
    database_events,
    field_dict,
    data_point_name_lowercase=None,
    data_point_name_remove_whitespace_padding=None,
    data_point_value_multipliers=None,
    char_wrap=500,
    uid=None,
):
    """
    Creates a Pandas DataFrame from the supplied database records.
    The "names" fields are made categorical to save system memory.
    Any missing (na) values in the "names" fields are set to "Blank".

    :param database_events: the database events
    :param field_dict: a dictionary of lists, each containing database field names to include in the DataFrame. The
        dictionary should include "names", "values", "dates", "times" and optionally "system" items
    :param data_point_name_lowercase: boolean flag to determine whether to make all "names" field values lower case
    :param data_point_name_remove_whitespace_padding: boolean flag to determine whether to strip whitespace
    :param data_point_value_multipliers: list of float values to multiply each "values" field value by
    :param char_wrap: the maximum length of series and system names before characters are wrapped with <br>
    :param uid: string containing database field name which contains a unique identifier for each record
    :return: a Pandas DataFrame with a column per required field
    """
    start = None
    if settings.DEBUG:
        start = datetime.now()

    # "system" is optional: treat a missing or empty entry as "no system field"
    system_fields = field_dict.get("system") or []

    # Build one de-duplicated, ordered list of fields so that the query and the
    # DataFrame column names are guaranteed to be in the same order.
    requested_fields = [uid] if uid else []
    requested_fields.extend(field_dict["names"])
    requested_fields.extend(field_dict["values"])
    requested_fields.extend(field_dict["dates"])
    requested_fields.extend(field_dict["times"])
    requested_fields.extend(system_fields)
    fields_to_include = list(dict.fromkeys(requested_fields))

    # NOTE: I am not excluding zero-value events from the calculations (zero DLP or zero CTDI)

    # The "order_by()" in the command below removes the custom ordering on the query set that is used to order things
    # correctly on the filtered page tables. This ordering isn't required for the DataFrame; removing it speeds up
    # the DataFrame.from_records command.
    df = pd.DataFrame.from_records(
        data=database_events.order_by().values_list(
            *fields_to_include
        ),  # values_list uses less memory than values
        columns=fields_to_include,  # need to specify the column names as we're now using values_list
        coerce_float=True,  # force Decimal to float - saves doing a type conversion later
    )

    if settings.DEBUG:
        print(f"Initial Dataframe created from records in {datetime.now() - start}")
        start = datetime.now()
        print("Initial DataFrame info, including memory use, is:")
        df.info()

    if uid:
        df[uid] = df[uid].astype("UInt32")

    # Replace any NaN values in the names columns with "Blank"
    df[field_dict["names"]] = df[field_dict["names"]].apply(lambda x: x.fillna("Blank"))

    # Make names column values lowercase if required
    if data_point_name_lowercase:
        df[field_dict["names"]] = df[field_dict["names"]].apply(lambda x: x.str.lower())

    # Strip whitespace from the beginning and end of any names column values
    # Also replace multiple spaces with a single space
    if data_point_name_remove_whitespace_padding:
        df[field_dict["names"]] = df[field_dict["names"]].apply(
            # Raw string: "\s" in a normal string literal is an invalid escape sequence
            lambda x: x.str.strip().replace(r"\s+", " ", regex=True)
        )

    # Make the names columns all "category" type - this saves memory. Must be done after the above, as the string
    # replacement lines revert the columns back to "object"
    df[field_dict["names"]] = df[field_dict["names"]].astype("category")

    # Rename the "system" column to "x_ray_system_name" if it is present
    if system_fields:
        df.rename(columns={system_fields[0]: "x_ray_system_name"}, inplace=True)
        df["x_ray_system_name"] = df["x_ray_system_name"].astype("category")
        df.sort_values(by="x_ray_system_name", inplace=True)
    # Else create the "x_ray_system_name" column populated with a single "All systems" category
    else:
        df["x_ray_system_name"] = pd.Categorical(np.full(len(df.index), "All systems"))

    # Loop through each value field, multiplying the values by the corresponding multiplier
    for idx, value_field in enumerate(field_dict["values"]):
        if data_point_value_multipliers:
            df[value_field] *= data_point_value_multipliers[idx]
        df[value_field] = df[value_field].astype("float32")

    # Convert each date field to a pd datetime using a specific date format
    for date_field in field_dict["dates"]:
        df[date_field] = pd.to_datetime(df[date_field], format="%Y-%m-%d")

    # Character wrap the system and name fields
    # First make the column "string" dtype rather than "category" dtype because otherwise
    # the df.update line fails if the word-wrapping has changed any of the entries.
    if isinstance(df["x_ray_system_name"].dtype, pd.CategoricalDtype):
        df["x_ray_system_name"] = df["x_ray_system_name"].astype("string")
    df.update(
        df["x_ray_system_name"].apply(
            lambda x: (textwrap.fill(x, char_wrap)).replace("\n", "<br>")
        )
    )
    df["x_ray_system_name"] = df["x_ray_system_name"].astype("category")

    for field in field_dict["names"]:
        if isinstance(df[field].dtype, pd.CategoricalDtype):
            df[field] = df[field].astype("string")
        df.update(
            df[field].apply(
                lambda x: (textwrap.fill(x, char_wrap)).replace("\n", "<br>")
            )
        )
        df[field] = df[field].astype("category")

    if settings.DEBUG:
        print(
            f"Dataframe fillna, lower case, whitespace stripping etc took {datetime.now() - start}"
        )
        print("DataFrame info after processing, including memory use, is:")
        df.info()

    return df
214
+
215
+
216
def create_dataframe_time_series(
    df,
    df_name_col,
    df_value_col,
    df_date_col="study_date",
    time_period="M",
    average_choices=None,
    group_by_physician=None,
):
    """
    Builds a time series DataFrame of averaged values. The data are grouped by
    x_ray_system_name (or performing_physician_name), by df_name_col and by
    time period.

    :param df: the Pandas DataFrame containing the raw data
    :param df_name_col: string containing the DataFrame column name used to group the data
    :param df_value_col: string containing the DataFrame column containing the values to be averaged
    :param df_date_col: string containing the DataFrame column containing the dates
    :param time_period: string containing the time period to average over; "A" (years), "Q" (quarters), "M" (months),
        "W" (weeks), "D" (days)
    :param average_choices: list of strings containing one or both of "mean" and "median"; defaults to ["mean"]
    :param group_by_physician: boolean flag to set whether to group by physician rather than by x-ray system
    :return: Pandas DataFrame containing the time series of average values; each average column is named
        with the statistic followed by df_value_col
    """
    stats_wanted = ["mean"] if average_choices is None else average_choices
    outer_key = (
        "performing_physician_name" if group_by_physician else "x_ray_system_name"
    )

    # The date column becomes the index so that pd.Grouper can bin it by period
    grouping_keys = [outer_key, df_name_col, pd.Grouper(freq=time_period)]
    indexed_by_date = df.set_index(df_date_col)
    averaged = indexed_by_date.groupby(grouping_keys, observed=False).agg(
        {df_value_col: stats_wanted}
    )

    # Flatten the (value column, statistic) header into single-level names
    averaged.columns = [stat + df_value_col for stat in stats_wanted]
    return averaged.reset_index()
255
+
256
+
257
def create_dataframe_weekdays(df, df_name_col, df_date_col="study_date"):
    """
    Counts the number of events falling in each hour of each day of the week,
    per x-ray system. "weekday" and "hour" columns are added to df as part of
    the calculation.

    :param df: Pandas DataFrame containing the raw data; it must have a "study_time" and "x_ray_system_name" column
    :param df_name_col: string containing the df column name whose entries are counted
    :param df_date_col: string containing the df column name containing dates
    :return: Pandas DataFrame containing the number of studies per system, weekday and hour
    """
    started_at = datetime.now() if settings.DEBUG else None

    # Derive the grouping columns from the date and time fields
    day_names = pd.DatetimeIndex(df[df_date_col]).day_name()
    df["weekday"] = pd.Categorical(day_names)
    hours = df["study_time"].apply(lambda row: row.hour)
    df["hour"] = hours.astype("int8")

    grouped = df.groupby(["x_ray_system_name", "weekday", "hour"], observed=False)
    counts = grouped.agg({df_name_col: "count"})
    counts = counts.reset_index()

    if settings.DEBUG:
        print(f"Weekday and hour dataframe created in {datetime.now() - started_at}")

    return counts
284
+
285
+
286
def create_dataframe_aggregates(df, df_name_cols, df_agg_col, stats_to_use=None):
    """
    Calculates the requested statistics (mean, median, count, for example) of
    df_agg_col, grouped by x-ray system name and then by each of df_name_cols.

    :param df: Pandas DataFrame containing the raw data; it must have an "x_ray_system_name" column
    :param df_name_cols: list of strings representing the DataFrame column names to group by
    :param df_agg_col: string containing the DataFrame column over which to calculate the statistics
    :param stats_to_use: list of strings containing the statistics to calculate, such as "mean", "median", "count";
        defaults to ["count"]
    :return: Pandas DataFrame containing the grouped aggregate data, with one column per statistic
    """
    started_at = datetime.now() if settings.DEBUG else None

    wanted_stats = ["count"] if stats_to_use is None else stats_to_use
    grouping_keys = ["x_ray_system_name"] + df_name_cols

    aggregated = df.groupby(grouping_keys, observed=False).agg(
        {df_agg_col: wanted_stats}
    )
    # Discard the outer header level (the aggregated column name) so that the
    # columns are named after the statistics alone
    aggregated.columns = aggregated.columns.droplevel(level=0)
    aggregated = aggregated.reset_index()

    if settings.DEBUG:
        print(f"Aggregated dataframe created in {datetime.now() - started_at}")

    return aggregated
316
+
317
+
318
def plotly_set_default_theme(theme_name):
    """
    Sets the default Plotly chart template (theme). This assigns to the
    plotly.io templates default, so it is a process-wide setting rather than
    a per-chart one.

    :param theme_name: the name of the theme
    :return: None
    """
    pio.templates.default = theme_name
326
+
327
+
328
def calculate_colour_sequence(scale_name="RdYlBu", n_colours=10):
    """
    Calculates a sequence of n_colours from the matplotlib colourmap scale_name.
    The colours are evenly spaced across the whole colourmap; if n_colours is
    1 or less a single colour from the start of the colourmap is returned.

    :param scale_name: string containing the name of the matplotlib colour scale to use
    :param n_colours: int representing the number of colours required
    :return: list of hexadecimal colours from a matplotlib colormap
    :raises ValueError: if scale_name is not a known colourmap
    """
    # matplotlib.cm.get_cmap was removed in matplotlib 3.9; the colormaps
    # registry is its replacement. Fall back to get_cmap for older versions
    # that do not have the registry.
    registry = getattr(matplotlib, "colormaps", None)
    if registry is not None:
        try:
            cmap = registry[scale_name]
        except KeyError as err:
            # get_cmap raised ValueError for unknown names; keep that contract
            raise ValueError(
                f"{scale_name!r} is not a valid colourmap name"
            ) from err
    else:
        cmap = matplotlib.cm.get_cmap(scale_name)

    if n_colours > 1:
        positions = [i / (n_colours - 1) for i in range(n_colours)]
    else:
        positions = [0]

    return [matplotlib.colors.rgb2hex(cmap(position)) for position in positions]
347
+
348
+
349
def empty_dataframe_msg(params=None):
    """
    Builds an HTML DIV warning the user that the DataFrame is empty.

    :param params: parameters which may contain a custom_msg_line to append to the warning
    :return: string containing an html div with the empty DataFrame message
    """
    extra_line = ""
    if params and "custom_msg_line" in params:
        extra_line = params["custom_msg_line"]

    return (
        "<div class='alert alert-warning' role='alert'>"
        + _("No data left after excluding missing values.")
        + extra_line
        + "</div>"
    )
367
+
368
+
369
def failed_chart_message_div(custom_msg_line, e):
    """
    Returns a string containing an HTML DIV with a failed chart message. The
    details of the error are only included when settings.DEBUG is set.

    :param custom_msg_line: string containing a custom line to add to the message
    :param e: Python error object
    :return: string containing the message in an HTML DIV
    """
    msg = "<div class='alert alert-warning' role='alert'>"
    msg += custom_msg_line
    if settings.DEBUG:
        # An exception may carry no args, or a first arg that is not a string;
        # building the failure message must not itself raise in those cases.
        error_args = getattr(e, "args", None)
        error_text = str(error_args[0]) if error_args else str(e)
        msg += "<p>Error is:</p>"
        msg += "<pre>" + error_text.replace("\n", "<br>") + "</pre>"
    msg += "</div>"
    return msg
386
+
387
+
388
def csv_data_barchart(fig, params):
    """
    Extracts the data behind a bar chart into a Pandas DataFrame suitable for csv download.
    The first column holds the categories; each chart trace then contributes a
    value column and a "Frequency" column.

    :param fig: Plotly figure containing the data to extract
    :param params: a dictionary of parameters
    :param params["df_name_col"]: (string) DataFrame column containing categories
    :param params["name_axis_title"]: (string) title for the name data
    :param params["value_axis_title"]: (string) title for the value data
    :param params["facet_col"]: (string) DataFrame column used to split data into subgroups; only read when
        params["df_name_col"] is "performing_physician_name"
    :return: DataFrame containing the data for download
    """
    traces = fig.to_dict()["data"]
    by_physician = params["df_name_col"] == "performing_physician_name"

    # The categories are taken from the first trace, with line breaks removed
    categories = pd.DataFrame(
        data=traces[0]["x"], columns=[params["name_axis_title"]]
    )
    frames = [categories.replace("<br>", " ", regex=True)]

    for trace in traces:
        value_label = trace["name"] + " "
        if by_physician:
            # The subgroup name is only available inside the hover template text
            facet_text = trace["hovertemplate"].split(params["facet_col"] + "=")[1]
            facet_text = facet_text.split("<br>Performing")[0].replace("<br>", " ")
            value_label += facet_text + " "
        # Swap the HTML markup in the axis title for plain text
        value_label += (
            params["value_axis_title"]
            .replace("<sup>2</sup>", "^2")
            .replace("<sub>vol</sub>", "vol")
        )

        frequencies = [entry[1] for entry in trace["customdata"]]
        frames.append(
            pd.DataFrame(
                data=list(zip(trace["y"], frequencies)),
                columns=[value_label, "Frequency"],
            )
        )

    return pd.concat(frames, axis=1)
451
+
452
+
453
def csv_data_frequency(fig, params):
    """
    Extracts the data behind a frequency chart into a Pandas DataFrame suitable for csv download.
    The first column holds the x-axis categories; each chart trace then
    contributes one column of frequencies.

    :param fig: Plotly figure containing the data to extract
    :param params: a dictionary of parameters; must include "x_axis_title" and "df_name_col"
    :return: DataFrame containing the data for download
    """
    traces = fig.to_dict()["data"]
    by_physician = params["df_name_col"] == "performing_physician_name"

    # The categories are taken from the first trace
    frames = [pd.DataFrame(data=traces[0]["x"], columns=[params["x_axis_title"]])]

    for trace in traces:
        if by_physician:
            # The series name is carried in the trace's custom data
            series_name = trace["customdata"][0][1]
            column_title = trace["name"] + " " + series_name + " frequency"
        else:
            column_title = trace["name"]
        frames.append(pd.DataFrame(data=trace["y"], columns=[column_title]))

    return pd.concat(frames, axis=1)
485
+
486
+
487
def calc_facet_rows_and_height(df, facet_col_name, facet_col_wrap, row_height=750):
    """
    Calculates the required total chart height and the number of facet rows.
    The chart is never shorter than a single row, even if there are no facets.

    :param df: Pandas DataFrame containing the data
    :param facet_col_name: string containing the DataFrame column name containing the facet names
    :param facet_col_wrap: int representing the number of subplots to have on each row
    :param row_height: int representing the height of each facet row in pixels (default 750)
    :return: two-element tuple containing the chart height in pixels (int) and the number of facet rows (int)
    """
    n_facets = len(df[facet_col_name].unique())
    n_facet_rows = math.ceil(n_facets / facet_col_wrap)
    chart_height = max(n_facet_rows * row_height, row_height)
    return chart_height, n_facet_rows
502
+
503
+
504
def save_fig_as_html_div(fig, filename, active=settings.SAVE_CHARTS_AS_HTML):
    """
    Saves the Plotly figure as an HTML file containing a single DIV. The file is saved on the OpenREM server in
    MEDIA_ROOT/charts/yyyy/mm/dd/. Viewing the saved file requires an active internet connection as the Plotly
    JavaScript library is not included in the file.

    This method is not currently accessible to an OpenREM user or administrator - it is present to assist developers
    when producing example charts for the OpenREM documentation. It does nothing unless active is true; the default
    is the value of settings.SAVE_CHARTS_AS_HTML at the time this module is imported.

    Args:
        fig: a Plotly figure
        filename: (string) the filename to use, without the ".html" extension
        active: (boolean) to set whether to save the figure
    """
    if not active:
        return

    datestamp = datetime.now()
    # One folder per day; the folder is created if it does not already exist
    path = os.path.join(
        settings.MEDIA_ROOT, "charts", datestamp.strftime("%Y/%m/%d")
    )
    os.makedirs(path, exist_ok=True)
    fig.write_html(
        os.path.join(path, filename + ".html"),
        include_plotlyjs="cdn",  # load plotly.js from the CDN rather than embedding it
        full_html=False,
    )
533
+
534
+
535
+ def plotly_boxplot(
536
+ df,
537
+ params,
538
+ ):
539
+ """
540
+ Produce a plotly boxplot
541
+
542
+ :param df: Pandas DataFrame containing the data
543
+ :param params: a dictionary of parameters
544
+ :param params["df_value_col"]: (string) DataFrame column containing values
545
+ :param params["value_axis_title"]: (string) x-axis title
546
+ :param params["df_name_col"]: (string) DataFrame column containing categories
547
+ :param params["name_axis_title"]: (string) y-axis title
548
+ :param params["df_facet_col"]: (string) DataFrame column used to create subplots
549
+ :param params["df_facet_col_wrap"]: (int) number of subplots per row
550
+ :param params["sorting_choice"]: 2-element list. [0] sets sort direction, [1] used to determine which field to sort
551
+ :param params["colourmap"]: (string) colourmap to use
552
+ :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
553
+ :return: Plotly figure embedded in an HTML DIV; or Plotly figure as a dictionary (if params["return_as_dict"] is
554
+ True); or an error message embedded in an HTML DIV if there was a ValueError when calculating the figure
555
+ """
556
+ chart_height = 500
557
+ n_facet_rows = 1
558
+
559
+ try:
560
+ # Drop any rows with nan values in the df_value column
561
+ df = df.dropna(subset=[params["df_value_col"]])
562
+ if df.empty:
563
+ return empty_dataframe_msg(params)
564
+
565
+ # Remove any unused categories that are as a result of the dropna above: these would cause the boxplot to fail.
566
+ # This command results in a SettingWithCopyWarning that does not affect the underlying data in this use case.
567
+ # I am temporarily disabling the warning for this one command, then setting it back to the default of "warn".
568
+ pd.options.mode.chained_assignment = None
569
+ df["x_ray_system_name"] = df["x_ray_system_name"].cat.remove_unused_categories()
570
+ pd.options.mode.chained_assignment = "warn"
571
+
572
+ if params["facet_col"]:
573
+ chart_height, n_facet_rows = calc_facet_rows_and_height(
574
+ df, params["facet_col"], params["facet_col_wrap"]
575
+ )
576
+
577
+ sort_ascending = True
578
+ if params["sorting_choice"][0] == 0:
579
+ sort_ascending = False
580
+
581
+ sorting_categories = None
582
+ if params["sorting_choice"][1].lower() == "name":
583
+ sorting_categories = {
584
+ params["df_name_col"]: (
585
+ df.sort_values(by=params["df_name_col"], ascending=sort_ascending)[
586
+ params["df_name_col"]
587
+ ]
588
+ )
589
+ .unique()
590
+ .tolist()
591
+ }
592
+ sorting_categories["x_ray_system_name"] = (
593
+ (
594
+ df.sort_values(by="x_ray_system_name", ascending=sort_ascending)[
595
+ "x_ray_system_name"
596
+ ]
597
+ )
598
+ .unique()
599
+ .tolist()
600
+ )
601
+ if params["facet_col"]:
602
+ sorting_categories[params["facet_col"]] = (
603
+ (
604
+ df.sort_values(
605
+ by=params["facet_col"], ascending=sort_ascending
606
+ )[params["facet_col"]]
607
+ )
608
+ .unique()
609
+ .tolist()
610
+ )
611
+ elif params["sorting_choice"][1].lower() == "frequency":
612
+ sorting_categories = {
613
+ params["df_name_col"]: df.groupby(params["df_name_col"], observed=False)
614
+ .agg(freq=(params["df_name_col"], "count"))
615
+ .sort_values(by="freq", ascending=sort_ascending)
616
+ .reset_index()[params["df_name_col"]]
617
+ .tolist()
618
+ }
619
+ sorting_categories["x_ray_system_name"] = (
620
+ df.groupby("x_ray_system_name", observed=False)
621
+ .agg(freq=("x_ray_system_name", "count"))
622
+ .sort_values(by="freq", ascending=sort_ascending)
623
+ .reset_index()["x_ray_system_name"]
624
+ .tolist()
625
+ )
626
+ if params["facet_col"]:
627
+ sorting_categories[params["facet_col"]] = (
628
+ df.groupby(params["facet_col"], observed=False)
629
+ .agg(freq=(params["facet_col"], "count"))
630
+ .sort_values(by="freq", ascending=sort_ascending)
631
+ .reset_index()[params["facet_col"]]
632
+ .tolist()
633
+ )
634
+ else:
635
+ sorting_categories = {
636
+ params["df_name_col"]: df.groupby(params["df_name_col"], observed=False)
637
+ .agg(mean=(params["df_value_col"], "mean"))
638
+ .sort_values(by="mean", ascending=sort_ascending)
639
+ .reset_index()[params["df_name_col"]]
640
+ .tolist()
641
+ }
642
+ sorting_categories["x_ray_system_name"] = (
643
+ df.groupby("x_ray_system_name", observed=False)
644
+ .agg(mean=(params["df_value_col"], "mean"))
645
+ .sort_values(by="mean", ascending=sort_ascending)
646
+ .reset_index()["x_ray_system_name"]
647
+ .tolist()
648
+ )
649
+ if params["facet_col"]:
650
+ sorting_categories[params["facet_col"]] = (
651
+ df.groupby(params["facet_col"], observed=False)
652
+ .agg(mean=(params["df_value_col"], "mean"))
653
+ .sort_values(by="mean", ascending=sort_ascending)
654
+ .reset_index()[params["facet_col"]]
655
+ .tolist()
656
+ )
657
+
658
+ n_colours = len(df.x_ray_system_name.unique())
659
+ colour_sequence = calculate_colour_sequence(params["colourmap"], n_colours)
660
+
661
+ fig = px.box(
662
+ df,
663
+ x=params["df_name_col"],
664
+ y=params["df_value_col"],
665
+ facet_col=params["facet_col"],
666
+ facet_col_wrap=params["facet_col_wrap"],
667
+ facet_row_spacing=0.50 / n_facet_rows,
668
+ color=df["x_ray_system_name"],
669
+ labels={
670
+ params["df_value_col"]: params["value_axis_title"],
671
+ params["df_name_col"]: params["name_axis_title"],
672
+ "x_ray_system_name": "System",
673
+ },
674
+ color_discrete_sequence=colour_sequence,
675
+ category_orders=sorting_categories,
676
+ height=chart_height,
677
+ )
678
+
679
+ fig.update_traces(quartilemethod="exclusive")
680
+
681
+ fig.update_xaxes(
682
+ tickson="boundaries",
683
+ ticks="outside",
684
+ ticklen=5,
685
+ showticklabels=True,
686
+ title=params["name_axis_title"],
687
+ )
688
+ fig.update_yaxes(showticklabels=True, matches=None)
689
+
690
+ fig.update_layout(legend_title_text="System")
691
+
692
+ fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
693
+
694
+ save_fig_as_html_div(fig, params["filename"])
695
+
696
+ if params["return_as_dict"]:
697
+ return fig.to_dict()
698
+ else:
699
+ return plot(
700
+ fig,
701
+ output_type="div",
702
+ include_plotlyjs=False,
703
+ config=global_config(
704
+ params["filename"], height_multiplier=chart_height / 500.0
705
+ ),
706
+ )
707
+
708
+ except ValueError as e:
709
+ return failed_chart_message_div(
710
+ "Could not resolve chart. Try filtering the data to reduce the number of systems.",
711
+ e,
712
+ )
713
+
714
+
715
def create_freq_sorted_category_list(df, df_name_col, sorting):
    """
    Create a sorted list of categories for frequency charts. Makes use of Pandas DataFrame sort_values
    (https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_values.html).

    The data is grouped by df_name_col and the non-null entries of every remaining column are counted. When
    df_name_col is a column other than "x_ray_system_name", the "x_ray_system_name" column of the grouped data
    therefore holds the number of entries in each category.

    sorting[0] sets sort direction (it is passed to sort_values as "ascending")

    sorting[1] used to determine field to sort on: "name" (compared case-insensitively) sorts by df_name_col;
    any other value sorts by the "x_ray_system_name" column of the grouped data

    :param df: Pandas DataFrame containing the data
    :param df_name_col: DataFrame column containing the category names
    :param sorting: 2-element list. [0] sets sort direction, [1] used to determine which field to sort on
    :return: dictionary with key df_name_col and a list of sorted categories as the value
    """
    category_counts_df = df.groupby(df_name_col, observed=False).count().reset_index()

    # Compare case-insensitively so "Name" and "name" select the same field, in line with
    # create_sorted_category_list. str() keeps non-string values falling through to the else branch.
    if str(sorting[1]).lower() == "name":
        sort_by = df_name_col
    else:
        sort_by = "x_ray_system_name"

    ordered_df = category_counts_df.sort_values(by=sort_by, ascending=sorting[0])

    return {df_name_col: list(ordered_df[df_name_col])}
744
+
745
+
746
def create_sorted_category_list(df, df_name_col, df_value_col, sorting):
    """
    Create a sorted list of categories for scatter and over-time charts. Makes use of Pandas sort_values
    (https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sort_values.html).

    sorting[0] sets sort direction: 0 sorts descending, any other value sorts ascending

    sorting[1] (compared case-insensitively) determines the sort order:
    "name" sorts by the category names themselves;
    "frequency" sorts by the number of non-null df_value_col entries in each category;
    "mean" sorts by the mean of df_value_col in each category;
    any other value sorts by the median of df_value_col in each category

    :param df: Pandas DataFrame containing the data
    :param df_name_col: DataFrame column containing the category names. Used to group the data
    :param df_value_col: DataFrame column containing values to count or average
    :param sorting: 2-element list. [0] sets sort direction, [1] used to determine which field to sort on
    :return: dictionary with key df_name_col and a list of sorted categories as the value; each category
             appears once in the list
    """
    ascending_order = sorting[0] != 0
    sort_choice = sorting[1].lower()

    if sort_choice == "name":
        # Sort the names directly and keep the first occurrence of each, so the result has one entry per
        # category rather than one entry per row of data
        sorted_names = df[df_name_col].sort_values(ascending=ascending_order)
        return {df_name_col: list(sorted_names.drop_duplicates())}

    if sort_choice == "frequency":
        statistic = "count"
    elif sort_choice == "mean":
        statistic = "mean"
    else:
        statistic = "median"

    # Aggregate to a Series indexed by category; working on the Series avoids any clash between the
    # statistic name and an existing column name
    per_category = df.groupby(df_name_col, observed=False)[df_value_col].agg(statistic)
    ordered = per_category.sort_values(ascending=ascending_order)

    return {df_name_col: list(ordered.index)}
793
+
794
+
795
def plotly_barchart(
    df,
    params,
    csv_name="OpenREM chart data.csv",
):
    """
    Create a plotly bar chart

    Bars are placed along the x-axis by params["df_name_col"], grouped and coloured by the "x_ray_system_name"
    column of df. The "count" column of df is shown in the hover text and is summed to order the categories
    when sorting by frequency.

    :param df: Pandas DataFrame containing the data
    :param params: a dictionary of parameters
    :param params["average_choice"]: (string) DataFrame column containing values ("mean" or "median")
    :param params["value_axis_title"]: (string) y-axis title
    :param params["df_name_col"]: (string) DataFrame column containing categories
    :param params["name_axis_title"]: (string) x-axis title
    :param params["facet_col"]: (string) DataFrame column used to create subplots
    :param params["facet_col_wrap"]: (int) number of subplots per row
    :param params["sorting_choice"]: 2-element list. [0] sets sort direction (0 is descending), [1] selects the
        field to sort on: "name", "frequency", or anything else to sort on the mean of params["average_choice"]
    :param params["colourmap"]: (string) colourmap to use
    :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :param params["filename"]: (string) default filename to use for plot bitmap export
    :param csv_name: (string) default filename to use for plot csv export
    :return: a 2-tuple. The first element is the result of empty_dataframe_msg if df is empty; or the Plotly
        figure as a dictionary (if params["return_as_dict"] is True); or the Plotly figure embedded in an HTML
        DIV. The second element is the result of download_link for the chart's csv data when the HTML DIV is
        returned, otherwise None
    """
    if df.empty:
        return empty_dataframe_msg(params), None

    chart_height = 500
    n_facet_rows = 1

    if params["facet_col"]:
        chart_height, n_facet_rows = calc_facet_rows_and_height(
            df, params["facet_col"], params["facet_col_wrap"]
        )

    sort_ascending = params["sorting_choice"][0] != 0
    sort_field = params["sorting_choice"][1].lower()

    def _category_order(column):
        """Return the values of column in the order requested by params["sorting_choice"]"""
        if sort_field == "name":
            return (
                df.sort_values(by=column, ascending=sort_ascending)[column]
                .unique()
                .tolist()
            )
        if sort_field == "frequency":
            value_col, statistic = "count", "sum"
        else:
            value_col, statistic = params["average_choice"], "mean"
        return (
            df.groupby(column, observed=False)
            .agg({value_col: statistic})
            .sort_values(by=value_col, ascending=sort_ascending)
            .reset_index()[column]
            .tolist()
        )

    # The x-axis categories, the systems (legend) and the facets (if any) all get the same ordering rule
    columns_to_order = [params["df_name_col"], "x_ray_system_name"]
    if params["facet_col"]:
        columns_to_order.append(params["facet_col"])
    sorting_categories = {
        column: _category_order(column) for column in columns_to_order
    }

    n_colours = len(df.x_ray_system_name.unique())
    colour_sequence = calculate_colour_sequence(params["colourmap"], n_colours)

    fig = px.bar(
        df,
        x=params["df_name_col"],
        y=params["average_choice"],
        color="x_ray_system_name",
        barmode="group",
        facet_col=params["facet_col"],
        facet_col_wrap=params["facet_col_wrap"],
        facet_row_spacing=0.50 / n_facet_rows,
        labels={
            params["average_choice"]: params["value_axis_title"],
            params["df_name_col"]: params["name_axis_title"],
            "x_ray_system_name": "System",
            "count": "Frequency",
        },
        category_orders=sorting_categories,
        color_discrete_sequence=colour_sequence,
        hover_name="x_ray_system_name",
        hover_data={
            "x_ray_system_name": False,
            params["average_choice"]: ":.2f",
            "count": ":.0d",
        },
        height=chart_height,
    )

    fig.update_xaxes(
        tickson="boundaries", ticks="outside", ticklen=5, showticklabels=True
    )
    fig.update_yaxes(showticklabels=True, matches=None)

    # Facet titles are generated as "column=value"; keep only the value
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))

    save_fig_as_html_div(fig, params["filename"])

    if params["return_as_dict"]:
        return fig.to_dict(), None

    csv_data = download_link(
        csv_data_barchart(fig, params),
        csv_name,
    )

    return (
        plot(
            fig,
            output_type="div",
            include_plotlyjs=False,
            config=global_config(
                params["filename"], height_multiplier=chart_height / 500.0
            ),
        ),
        csv_data,
    )
968
+
969
+
970
def plotly_histogram_barchart(
    df,
    params,
):
    """
    Create a plotly histogram bar chart

    One subplot is drawn per value of params["df_facet_col"]; within each subplot one bar trace is drawn per
    value of params["df_category_col"]. Facets and categories with no non-null values are skipped.

    :param df: Pandas DataFrame containing the data
    :param params: a dictionary of parameters
    :param params["df_value_col"]: (string) DataFrame column containing values
    :param params["value_axis_title"]: (string) text appended to the facet name to form each x-axis title
    :param params["df_facet_col"]: (string) DataFrame column used to create subplots
    :param params["df_category_col"]: (string) DataFrame column containing the categories drawn in each subplot
    :param params["facet_col_wrap"]: (int) number of subplots per row
    :param params["n_bins"]: (int) number of histogram bins to use
    :param params["colourmap"]: (string) colourmap to use
    :param params["sorting_choice"]: 2-element list. [0] sets sort direction (0 is descending), [1] selects the
        field to sort on: "name", "frequency", or anything else to sort on the mean of params["df_value_col"]
    :param params["global_max_min"]: (boolean) flag to calculate global max and min or per-subplot max and min
    :param params["legend_title"]: (string) legend title
    :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :param params["filename"]: (string) default filename to use for plot bitmap export
    :return: Plotly figure embedded in an HTML DIV; or Plotly figure as a dictionary (if params["return_as_dict"] is
        True); or an error message embedded in an HTML DIV if there was a ValueError when calculating the figure
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-statements
    if df.empty:
        return empty_dataframe_msg(params)

    sort_ascending = params["sorting_choice"][0] != 0
    sort_field = params["sorting_choice"][1].lower()

    def _ordered_values(column):
        """Return the values of column in the order requested by params["sorting_choice"]"""
        if sort_field == "name":
            return (
                df.sort_values(by=column, ascending=sort_ascending)[column]
                .unique()
                .tolist()
            )
        if sort_field == "frequency":
            aggregated = df.groupby(column, observed=False).agg(
                freq=(column, "count")
            )
            sort_by = "freq"
        else:
            aggregated = df.groupby(column, observed=False).agg(
                mean=(params["df_value_col"], "mean")
            )
            sort_by = "mean"
        return (
            aggregated.sort_values(by=sort_by, ascending=sort_ascending)
            .reset_index()[column]
            .tolist()
        )

    df_facet_list = _ordered_values(params["df_facet_col"])
    df_category_list = _ordered_values(params["df_category_col"])

    chart_height, n_facet_rows = calc_facet_rows_and_height(
        df, params["df_facet_col"], params["facet_col_wrap"]
    )

    n_colours = len(df[params["df_category_col"]].unique())
    colour_sequence = calculate_colour_sequence(params["colourmap"], n_colours)

    # With global_max_min the same bins are used for every subplot; otherwise they are recalculated per facet
    bins = None
    mid_bins = None
    bin_labels = None
    if params["global_max_min"]:
        bin_labels, bins, mid_bins = calc_histogram_bin_data(
            df, params["df_value_col"], n_bins=params["n_bins"]
        )

    try:
        n_cols = params["facet_col_wrap"]
        if len(df_facet_list) < n_cols:
            n_cols = len(df_facet_list)

        fig = make_subplots(
            rows=n_facet_rows,
            cols=n_cols,
            vertical_spacing=0.40 / n_facet_rows,
        )

        current_row = 1
        current_col = 1
        # Categories already given a legend entry; the position in this list also selects the colour
        category_names = []

        for facet_name in df_facet_list:
            facet_subset = df[df[params["df_facet_col"]] == facet_name].dropna(
                subset=[params["df_value_col"]]
            )

            # If the subset is empty then skip to the next facet
            if facet_subset.empty:
                continue

            if not params["global_max_min"]:
                bin_labels, bins, mid_bins = calc_histogram_bin_data(
                    facet_subset, params["df_value_col"], n_bins=params["n_bins"]
                )

            for category_name in df_category_list:
                category_subset = facet_subset[
                    facet_subset[params["df_category_col"]] == category_name
                ].dropna(subset=[params["df_value_col"]])

                # If the subset is empty then skip to the next category
                if category_subset.empty:
                    continue

                # Only the first trace of each category is shown in the legend
                show_legend = category_name not in category_names
                if show_legend:
                    category_names.append(category_name)

                category_idx = category_names.index(category_name)

                histogram_data = np.histogram(
                    category_subset[params["df_value_col"]].values, bins=bins
                )

                trace = go.Bar(
                    x=mid_bins,
                    y=histogram_data[0],
                    name=category_name,
                    marker_color=colour_sequence[category_idx],
                    legendgroup=category_idx,
                    showlegend=show_legend,
                    customdata=bin_labels,
                    hovertemplate=f"<b>{facet_name}</b><br>"
                    + f"{category_name}<br>"
                    + "Frequency: %{y:.0d}<br>"
                    + "Bin range: %{customdata}<br>"
                    + "Mid-bin: %{x:.2f}<br>"
                    + "<extra></extra>",
                )

                fig.append_trace(trace, row=current_row, col=current_col)

            fig.update_xaxes(
                title_text=facet_name + " " + params["value_axis_title"],
                tickvals=bins,
                ticks="outside",
                ticklen=5,
                row=current_row,
                col=current_col,
            )

            if current_col == 1:
                fig.update_yaxes(
                    title_text="Frequency", row=current_row, col=current_col
                )

            # Move to the next subplot position, wrapping to a new row when the current row is full
            current_col += 1
            if current_col > n_cols:
                current_row += 1
                current_col = 1

        layout = go.Layout(height=chart_height)

        fig.update_layout(layout)
        fig.update_layout(legend_title_text=params["legend_title"])

        save_fig_as_html_div(fig, params["filename"])

        if params["return_as_dict"]:
            return fig.to_dict()

        return plot(
            fig,
            output_type="div",
            include_plotlyjs=False,
            config=global_config(
                params["filename"], height_multiplier=chart_height / 500.0
            ),
        )

    except ValueError as e:
        return failed_chart_message_div(
            "Could not resolve chart. Try filtering the data to reduce the number of categories or systems.",
            e,
        )
1184
+
1185
+
1186
def calc_histogram_bin_data(df, value_col_name, n_bins=10):
    """
    Work out the histogram bin edges, bin mid-points and bin label text for one DataFrame column

    The bins are equally spaced between the minimum and maximum of the column.

    :param df: the Pandas DataFrame containing the data
    :param value_col_name: (string) name of the DataFrame column that contains the values
    :param n_bins: (int) the number of bins to use
    :return: a tuple of three numpy arrays: the bin labels (n_bins entries), the bin boundaries (n_bins + 1
             entries) and the bin mid-points (n_bins entries)
    """
    values = df[value_col_name]
    lowest = values.min()
    highest = values.max()

    bins = np.linspace(lowest, highest, n_bins + 1)
    lower_edges = bins[:-1]
    upper_edges = bins[1:]

    mid_bins = 0.5 * (lower_edges + upper_edges)
    bin_labels = np.array(
        [f"{low:.2f}≤x<{high:.2f}" for low, high in zip(lower_edges, upper_edges)]
    )

    return bin_labels, bins, mid_bins
1202
+
1203
+
1204
def plotly_binned_statistic_barchart(
    df,
    params,
):
    """
    Create a plotly binned statistic bar chart

    One subplot is drawn per value of params["df_facet_col"]; within each subplot one bar trace is drawn per
    value of params["df_category_col"]. Each bar shows params["stat_name"] of the y data for the x data that
    falls in that bin. Facets and categories with no rows that have both x and y values are skipped.

    :param df: Pandas DataFrame containing the data
    :param params: a dictionary of parameters
    :param params["df_category_col"]: (string) DataFrame column containing categories
    :param params["df_facet_col"]: (string) DataFrame column used to create subplots
    :param params["facet_title"]: (string) used as the legend title
    :param params["facet_col_wrap"]: (int) number of subplots per row
    :param params["user_bins"]: list of ints containing bin edges for binning
    :param params["df_x_value_col"]: (string) DataFrame column containing x data
    :param params["df_y_value_col"]: (string) DataFrame column containing y data
    :param params["x_axis_title"]: (string) Title for x-axis
    :param params["y_axis_title"]: (string) Title for y-axis
    :param params["stat_name"]: (string) "mean" or "median"
    :param params["sorting_choice"]: 2-element list. [0] sets sort direction (0 is descending), [1] selects the
        field to sort on: "name", "frequency", or anything else to sort on params["stat_name"] of the y data
    :param params["colourmap"]: (string) colourmap to use
    :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :param params["filename"]: (string) default filename to use for plot bitmap export
    :return: Plotly figure embedded in an HTML DIV; or Plotly figure as a dictionary (if params["return_as_dict"] is
        True); or an error message embedded in an HTML DIV if there was a ValueError when calculating the figure
    """
    # pylint: disable=too-many-locals
    # pylint: disable=too-many-branches
    # pylint: disable=too-many-statements
    if df.empty:
        return empty_dataframe_msg(params)

    chart_height, n_facet_rows = calc_facet_rows_and_height(
        df, params["df_facet_col"], params["facet_col_wrap"]
    )

    n_colours = len(df[params["df_category_col"]].unique())
    colour_sequence = calculate_colour_sequence(params["colourmap"], n_colours)

    sort_ascending = params["sorting_choice"][0] != 0
    sort_field = params["sorting_choice"][1].lower()

    def _ordered_values(column):
        """Return the values of column in the order requested by params["sorting_choice"]"""
        if sort_field == "name":
            return (
                df.sort_values(by=column, ascending=sort_ascending)[column]
                .unique()
                .tolist()
            )
        if sort_field == "frequency":
            aggregated = df.groupby(column, observed=False).agg(
                freq=(column, "count")
            )
            sort_by = "freq"
        else:
            aggregated = df.groupby(column, observed=False).agg(
                avg=(params["df_y_value_col"], params["stat_name"])
            )
            sort_by = "avg"
        return (
            aggregated.sort_values(by=sort_by, ascending=sort_ascending)
            .reset_index()[column]
            .tolist()
        )

    df_category_name_list = _ordered_values(params["df_category_col"])
    df_facet_category_list = _ordered_values(params["df_facet_col"])

    try:
        n_cols = params["facet_col_wrap"]
        if len(df_facet_category_list) < n_cols:
            n_cols = len(df_facet_category_list)

        fig = make_subplots(
            rows=n_facet_rows,
            cols=n_cols,
            vertical_spacing=0.40 / n_facet_rows,
        )

        current_row = 1
        current_col = 1
        # Categories already given a legend entry; the position in this list also selects the colour
        category_names = []

        bins = np.sort(np.array(params["user_bins"]))

        for facet_name in df_facet_category_list:
            facet_subset = df[df[params["df_facet_col"]] == facet_name].dropna(
                subset=[params["df_x_value_col"], params["df_y_value_col"]]
            )

            # Skip to the next facet if the subset is empty
            if facet_subset.empty:
                continue

            facet_x_min = facet_subset[params["df_x_value_col"]].min()
            facet_x_max = facet_subset[params["df_x_value_col"]].max()

            # Widen the bins so that every x value in this facet falls inside a bin.
            # NOTE(review): bins is not reset between facets, so an edge added for one facet is kept for
            # the facets that follow - confirm this is intended.
            if np.isfinite(facet_x_min) and facet_x_min < np.amin(bins):
                bins = np.concatenate([[facet_x_min], bins])
            if np.isfinite(facet_x_max) and facet_x_max > np.amax(bins):
                bins = np.concatenate([bins, [facet_x_max]])

            bin_labels = np.array(
                [f"{low:.0f}≤x<{high:.0f}" for low, high in zip(bins[:-1], bins[1:])]
            )

            for category_name in df_category_name_list:
                category_subset = facet_subset[
                    facet_subset[params["df_category_col"]] == category_name
                ].dropna(subset=[params["df_x_value_col"], params["df_y_value_col"]])

                # Skip to the next category name if the subset is empty
                if category_subset.empty:
                    continue

                # Only the first trace of each category is shown in the legend
                show_legend = category_name not in category_names
                if show_legend:
                    category_names.append(category_name)

                category_idx = category_names.index(category_name)

                binned_stats = stats.binned_statistic(
                    category_subset[params["df_x_value_col"]].values,
                    category_subset[params["df_y_value_col"]].values,
                    statistic=params["stat_name"],
                    bins=bins,
                )
                # binned_stats[2] holds the 1-based bin number of each x value. minlength guarantees a
                # count for every bin, even when the highest bins are empty, so that the hover labels
                # stay the same length as the x data.
                bin_counts = np.bincount(binned_stats[2], minlength=len(bins) + 1)
                trace_labels = np.array(
                    [
                        f"Frequency: {count}<br>Bin range: {label}"
                        for count, label in zip(bin_counts[1:], bin_labels)
                    ]
                )

                trace = go.Bar(
                    x=bin_labels,
                    y=binned_stats[0],
                    name=category_name,
                    marker_color=colour_sequence[category_idx],
                    legendgroup=category_idx,
                    showlegend=show_legend,
                    customdata=trace_labels,
                    hovertemplate=f"<b>{facet_name}</b><br>"
                    + f"{category_name}<br>"
                    + f"{params['stat_name'].capitalize()}: "
                    + "%{y:.2f}<br>"
                    + "%{customdata}<br>"
                    + "<extra></extra>",
                )

                fig.append_trace(trace, row=current_row, col=current_col)

            fig.update_xaxes(
                title_text=facet_name + " " + params["x_axis_title"],
                tickson="boundaries",
                ticks="outside",
                ticklen=5,
                row=current_row,
                col=current_col,
            )

            if current_col == 1:
                fig.update_yaxes(
                    title_text=params["stat_name"].capitalize()
                    + " "
                    + params["y_axis_title"],
                    row=current_row,
                    col=current_col,
                )

            # Move to the next subplot position, wrapping to a new row when the current row is full
            current_col += 1
            if current_col > n_cols:
                current_row += 1
                current_col = 1

        layout = go.Layout(height=chart_height)

        fig.update_layout(layout)
        fig.update_layout(legend_title_text=params["facet_title"])

        save_fig_as_html_div(fig, params["filename"])

        if params["return_as_dict"]:
            return fig.to_dict()

        return plot(
            fig,
            output_type="div",
            include_plotlyjs=False,
            config=global_config(
                params["filename"], height_multiplier=chart_height / 500.0
            ),
        )

    except ValueError as e:
        return failed_chart_message_div(
            "Could not resolve chart. Try filtering the data to reduce the number of categories or systems.",
            e,
        )
1437
+
1438
+
1439
def plotly_timeseries_linechart(
    df,
    params,
):
    """
    Build a plotly chart of values over time, drawn as markers joined by lines

    :param df: Pandas DataFrame containing the data
    :param params: a dictionary of parameters
    :param params["facet_col"]: (string) DataFrame column used to create subplots
    :param params["facet_col_wrap"]: (int) number of subplots per row
    :param params["facet_title"]: (string) label used for the facet column
    :param params["df_value_col"]: (string) DataFrame column containing values
    :param params["value_axis_title"]: (string) y-axis title
    :param params["colourmap"]: (string) colourmap to use
    :param params["df_date_col"]: (string) DataFrame column containing dates
    :param params["df_count_col"]: (string) DataFrame column containing frequency data
    :param params["df_name_col"]: (string) DataFrame column containing categories
    :param params["legend_title"]: (string) legend title
    :param params["name_axis_title"]: (string) x-axis title
    :param params["sorted_category_list"]: category ordering, passed to plotly as category_orders
    :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :param params["filename"]: (string) default filename to use for plot bitmap export
    :return: Plotly figure embedded in an HTML DIV; or Plotly figure as a dictionary (if "return_as_dict" is True);
             or an error message embedded in an HTML DIV if there was a ValueError when calculating the figure
    """
    if df.empty:
        return empty_dataframe_msg(params)

    chart_height, n_facet_rows = calc_facet_rows_and_height(
        df, params["facet_col"], params["facet_col_wrap"]
    )

    n_colours = len(df[params["df_name_col"]].unique())
    colour_sequence = calculate_colour_sequence(params["colourmap"], n_colours)

    try:
        # Keyword arguments for px.scatter, gathered in one place
        scatter_kwargs = {
            "x": params["df_date_col"],
            "y": params["df_value_col"],
            "color": params["df_name_col"],
            "facet_col": params["facet_col"],
            "facet_col_wrap": params["facet_col_wrap"],
            "facet_row_spacing": 0.40 / n_facet_rows,
            "labels": {
                params["facet_col"]: params["facet_title"],
                params["df_value_col"]: params["value_axis_title"],
                params["df_count_col"]: "Frequency",
                params["df_name_col"]: params["legend_title"],
                params["df_date_col"]: params["name_axis_title"],
                "x_ray_system_name": "System",
            },
            "hover_name": params["df_name_col"],
            "hover_data": {
                params["df_name_col"]: False,
                params["df_value_col"]: ":.2f",
                params["df_count_col"]: ":.0f",
            },
            "color_discrete_sequence": colour_sequence,
            "category_orders": params["sorted_category_list"],
            "height": chart_height,
            "render_mode": "svg",
        }
        fig = px.scatter(df, **scatter_kwargs)

        # Join the markers of every trace with lines
        for trace in fig.data:
            trace.update(mode="markers+lines")

        fig.update_xaxes(showticklabels=True, ticks="outside", ticklen=5)
        fig.update_yaxes(showticklabels=True, matches=None)

        fig.update_layout(legend_title_text=params["legend_title"])

        # Facet titles are generated as "column=value"; keep only the value
        fig.for_each_annotation(
            lambda annotation: annotation.update(text=annotation.text.split("=")[-1])
        )

        save_fig_as_html_div(fig, params["filename"])

        if params["return_as_dict"]:
            return fig.to_dict()

        export_config = global_config(
            params["filename"], height_multiplier=chart_height / 500.0
        )
        return plot(
            fig,
            output_type="div",
            include_plotlyjs=False,
            config=export_config,
        )

    except ValueError as e:
        return failed_chart_message_div(
            "Could not resolve chart. Try filtering the data to reduce the number of categories or systems.",
            e,
        )
1537
+
1538
+
1539
def plotly_scatter(
    df,
    params,
):
    """
    Build a faceted Plotly scatter chart of one DataFrame column against another

    ``params`` is updated in place: "df_category_name_col" and "df_group_col" are written to it, and
    "legend_title" is replaced with "System" when "grouping_choice" is "series".

    :param df: Pandas DataFrame containing the data
    :param params: a dictionary of parameters
    :param params["df_name_col"]: (string) DataFrame column containing categories
    :param params["df_x_col"]: (string) DataFrame column containing x values
    :param params["df_y_col"]: (string) DataFrame column containing y values
    :param params["sorting_choice"]: 2-element list. [0] sets sort direction (0 is descending, anything else is
        ascending), [1] selects the sort field: "name", "frequency", or anything else for the mean of the y values
    :param params["grouping_choice"]: (string) "series" or "system"
    :param params["legend_title"]: (string) legend title
    :param params["facet_col_wrap"]: (int) number of subplots per row
    :param params["colourmap"]: (string) colourmap to use
    :param params["x_axis_title"]: (string) x-axis title
    :param params["y_axis_title"]: (string) y-axis title
    :param params["filename"]: (string) default filename to use for plot bitmap export
    :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :return: Plotly figure embedded in an HTML DIV; or Plotly figure as a dictionary (if "return_as_dict" is True);
        or an error message embedded in an HTML DIV if there was a ValueError when calculating the figure
    """
    if df.empty:
        return empty_dataframe_msg(params)

    system_col = "x_ray_system_name"

    # Grouping by system (the default): one subplot per system, one colour per category
    params["df_category_name_col"] = params["df_name_col"]
    params["df_group_col"] = system_col
    if params["grouping_choice"] == "series":
        # Grouping by series: one subplot per category, one colour per system
        params["df_category_name_col"] = system_col
        params["df_group_col"] = params["df_name_col"]
        params["legend_title"] = "System"

    colour_col = params["df_category_name_col"]
    facet_col = params["df_group_col"]

    ascending = params["sorting_choice"][0] != 0
    sort_field = params["sorting_choice"][1].lower()

    def labels_by_name(column):
        # Distinct labels of the column in (reverse) alphabetical order
        ranked = df.sort_values(by=column, ascending=ascending)
        return ranked[column].unique().tolist()

    def labels_by_statistic(column, value_col, statistic, result_name):
        # Distinct labels of the column ordered by a per-label aggregate of value_col
        summary = df.groupby(column, observed=False).agg(
            **{result_name: (value_col, statistic)}
        )
        summary = summary.sort_values(by=result_name, ascending=ascending)
        return summary.reset_index()[column].tolist()

    def ordered_labels(column):
        if sort_field == "name":
            return labels_by_name(column)
        if sort_field == "frequency":
            return labels_by_statistic(column, column, "count", "freq")
        return labels_by_statistic(column, params["df_y_col"], "mean", "mean")

    # The ordering is calculated from the full DataFrame, before rows with missing x or y are removed
    sorting_categories = {}
    for column in (colour_col, system_col, facet_col):
        sorting_categories[column] = ordered_labels(column)

    try:
        # Drop any rows with nan values in x or y
        plot_df = df.dropna(subset=[params["df_x_col"], params["df_y_col"]])
        if plot_df.empty:
            return empty_dataframe_msg(params)

        chart_height, n_facet_rows = calc_facet_rows_and_height(
            plot_df, facet_col, params["facet_col_wrap"]
        )

        n_colours = len(plot_df[colour_col].unique())
        colour_sequence = calculate_colour_sequence(params["colourmap"], n_colours)

        axis_labels = {
            params["df_x_col"]: params["x_axis_title"],
            params["df_y_col"]: params["y_axis_title"],
            colour_col: params["legend_title"],
        }

        fig = px.scatter(
            plot_df,
            x=params["df_x_col"],
            y=params["df_y_col"],
            color=colour_col,
            facet_col=facet_col,
            facet_col_wrap=params["facet_col_wrap"],
            facet_row_spacing=0.40 / n_facet_rows,
            labels=axis_labels,
            color_discrete_sequence=colour_sequence,
            category_orders=sorting_categories,
            opacity=0.6,
            height=chart_height,
            render_mode="svg",
        )

        fig.update_traces(marker_line=dict(width=1, color="LightSlateGray"))

        # Each subplot keeps its own tick labels and its own axis ranges
        fig.update_xaxes(showticklabels=True, matches=None)
        fig.update_yaxes(showticklabels=True, matches=None)

        fig.update_layout(legend_title_text=params["legend_title"])

        # Subplot titles arrive as "column=value"; keep only the value
        fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))

        save_fig_as_html_div(fig, params["filename"])

        if params["return_as_dict"]:
            return fig.to_dict()

        return plot(
            fig,
            output_type="div",
            include_plotlyjs=False,
            config=global_config(
                params["filename"], height_multiplier=chart_height / 500.0
            ),
        )

    except ValueError as e:
        return failed_chart_message_div(
            "Could not resolve chart. Try filtering the data to reduce the number of categories or systems.",
            e,
        )
1716
+
1717
+
1718
def plotly_barchart_weekdays(
    df,
    df_name_col,
    df_value_col,
    name_axis_title="",
    value_axis_title="",
    colourmap="RdYlBu",
    filename="OpenREM_workload_chart",
    facet_col_wrap=3,
    sorting_choice=None,
    return_as_dict=False,
):
    """
    Build a Plotly bar chart of workload, with one subplot per x-ray system

    :param df: Pandas DataFrame containing the data; the "x_ray_system_name" column is used to create the
        subplots, and "weekday" and "hour" are requested as hover data
    :param df_name_col: (string) DataFrame column containing categories
    :param df_value_col: (string) DataFrame column containing values
    :param name_axis_title: (string) x-axis title
    :param value_axis_title: (string) y-axis title
    :param colourmap: (string) colourmap to use
    :param filename: (string) default filename to use for plot bitmap export
    :param facet_col_wrap: (int) number of subplots per row
    :param sorting_choice: 2-element list. [0] sets sort direction (0 is descending, anything else is ascending),
        [1] selects the sort field: "name", or anything else to sort systems by the sum of df_value_col.
        Defaults to ascending name order
    :param return_as_dict: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :return: Plotly figure embedded in an HTML DIV; or Plotly figure as a dictionary (if "return_as_dict" is True);
        or an error message embedded in an HTML DIV if there was a ValueError when calculating the figure
    """
    # pylint: disable=too-many-locals
    if df.empty:
        return empty_dataframe_msg()

    if sorting_choice is None:
        # Set default sorting to be ascending name order
        sorting_choice = [1, "name"]

    ascending = sorting_choice[0] != 0
    system_col = "x_ray_system_name"

    if sorting_choice[1].lower() == "name":
        ranked = df.sort_values(by=system_col, ascending=ascending)
        system_order = ranked[system_col].unique().tolist()
    else:
        # Order the systems by their total workload
        totals = df.groupby(system_col, observed=False).agg(
            freq=(df_value_col, "sum")
        )
        totals = totals.sort_values(by="freq", ascending=ascending)
        system_order = totals.reset_index()[system_col].tolist()

    sorting_categories = {system_col: system_order}

    chart_height, n_facet_rows = calc_facet_rows_and_height(
        df, system_col, facet_col_wrap
    )

    weekday_names = [
        "Monday",
        "Tuesday",
        "Wednesday",
        "Thursday",
        "Friday",
        "Saturday",
        "Sunday",
    ]

    try:
        axis_labels = {
            df_name_col: name_axis_title,
            df_value_col: value_axis_title,
            "x_ray_system_name": "System",
            "hour": "Hour",
        }
        hover_fields = {
            "x_ray_system_name": False,
            "weekday": True,
            "hour": ":.2f",
            df_value_col: True,
        }

        fig = px.bar(
            df,
            x=df_name_col,
            y=df_value_col,
            facet_col=system_col,
            facet_col_wrap=facet_col_wrap,
            facet_row_spacing=0.40 / n_facet_rows,
            color=df_value_col,
            labels=axis_labels,
            color_continuous_scale=colourmap,
            category_orders=sorting_categories,
            hover_name=system_col,
            hover_data=hover_fields,
            height=chart_height,
        )

        fig.update_xaxes(
            categoryarray=weekday_names,
            tickson="boundaries",
            showticklabels=True,
        )

        # Subplot titles arrive as "column=value"; keep only the value
        fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))

        save_fig_as_html_div(fig, filename)

        if return_as_dict:
            return fig.to_dict()

        return plot(
            fig,
            output_type="div",
            include_plotlyjs=False,
            config=global_config(filename, height_multiplier=chart_height / 500.0),
        )

    except ValueError as e:
        return failed_chart_message_div(
            "Could not resolve chart. Try filtering the data to reduce the number of systems.",
            e,
        )
1841
+
1842
+
1843
def plotly_frequency_barchart(
    df,
    params,
    csv_name="OpenREM chart data.csv",
):
    """
    Create a plotly bar chart of event frequency

    ``params`` is updated in place: "groupby_cols" is filled in when it is None and, when "grouping_choice" is
    "series", "x_axis_title", "legend_title" and "df_x_axis_col" are replaced.

    :param df: Pandas DataFrame containing the data
    :param params: a dictionary of parameters
    :param params["df_name_col"]: (string) DataFrame column whose entries are counted
    :param params["df_x_axis_col"]: (string) DataFrame column containing categories
    :param params["x_axis_title"]: (string) x-axis title
    :param params["groupby_cols"]: list of strings with DataFrame columns to group data by; if None then
        [params["df_name_col"]] is used
    :param params["grouping_choice"]: (string) "series" or "system"
    :param params["sorting_choice"]: 2-element list. [0] sets sort direction (0 is descending, anything else is
        ascending), [1] selects the sort field: "name", or anything else to sort by frequency
    :param params["legend_title"]: (string) legend title
    :param params["facet_col"]: (string) DataFrame column used to create subplots
    :param params["facet_col_wrap"]: (int) number of subplots per row
    :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :param params["colourmap"]: (string) colourmap to use
    :param params["filename"]: (string) default filename to use for plot bitmap export
    :param csv_name: (string) default filename to use for plot csv export
    :return: a two-element tuple. [0] is the Plotly figure embedded in an HTML DIV; or the Plotly figure as a
        dictionary (if "return_as_dict" is True); or a message embedded in an HTML DIV if the DataFrame was empty
        or there was a ValueError when calculating the figure. [1] is the HTML of a csv download link when the
        figure is returned as an HTML DIV, and None in every other case
    """
    if df.empty:
        return empty_dataframe_msg(params), None

    if params["groupby_cols"] is None:
        params["groupby_cols"] = [params["df_name_col"]]

    df_aggregated = create_dataframe_aggregates(
        df, params["groupby_cols"], params["df_name_col"], ["count"]
    )

    df_legend_col = params["df_name_col"]
    if params["grouping_choice"] == "series":
        # Categories move to the x-axis and the systems become the legend entries
        df_legend_col = "x_ray_system_name"
        params["x_axis_title"] = params["legend_title"]
        params["legend_title"] = "System"
        params["df_x_axis_col"] = params["df_name_col"]

    chart_height = 500
    n_facet_rows = 1

    if params["facet_col"]:
        chart_height, n_facet_rows = calc_facet_rows_and_height(
            df, params["facet_col"], params["facet_col_wrap"]
        )

    sort_ascending = True
    if params["sorting_choice"][0] == 0:
        sort_ascending = False

    sorting_categories = None
    if params["sorting_choice"][1].lower() == "name":
        sorting_categories = {
            params["df_x_axis_col"]: (
                df_aggregated.sort_values(
                    by=params["df_x_axis_col"], ascending=sort_ascending
                )[params["df_x_axis_col"]]
            )
            .unique()
            .tolist()
        }
        sorting_categories[df_legend_col] = (
            (
                df_aggregated.sort_values(by=df_legend_col, ascending=sort_ascending)[
                    df_legend_col
                ]
            )
            .unique()
            .tolist()
        )
        if params["facet_col"]:
            sorting_categories[params["facet_col"]] = (
                (
                    df_aggregated.sort_values(
                        by=params["facet_col"], ascending=sort_ascending
                    )[params["facet_col"]]
                )
                .unique()
                .tolist()
            )
    else:
        # Any sort field other than "name" orders the entries by their total count
        sorting_categories = {
            params["df_x_axis_col"]: df_aggregated.groupby(
                params["df_x_axis_col"], observed=False
            )
            .agg({"count": "sum"})
            .sort_values(by="count", ascending=sort_ascending)
            .reset_index()[params["df_x_axis_col"]]
            .tolist()
        }
        sorting_categories[df_legend_col] = (
            df_aggregated.groupby(df_legend_col, observed=False)
            .agg({"count": "sum"})
            .sort_values(by="count", ascending=sort_ascending)
            .reset_index()[df_legend_col]
            .tolist()
        )
        if params["facet_col"]:
            sorting_categories[params["facet_col"]] = (
                df_aggregated.groupby(params["facet_col"], observed=False)
                .agg({"count": "sum"})
                .sort_values(by="count", ascending=sort_ascending)
                .reset_index()[params["facet_col"]]
                .tolist()
            )

    n_colours = len(df_aggregated[df_legend_col].unique())
    colour_sequence = calculate_colour_sequence(params["colourmap"], n_colours)

    # customdata[0] is the legend entry; it is referenced by the hover template below
    custom_data_fields = [df_legend_col]
    if params["facet_col"] is not None:
        custom_data_fields.append(params["facet_col"])

    # A ValueError raised while building the figure is reported to the user as a message DIV, in the same way
    # as the other chart builders in this module, rather than being propagated to the caller
    try:
        fig = px.bar(
            df_aggregated,
            x=params["df_x_axis_col"],
            y="count",
            color=df_legend_col,
            facet_col=params["facet_col"],
            facet_col_wrap=params["facet_col_wrap"],
            facet_row_spacing=0.50 / n_facet_rows,
            color_discrete_sequence=colour_sequence,
            height=chart_height,
            custom_data=custom_data_fields,
            category_orders=sorting_categories,
        )

        fig.update_xaxes(
            tickson="boundaries",
            ticks="outside",
            ticklen=5,
            showticklabels=True,
            title=params["x_axis_title"],
        )
        fig.update_yaxes(showticklabels=True, matches=None)

        fig.update_layout(
            legend_title_text=params["legend_title"],
        )

        fig.update_traces(
            hovertemplate="<b>"
            + params["legend_title"]
            + ": %{customdata[0]}</b>"
            + "<br>"
            + params["x_axis_title"]
            + ": %{x}"
            + "<br>Frequency: %{y:.0d}"
            + "<extra></extra>",
        )

        # Subplot titles arrive as "column=value"; keep only the value
        fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))

        save_fig_as_html_div(fig, params["filename"])

        if params["return_as_dict"]:
            return fig.to_dict(), None

        csv_data = download_link(
            csv_data_frequency(fig, params),
            csv_name,
        )

        return (
            plot(
                fig,
                output_type="div",
                include_plotlyjs=False,
                config=global_config(
                    params["filename"], height_multiplier=chart_height / 500.0
                ),
            ),
            csv_data,
        )

    except ValueError as e:
        return (
            failed_chart_message_div(
                "Could not resolve chart. Try filtering the data to reduce the number of categories or systems.",
                e,
            ),
            None,
        )
2021
+
2022
+
2023
def construct_over_time_charts(
    df,
    params,
    group_by_physician=None,
):
    """
    Build Plotly charts of values over time, optionally grouped by performing physician name.
    "mean" and "median" each produce a line chart of the averaged values; "boxplot" produces a boxplot of the
    values in each time period.

    ``params["name_title"]`` is replaced with "Physician" when grouping by series and by physician.

    :param df: the Pandas DataFrame containing the data
    :param params: a dictionary of processing parameters

    :param params["df_name_col"]: (string) DataFrame column containing categories
    :param params["name_title"]: (string) name title
    :param params["df_value_col"]: (string) DataFrame column containing values
    :param params["value_title"]: (string) y-axis title
    :param params["df_date_col"]: (string) DataFrame column containing dates
    :param params["date_title"]: (string) date title
    :param params["facet_title"]: (string) subplot title
    :param params["sorting_choice"]: 2-element list. [0] sets sort direction, [1] used to determine which field to sort
    :param params["average_choices"]: list of strings containing required averages ("mean", "median", "boxplot")
    :param params["time_period"]: string containing the time period to average over; "A" (years), "Q" (quarters),
        "M" (months), "W" (weeks), "D" (days)
    :param params["grouping_choice"]: (string) "series" or "system"
    :param params["colourmap"]: (string) colourmap to use
    :param params["filename"]: (string) default filename to use for plot bitmap export
    :param params["facet_col_wrap"]: (int) number of subplots per row
    :param params["return_as_dict"]: (boolean) flag to trigger return as a dictionary rather than a HTML DIV
    :param group_by_physician: boolean flag to set whether to group by physician name
    :return: a dictionary containing a combination of ["mean"], ["median"] and ["boxplot"] entries,
        each of which holds whatever the chart function returned for that entry, or an empty-data message
        if no rows had a value
    """
    chart_kinds = ("mean", "median", "boxplot")
    average_kinds = ("mean", "median")

    # The category order is taken from the full DataFrame, before rows without a value are removed
    sorted_categories = create_sorted_category_list(
        df, params["df_name_col"], params["df_value_col"], params["sorting_choice"]
    )

    df = df.dropna(subset=[params["df_value_col"]])
    if df.empty:
        return {
            kind: empty_dataframe_msg(params)
            for kind in chart_kinds
            if kind in params["average_choices"]
        }

    if any(kind in params["average_choices"] for kind in average_kinds):
        df_time_series = create_dataframe_time_series(
            df,
            params["df_name_col"],
            params["df_value_col"],
            df_date_col=params["df_date_col"],
            time_period=params["time_period"],
            average_choices=list(
                set(params["average_choices"]).intersection(["mean", "median", "count"])
            ),
            group_by_physician=group_by_physician,
        )

    if params["grouping_choice"] == "series":
        # One subplot per category; the legend lists systems (or physicians)
        group_by_col = params["df_name_col"]
        if group_by_physician:
            category_names_col = "performing_physician_name"
            params["name_title"] = "Physician"
        else:
            category_names_col = "x_ray_system_name"
    else:
        # One subplot per system (or physician); the legend lists categories
        category_names_col = params["df_name_col"]
        if group_by_physician:
            group_by_col = "performing_physician_name"
        else:
            group_by_col = "x_ray_system_name"

    return_value = {}

    parameter_dict = {
        "df_count_col": "count" + params["df_value_col"],
        "df_name_col": category_names_col,
        "df_date_col": params["df_date_col"],
        "facet_col": group_by_col,
        "facet_title": params["facet_title"],
        "value_axis_title": params["value_title"],
        "name_axis_title": params["date_title"],
        "legend_title": params["name_title"],
        "colourmap": params["colourmap"],
        "filename": params["filename"],
        "facet_col_wrap": params["facet_col_wrap"],
        "sorted_category_list": sorted_categories,
        "return_as_dict": params["return_as_dict"],
    }

    for kind in average_kinds:
        if kind not in params["average_choices"]:
            continue
        # The time-series columns are addressed as the statistic name followed by the value column name
        parameter_dict["df_value_col"] = kind + params["df_value_col"]
        return_value[kind] = plotly_timeseries_linechart(
            df_time_series,
            parameter_dict,
        )

    if "boxplot" in params["average_choices"]:
        date_col = params["df_date_col"]

        # Replace each date with the start of the time period that it falls in
        df = (
            df.set_index(date_col)
            .to_period(freq=params["time_period"], copy=False)
            .reset_index()
        )
        df[date_col] = df[date_col].map(lambda x: x.start_time)
        df = df.sort_values(date_col)

        parameter_dict["df_name_col"] = date_col
        parameter_dict["df_value_col"] = params["df_value_col"]
        parameter_dict["sorting_choice"] = params["sorting_choice"]

        return_value["boxplot"] = plotly_boxplot(
            df,
            parameter_dict,
        )

    return return_value
2147
+
2148
+
2149
def download_link(
    object_to_download, download_filename, download_link_text="Download csv"
):
    """
    Adapted from:
    https://discuss.streamlit.io/t/heres-a-download-function-that-works-for-dataframes-and-txt/4052

    Generates a link to download the given object_to_download.

    A DataFrame is converted to csv text (without the index) after every "<br>" in its column labels and
    values has been replaced with a space. The DataFrame passed in is not modified.

    object_to_download (str, pd.DataFrame): The object to be downloaded.
    download_filename (str): filename and extension of file. e.g. mydata.csv, some_txt_output.txt
    download_link_text (str): Text to display for download link.

    Returns the HTML of an anchor element (str) with the content embedded as a base64 data URI.

    Examples:

    ``download_link(YOUR_DF, 'YOUR_DF.csv', 'Click here to download data!')``

    ``download_link(YOUR_STRING, 'YOUR_STRING.txt', 'Click here to download your text!')``

    """
    if isinstance(object_to_download, pd.DataFrame):
        # rename() returns a new DataFrame, so the caller's column labels are left untouched;
        # labels that are not strings are passed through unchanged rather than raising an error
        cleaned = object_to_download.rename(
            columns=lambda label: label.replace("<br>", " ")
            if isinstance(label, str)
            else label
        )
        object_to_download = cleaned.replace("<br>", " ", regex=True).to_csv(
            index=False
        )

    # some strings <-> bytes conversions necessary here
    b64 = base64.b64encode(object_to_download.encode()).decode()

    return f'<a class="btn btn-default btn-sm" role="button" href="data:file/txt;base64,{b64}" download="{download_filename}">{download_link_text}</a>'  # pylint: disable=line-too-long
2181
+
2182
+
2183
def generate_average_chart_group(
    average_choices,
    chart_message,
    df,
    modality_text,
    name_field,
    name_text,
    return_as_dict,
    return_structure,
    units_text,
    user_profile,
    value_field,
    value_text,
    variable_name_start,
    variable_value_name,
    sorting_choice,
):
    """
    Add the mean, median, boxplot and histogram charts that the user has enabled to return_structure

    Entries are stored under keys built as variable_name_start + chart type + variable_value_name + suffix,
    where the chart type is "Mean", "Median", "Boxplot" or "Histogram". Mean and median charts add "Data",
    "DataCSV" and "DataTable" entries; boxplot and histogram charts add a "Data" entry only.

    :param average_choices: list of strings naming the statistics to calculate; "median" is added when
        boxplots are enabled
    :param chart_message: passed to the chart functions as "custom_msg_line"
    :param df: Pandas DataFrame containing the data
    :param modality_text: (string) modality name used in the chart filenames
    :param name_field: (string) DataFrame column containing categories
    :param name_text: (string) user-friendly name of the categories
    :param return_as_dict: (boolean) passed to the chart functions as "return_as_dict"
    :param return_structure: dictionary that the results are added to
    :param units_text: (string) units appended to the value axis titles and table headings
    :param user_profile: object whose plot* attributes select and configure the charts
    :param value_field: (string) DataFrame column containing values
    :param value_text: (string) user-friendly name of the values
    :param variable_name_start: (string) start of each key added to return_structure
    :param variable_value_name: (string) value name used in the return_structure keys
    :param sorting_choice: (string) field to sort on; combined with user_profile.plotInitialSortingDirection
    :return: return_structure, with the new entries added
    """
    # pylint: disable=too-many-locals
    if user_profile.plotBoxplots and "median" not in average_choices:
        average_choices = average_choices + ["median"]

    sorting = [user_profile.plotInitialSortingDirection, sorting_choice]

    if user_profile.plotMean or user_profile.plotMedian:
        df_aggregated = create_dataframe_aggregates(
            df,
            [name_field],
            value_field,
            stats_to_use=average_choices + ["count"],
        )

        # One dictionary is shared by the mean and median charts; only the entries set in the loop differ
        parameter_dict = {
            "df_name_col": name_field,
            "name_axis_title": name_text,
            "colourmap": user_profile.plotColourMapChoice,
            "facet_col": None,
            "facet_col_wrap": user_profile.plotFacetColWrapVal,
            "return_as_dict": return_as_dict,
            "sorting_choice": [
                user_profile.plotInitialSortingDirection,
                sorting_choice,
            ],
            "custom_msg_line": chart_message,
        }

        for statistic, profile_flag in (("mean", "plotMean"), ("median", "plotMedian")):
            if not getattr(user_profile, profile_flag):
                continue

            title = statistic.capitalize()
            key_stem = variable_name_start + title + variable_value_name
            value_heading = " ".join((title, value_text, units_text))

            parameter_dict["value_axis_title"] = value_heading
            parameter_dict["filename"] = " ".join(
                ("OpenREM", modality_text, name_text, value_text, statistic)
            )
            parameter_dict["average_choice"] = statistic

            chart, chart_csv = plotly_barchart(
                df_aggregated,
                parameter_dict,
                csv_name="".join((variable_name_start, title, value_text, "Data.csv")),
            )
            return_structure[key_stem + "Data"] = chart
            return_structure[key_stem + "DataCSV"] = chart_csv

            # Create a data frame to use to display the data to the user in an html table
            table_df = df_aggregated[
                ["x_ray_system_name", name_field, statistic, "count"]
            ].round({statistic: 2})

            # Rename the data frame columns to have user-friendly names
            table_df.columns = [
                "X-ray system name",
                name_text,
                value_heading,
                "Count",
            ]

            # Pivot the table so that there is a column per system for the statistic and for the count
            table_df = table_df.pivot(index=name_text, columns="X-ray system name")
            table_df.columns = [
                "<br>".join((col[1], str(col[0]))) for col in table_df.columns
            ]
            table_df = table_df.reset_index()

            # Add a html table version of the data frame to the return structure
            table_name = key_stem + "DataTable"
            return_structure[table_name] = table_df.to_html(
                classes="table table-bordered table-sm small sortable chart-data-table-contents",
                table_id=table_name,
                index=False,
                na_rep="-",
                escape=False,
            )

    if user_profile.plotBoxplots:
        boxplot_params = {
            "df_name_col": name_field,
            "df_value_col": value_field,
            "value_axis_title": value_text + " " + units_text,
            "name_axis_title": name_text,
            "colourmap": user_profile.plotColourMapChoice,
            "filename": " ".join(
                ("OpenREM", modality_text, name_text, variable_value_name, "boxplot")
            ),
            "facet_col": None,
            "sorting_choice": sorting,
            "facet_col_wrap": user_profile.plotFacetColWrapVal,
            "return_as_dict": return_as_dict,
            "custom_msg_line": chart_message,
        }

        boxplot_key = variable_name_start + "Boxplot" + variable_value_name + "Data"
        return_structure[boxplot_key] = plotly_boxplot(
            df,
            boxplot_params,
        )

    if user_profile.plotHistograms:
        if user_profile.plotGroupingChoice == "series":
            # One subplot per category, with the systems in the legend
            group_by_col = name_field
            category_names_col = "x_ray_system_name"
            legend_title = "System"
        else:
            # One subplot per system, with the categories in the legend
            group_by_col = "x_ray_system_name"
            category_names_col = name_field
            legend_title = name_text

        histogram_params = {
            "df_facet_col": group_by_col,
            "df_category_col": category_names_col,
            "df_value_col": value_field,
            "value_axis_title": value_text + " " + units_text,
            "legend_title": legend_title,
            "n_bins": user_profile.plotHistogramBins,
            "colourmap": user_profile.plotColourMapChoice,
            "filename": " ".join(
                ("OpenREM", modality_text, name_text, variable_value_name, "histogram")
            ),
            "facet_col_wrap": user_profile.plotFacetColWrapVal,
            "sorting_choice": [
                user_profile.plotInitialSortingDirection,
                sorting_choice,
            ],
            "global_max_min": user_profile.plotHistogramGlobalBins,
            "return_as_dict": return_as_dict,
            "custom_msg_line": chart_message,
        }

        histogram_key = (
            variable_name_start + "Histogram" + variable_value_name + "Data"
        )
        return_structure[histogram_key] = plotly_histogram_barchart(
            df,
            histogram_params,
        )

    return return_structure