hammock-plot 1.1.0__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/PKG-INFO +1 -1
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/README.md +3 -2
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/hammock_plot/figure.py +28 -23
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/hammock_plot/main.py +16 -29
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/hammock_plot/shapes.py +48 -1
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/hammock_plot/unibar.py +112 -174
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/hammock_plot/utils.py +47 -2
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/hammock_plot/value.py +1 -19
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/hammock_plot.egg-info/PKG-INFO +1 -1
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/pyproject.toml +1 -1
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/LICENSE +0 -0
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/hammock_plot/__init__.py +0 -0
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/hammock_plot.egg-info/SOURCES.txt +0 -0
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/hammock_plot.egg-info/dependency_links.txt +0 -0
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/hammock_plot.egg-info/requires.txt +0 -0
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/hammock_plot.egg-info/top_level.txt +0 -0
- {hammock_plot-1.1.0 → hammock_plot-1.2.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hammock-plot
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Hammock plot visualization for categorical and mixed categorical-continuous data
|
|
5
5
|
Author-email: Tiancheng Yang <t77yang@uwaterloo.ca>, Sandra Huang <sandra.huang@uwaterloo.ca>, Matthias Schonlau <schonlau@uwaterloo.ca>
|
|
6
6
|
License: MIT
|
|
@@ -121,7 +121,7 @@ hammock = hammock_plot.Hammock(data_df = df)
|
|
|
121
121
|
ax = hammock.plot(var=var,
|
|
122
122
|
missing=True,
|
|
123
123
|
numerical_var_levels={"sataces": None, "satcomm": None, "satrate": None},
|
|
124
|
-
|
|
124
|
+
min_bar_height_unibar=0.2,
|
|
125
125
|
uni_vfill=0.3)
|
|
126
126
|
```
|
|
127
127
|
|
|
@@ -265,7 +265,8 @@ ax = hammock.plot(
|
|
|
265
265
|
| | `width` | `float` | Width of the plot in inches. Default is 15. Caution: Width too narrow may distort the plot. |
|
|
266
266
|
| Other options | `shape` | `str` | Shape of the boxes. "rectangle" or "parallelogram". Default is "rectangle". |
|
|
267
267
|
| | `same_scale` | `List[str]` | List of variables that have the same scale. Default is `None`. |
|
|
268
|
-
| | `
|
|
268
|
+
| | `min_bar_height_unibar` | `float` | Minimum drawn height of a unibar. Bars representing only a tiny fraction of the data may be so thin that they are invisible in a plot; this sets an absolute floor on their thickness. With `hi_box="stacked"`, each colour segment within a unibar is also kept at least this tall (by trading height with the larger segments, so the bar height and layout are unchanged), keeping a colour visible even when it is a tiny share of the bar. The default value is chosen so that very small bars and segments remain visible. Default is 0.15 (0.15% of the entire plot height). |
|
|
269
|
+
| | `min_bar_height_connectors` | `float` | Minimum drawn thickness of a connector (independent of `connector_fraction`). Like `min_bar_height_unibar`, but for the connectors between unibars. Default is 0.12 (0.12% of the entire plot height). |
|
|
269
270
|
| | `display_figure` | `bool` | Whether or not to display the figure. This can be useful if you just want to save the plots. Default is `True`. |
|
|
270
271
|
| | `save_path` | `str` | If it is not `None`, the figure will be saved to the given path with given name and format. Default is `None`. |
|
|
271
272
|
| | `violin_bw_method` | `str` or `float` | Specifies the bw method used to plot a violin plot. See https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.violinplot.html for more details. |
|
|
@@ -26,7 +26,7 @@ class Figure:
|
|
|
26
26
|
var_types:
|
|
27
27
|
- Dict of the types of each variable. Either: np.str_, np.floating, or np.integer
|
|
28
28
|
|
|
29
|
-
numerical_var_levels, display_type, missing, missing_placeholder, label, unibar, hi_box, width, height, uni_vfill, connector_fraction,
|
|
29
|
+
numerical_var_levels, display_type, missing, missing_placeholder, label, unibar, hi_box, width, height, uni_vfill, connector_fraction, min_bar_height_unibar, min_bar_height_connectors, uni_hfill, label_options, shape_type, same_scale, violin_bw_method: refer to README file
|
|
30
30
|
"""
|
|
31
31
|
def __init__(self,
|
|
32
32
|
# general
|
|
@@ -50,7 +50,8 @@ class Figure:
|
|
|
50
50
|
height: float,
|
|
51
51
|
uni_vfill: float,
|
|
52
52
|
connector_fraction: float,
|
|
53
|
-
|
|
53
|
+
min_bar_height_unibar: float,
|
|
54
|
+
min_bar_height_connectors: float,
|
|
54
55
|
uni_hfill: float,
|
|
55
56
|
|
|
56
57
|
# Other
|
|
@@ -79,7 +80,8 @@ class Figure:
|
|
|
79
80
|
self.height = height # height of the entire plot
|
|
80
81
|
self.uni_vfill = uni_vfill
|
|
81
82
|
self.connector_fraction = connector_fraction
|
|
82
|
-
self.
|
|
83
|
+
self.min_bar_height_unibar = min_bar_height_unibar
|
|
84
|
+
self.min_bar_height_connectors = min_bar_height_connectors
|
|
83
85
|
self.uni_hfill = uni_hfill
|
|
84
86
|
|
|
85
87
|
self.label_options = label_options
|
|
@@ -155,7 +157,7 @@ class Figure:
|
|
|
155
157
|
missing=self.missing,
|
|
156
158
|
missing_placeholder=self.missing_placeholder,
|
|
157
159
|
val_order=order,
|
|
158
|
-
min_bar_height=self.
|
|
160
|
+
min_bar_height=self.min_bar_height_unibar,
|
|
159
161
|
colors=self.colors,
|
|
160
162
|
hi_box=self.hi_box,
|
|
161
163
|
display_type = uni_display_type,
|
|
@@ -235,7 +237,7 @@ class Figure:
|
|
|
235
237
|
|
|
236
238
|
max_missing_height = max_missing_occ * self.bar_unit
|
|
237
239
|
|
|
238
|
-
missing_padding = (max(self.
|
|
240
|
+
missing_padding = (max(self.min_bar_height_unibar, max_missing_height) + Defaults.SPACE_ABOVE_MISSING)
|
|
239
241
|
|
|
240
242
|
# if there are horizontal bar charts, calculate the bar unit differently.
|
|
241
243
|
max_num_categories = 0
|
|
@@ -250,7 +252,7 @@ class Figure:
|
|
|
250
252
|
max_val_occ = max(max_val_occ, max(val.occurrences for val in uni.values))
|
|
251
253
|
max_num_categories = max(max_num_categories, len(uni.non_missing_vals))
|
|
252
254
|
if max_num_categories > 0:
|
|
253
|
-
hbar_height = max(max_val_occ * self.bar_unit, self.
|
|
255
|
+
hbar_height = max(max_val_occ * self.bar_unit, self.min_bar_height_unibar)
|
|
254
256
|
# if the horizontal bar charts overlap
|
|
255
257
|
available_height = (self.height - 2 * self.ymargin * self.height) * self.scale
|
|
256
258
|
|
|
@@ -260,15 +262,14 @@ class Figure:
|
|
|
260
262
|
self.bar_unit = (available_height * self.uni_vfill) / (max_val_occ * max_num_categories)
|
|
261
263
|
if self.missing:
|
|
262
264
|
self.bar_unit = self.bar_unit / (1 + max_missing_occ * self.uni_vfill)
|
|
263
|
-
|
|
264
|
-
|
|
265
|
+
|
|
265
266
|
nonmissing_height = available_height
|
|
266
267
|
if self.missing:
|
|
267
268
|
max_missing_height = max_missing_occ * self.bar_unit
|
|
268
|
-
missing_padding = (max(self.
|
|
269
|
+
missing_padding = (max(self.min_bar_height_unibar, max_missing_height) + Defaults.SPACE_ABOVE_MISSING)
|
|
269
270
|
nonmissing_height -= missing_padding
|
|
270
271
|
|
|
271
|
-
hbar_height = max(nonmissing_height * self.uni_vfill / max_num_categories, self.
|
|
272
|
+
hbar_height = max(nonmissing_height * self.uni_vfill / max_num_categories, self.min_bar_height_unibar)
|
|
272
273
|
|
|
273
274
|
# set bar_unit in unibars, set missing_padding in unibars, set hbar heights, set unibar widths
|
|
274
275
|
for uni in self.unibars:
|
|
@@ -284,18 +285,20 @@ class Figure:
|
|
|
284
285
|
# Determine ranges for unibars that should use same_scale
|
|
285
286
|
global_range = None
|
|
286
287
|
if same_scale:
|
|
287
|
-
#
|
|
288
|
-
|
|
288
|
+
# Take the min/max across the same_scale group column-by-column
|
|
289
|
+
# (vectorised) rather than gathering every value into one list.
|
|
290
|
+
global_min, global_max = None, None
|
|
289
291
|
for uni_name in same_scale:
|
|
290
|
-
|
|
291
|
-
numeric_vals
|
|
292
|
-
|
|
292
|
+
numeric_vals = pd.to_numeric(self.data_df[uni_name], errors="coerce").dropna()
|
|
293
|
+
if numeric_vals.empty:
|
|
294
|
+
continue
|
|
295
|
+
col_min, col_max = numeric_vals.min(), numeric_vals.max()
|
|
296
|
+
global_min = col_min if global_min is None else min(global_min, col_min)
|
|
297
|
+
global_max = col_max if global_max is None else max(global_max, col_max)
|
|
293
298
|
|
|
294
|
-
if
|
|
295
|
-
global_min, global_max = min(combined_vals), max(combined_vals)
|
|
299
|
+
if global_min is not None:
|
|
296
300
|
# Assign the same global range to all unibars in same_scale
|
|
297
|
-
|
|
298
|
-
global_range = (global_min, global_max)
|
|
301
|
+
global_range = (global_min, global_max)
|
|
299
302
|
|
|
300
303
|
# set variables so that same_scale variables align with each other.
|
|
301
304
|
# Only unibars whose display draws a *value-specific bar* at min/max
|
|
@@ -406,11 +409,9 @@ class Figure:
|
|
|
406
409
|
uni.draw(
|
|
407
410
|
ax,
|
|
408
411
|
rectangle_painter=rect_painter,
|
|
409
|
-
y_start=self.y_start,
|
|
410
|
-
y_end=self.y_end,
|
|
411
412
|
alpha=alpha,
|
|
412
413
|
)
|
|
413
|
-
|
|
414
|
+
|
|
414
415
|
return ax
|
|
415
416
|
|
|
416
417
|
def draw_connections(self, alpha, color, ax=None):
|
|
@@ -538,7 +539,11 @@ class Figure:
|
|
|
538
539
|
|
|
539
540
|
left_center_pts.append((lx, ly))
|
|
540
541
|
right_center_pts.append((rx, ry))
|
|
541
|
-
|
|
542
|
+
# min_bar_height_connectors floors the drawn thickness only (not the
|
|
543
|
+
# stacking math above), so a too-thin connector stays centred but renders
|
|
544
|
+
# visibly. The floor is absolute - independent of connector_fraction.
|
|
545
|
+
h = total_cnt * self.bar_unit * self.connector_fraction
|
|
546
|
+
heights.append(max(h, self.min_bar_height_connectors))
|
|
542
547
|
weights.append(wts)
|
|
543
548
|
|
|
544
549
|
if left_center_pts:
|
|
@@ -5,9 +5,10 @@ import matplotlib.pyplot as plt
|
|
|
5
5
|
from hammock_plot.figure import Figure
|
|
6
6
|
from hammock_plot.utils import Defaults
|
|
7
7
|
import numpy as np
|
|
8
|
-
from hammock_plot.utils import safe_numeric, validate_expression, resolve_ordering, assign_color_index, get_formatted_label
|
|
8
|
+
from hammock_plot.utils import safe_numeric, validate_expression, resolve_ordering, assign_color_index, get_formatted_label, clamp_unit
|
|
9
9
|
import warnings
|
|
10
10
|
|
|
11
|
+
|
|
11
12
|
class Hammock:
|
|
12
13
|
"""
|
|
13
14
|
Initializes a Hammock plot with dataframe
|
|
@@ -45,7 +46,8 @@ class Hammock:
|
|
|
45
46
|
label_options: dict = None,
|
|
46
47
|
height: float = 10,
|
|
47
48
|
width: float = 15,
|
|
48
|
-
|
|
49
|
+
min_bar_height_unibar: float = Defaults.MIN_BAR_HEIGHT_UNIBAR,
|
|
50
|
+
min_bar_height_connectors: float = Defaults.MIN_BAR_HEIGHT_CONNECTORS,
|
|
49
51
|
alpha: float = Defaults.ALPHA,
|
|
50
52
|
|
|
51
53
|
# Other
|
|
@@ -94,36 +96,20 @@ class Hammock:
|
|
|
94
96
|
f'The weight variable {weights} must be numeric.'
|
|
95
97
|
)
|
|
96
98
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
)
|
|
99
|
+
if (self.data_df[weights] <= 0).any():
|
|
100
|
+
raise ValueError(
|
|
101
|
+
f'There is a nonpositive variable in {weights}. This is not allowed.'
|
|
102
|
+
)
|
|
102
103
|
|
|
103
104
|
|
|
104
|
-
|
|
105
|
-
warnings.warn("uni_hfill < 0. Value has been clamped to 0.")
|
|
106
|
-
uni_hfill = 0
|
|
107
|
-
elif uni_hfill > 1:
|
|
108
|
-
warnings.warn("uni_hfill > 1. Value has been clamped to 1.")
|
|
109
|
-
uni_hfill = 1
|
|
105
|
+
uni_hfill = clamp_unit(uni_hfill, "uni_hfill")
|
|
110
106
|
|
|
111
107
|
if uni_hfill == 1:
|
|
112
108
|
warnings.warn("Tip: To leave a bit of a gap between the univariate bars, set uni_hfill to something close to 1 but not quite one (ex 0.9)")
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
elif uni_vfill > 1:
|
|
118
|
-
warnings.warn("uni_vfill > 1. Value has been clamped to 1.")
|
|
119
|
-
uni_vfill = 1
|
|
120
|
-
|
|
121
|
-
if alpha < 0:
|
|
122
|
-
warnings.warn("alpha < 0. Value has been clamped to 0.")
|
|
123
|
-
alpha = 0
|
|
124
|
-
elif alpha > 1:
|
|
125
|
-
warnings.warn("alpha > 1. Value has been clamped to 1.")
|
|
126
|
-
alpha = 1
|
|
109
|
+
|
|
110
|
+
uni_vfill = clamp_unit(uni_vfill, "uni_vfill")
|
|
111
|
+
|
|
112
|
+
alpha = clamp_unit(alpha, "alpha")
|
|
127
113
|
|
|
128
114
|
# drop missing values if missing values should not be plotted
|
|
129
115
|
if not missing:
|
|
@@ -144,7 +130,7 @@ class Hammock:
|
|
|
144
130
|
var_types[varname] = np.integer
|
|
145
131
|
else:
|
|
146
132
|
var_types[varname] = np.floating
|
|
147
|
-
elif pd.
|
|
133
|
+
elif isinstance(dtype, pd.CategoricalDtype) or pd.api.types.is_string_dtype(dtype):
|
|
148
134
|
var_types[varname] = np.str_
|
|
149
135
|
else:
|
|
150
136
|
raise RuntimeError("Invalid dtype detected - logic error in code. dtype: ", dtype)
|
|
@@ -461,7 +447,8 @@ class Hammock:
|
|
|
461
447
|
height=height,
|
|
462
448
|
uni_vfill=uni_vfill,
|
|
463
449
|
connector_fraction=connector_fraction,
|
|
464
|
-
|
|
450
|
+
min_bar_height_unibar=min_bar_height_unibar,
|
|
451
|
+
min_bar_height_connectors=min_bar_height_connectors,
|
|
465
452
|
uni_hfill=uni_hfill,
|
|
466
453
|
|
|
467
454
|
# Other
|
|
@@ -17,6 +17,45 @@ class FigureBase(ABC):
|
|
|
17
17
|
order = np.argsort(np.arctan2(y - y.mean(), x - x.mean()))
|
|
18
18
|
return x[order], y[order]
|
|
19
19
|
|
|
20
|
+
@staticmethod
|
|
21
|
+
def _floor_fractions(fracs, min_frac):
|
|
22
|
+
"""
|
|
23
|
+
Raise every non-zero colour fraction to at least ``min_frac`` by shrinking
|
|
24
|
+
the larger fractions to compensate, keeping the total at 1 (so the bar's
|
|
25
|
+
height and position are unchanged). Zero fractions stay zero - a colour
|
|
26
|
+
absent from this bar is never given a sliver. Used to keep each colour
|
|
27
|
+
segment at least ``min_bar_height`` tall even when a colour is a tiny
|
|
28
|
+
share of the bar. If the minimum cannot be met for every colour (the bar
|
|
29
|
+
is too short to fit them all), the segments are split equally as a best
|
|
30
|
+
effort.
|
|
31
|
+
"""
|
|
32
|
+
f = np.array(fracs, dtype=float)
|
|
33
|
+
nz = f > 0
|
|
34
|
+
k = int(nz.sum())
|
|
35
|
+
if k == 0 or min_frac <= 0:
|
|
36
|
+
return f
|
|
37
|
+
if min_frac * k >= 1.0:
|
|
38
|
+
out = np.zeros_like(f)
|
|
39
|
+
out[nz] = 1.0 / k
|
|
40
|
+
return out
|
|
41
|
+
out = f.copy()
|
|
42
|
+
# Water-filling: lift deficient segments to min_frac, draw the shortfall
|
|
43
|
+
# from segments still above min_frac in proportion to their surplus.
|
|
44
|
+
for _ in range(k + 1):
|
|
45
|
+
deficient = nz & (out < min_frac)
|
|
46
|
+
if not deficient.any():
|
|
47
|
+
break
|
|
48
|
+
out[deficient] = min_frac
|
|
49
|
+
shortfall = out.sum() - 1.0
|
|
50
|
+
donors = nz & (out > min_frac)
|
|
51
|
+
surplus = out[donors] - min_frac
|
|
52
|
+
total_surplus = surplus.sum()
|
|
53
|
+
if total_surplus <= 0:
|
|
54
|
+
out[nz] = 1.0 / k
|
|
55
|
+
break
|
|
56
|
+
out[donors] -= shortfall * (surplus / total_surplus)
|
|
57
|
+
return out
|
|
58
|
+
|
|
20
59
|
def plot(self, ax,
|
|
21
60
|
alpha: float,
|
|
22
61
|
left_center_pts: List[Tuple[float, float]],
|
|
@@ -27,7 +66,8 @@ class FigureBase(ABC):
|
|
|
27
66
|
orientation: str = "side-by-side",
|
|
28
67
|
zorder: int = 0,
|
|
29
68
|
check_overlap: bool = False,
|
|
30
|
-
unibar_name: str = None
|
|
69
|
+
unibar_name: str = None,
|
|
70
|
+
min_seg_height: float = 0.0):
|
|
31
71
|
"""
|
|
32
72
|
Draw polygons (rectangles or parallelograms) with segmented coloring.
|
|
33
73
|
|
|
@@ -77,6 +117,13 @@ class FigureBase(ABC):
|
|
|
77
117
|
right_top = np.array([poly_x[2], poly_y[2]])
|
|
78
118
|
right_bot = np.array([poly_x[3], poly_y[3]])
|
|
79
119
|
|
|
120
|
+
# Keep each colour segment at least min_seg_height tall (absolute),
|
|
121
|
+
# by trading height between segments within this fixed-height bar.
|
|
122
|
+
if min_seg_height > 0:
|
|
123
|
+
bar_h = abs(poly_y[1] - poly_y[0])
|
|
124
|
+
if bar_h > 0:
|
|
125
|
+
fracs = self._floor_fractions(fracs, min_seg_height / bar_h)
|
|
126
|
+
|
|
80
127
|
cum = 0.0
|
|
81
128
|
for f, col in zip(fracs, colors):
|
|
82
129
|
f0, f1 = cum, cum + f
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
# unibar.py
|
|
2
|
-
from typing import List
|
|
2
|
+
from typing import List
|
|
3
3
|
import numpy as np
|
|
4
4
|
import matplotlib.pyplot as plt
|
|
5
5
|
from hammock_plot.value import Value
|
|
6
6
|
from hammock_plot.utils import edge_color_from_face
|
|
7
7
|
from .utils import Defaults, get_formatted_label
|
|
8
|
-
from fractions import Fraction
|
|
9
8
|
from scipy.stats import gaussian_kde
|
|
10
9
|
|
|
11
10
|
class Unibar:
|
|
@@ -65,8 +64,6 @@ class Unibar:
|
|
|
65
64
|
Create Value objects for this unibar from self.df.
|
|
66
65
|
Each Value has total occurrences and breakdown by colour_index.
|
|
67
66
|
"""
|
|
68
|
-
uni_series = self.df[self.name]
|
|
69
|
-
counts = uni_series.value_counts()
|
|
70
67
|
values: List[Value] = []
|
|
71
68
|
|
|
72
69
|
dtype = self.val_type
|
|
@@ -77,34 +74,26 @@ class Unibar:
|
|
|
77
74
|
# Determine order
|
|
78
75
|
order = self.val_order
|
|
79
76
|
|
|
77
|
+
# Count occurrences per (value, colour) in one grouped pass rather than
|
|
78
|
+
# scanning the whole frame once per value. Rows = each value, columns =
|
|
79
|
+
# each colour index; weighted sums the weight column, else just counts.
|
|
80
|
+
grouped = self.df.groupby([self.name, "color_index"], observed=True)
|
|
81
|
+
if self.weights is None:
|
|
82
|
+
occ_table = grouped.size().unstack("color_index", fill_value=0)
|
|
83
|
+
else:
|
|
84
|
+
occ_table = grouped[self.weights].sum().unstack("color_index", fill_value=0)
|
|
85
|
+
occ_table = occ_table.reindex(columns=all_colors, fill_value=0)
|
|
86
|
+
|
|
80
87
|
for val in order:
|
|
81
|
-
#
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
cnt =
|
|
87
|
-
|
|
88
|
-
# assigns a list of # occurrences which corresponds to each of the highlight colours.
|
|
89
|
-
if cnt > 0:
|
|
90
|
-
subset = self.df[self.df[self.name] == val]
|
|
91
|
-
|
|
92
|
-
if self.weights is None:
|
|
93
|
-
occ_by_colour = (
|
|
94
|
-
subset["color_index"]
|
|
95
|
-
.value_counts()
|
|
96
|
-
.reindex(all_colors, fill_value=0)
|
|
97
|
-
.tolist()
|
|
98
|
-
)
|
|
99
|
-
else:
|
|
100
|
-
occ_by_colour = (
|
|
101
|
-
subset.groupby("color_index")[self.weights]
|
|
102
|
-
.sum()
|
|
103
|
-
.reindex(all_colors, fill_value=0)
|
|
104
|
-
.tolist()
|
|
105
|
-
)
|
|
88
|
+
# look up this value's per-colour occurrences. A value that's in the
|
|
89
|
+
# order but absent from the data (e.g. an empty same_scale slot) isn't
|
|
90
|
+
# in the table, so it gets all zeros.
|
|
91
|
+
if val in occ_table.index:
|
|
92
|
+
occ_by_colour = occ_table.loc[val].tolist()
|
|
93
|
+
cnt = sum(occ_by_colour)
|
|
106
94
|
else:
|
|
107
95
|
occ_by_colour = [0] * len(all_colors)
|
|
96
|
+
cnt = 0
|
|
108
97
|
|
|
109
98
|
# puts the constructed Value in a list associated with the Unibar.
|
|
110
99
|
values.append(Value(
|
|
@@ -128,6 +117,9 @@ class Unibar:
|
|
|
128
117
|
# sort values before separating missing and non-missing values
|
|
129
118
|
self._sort_values()
|
|
130
119
|
|
|
120
|
+
# id -> Value lookup so get_value_by_id doesn't rescan the list each call
|
|
121
|
+
self._values_by_id = {v.id: v for v in self.values}
|
|
122
|
+
|
|
131
123
|
# Separate missing and non-missing values
|
|
132
124
|
self.missing_vals = [v for v in self.values
|
|
133
125
|
if self.missing_placeholder is not None and str(v.id) == str(self.missing_placeholder)]
|
|
@@ -135,7 +127,7 @@ class Unibar:
|
|
|
135
127
|
|
|
136
128
|
|
|
137
129
|
def set_measurements(self, pos_x=None, width=None, bar_unit=None, missing_padding=None,
|
|
138
|
-
|
|
130
|
+
hbar_height=None):
|
|
139
131
|
if pos_x is not None:
|
|
140
132
|
self.pos_x = pos_x
|
|
141
133
|
if width is not None:
|
|
@@ -144,8 +136,6 @@ class Unibar:
|
|
|
144
136
|
self.bar_unit = bar_unit
|
|
145
137
|
if missing_padding is not None:
|
|
146
138
|
self.missing_padding = missing_padding
|
|
147
|
-
if scale_ypos is not None:
|
|
148
|
-
self.scale_ypos = scale_ypos
|
|
149
139
|
if hbar_height is not None:
|
|
150
140
|
self.hbar_height = hbar_height
|
|
151
141
|
|
|
@@ -300,8 +290,7 @@ class Unibar:
|
|
|
300
290
|
self.values.sort(key=lambda v: order_map.get(v.id, len(order_map)))
|
|
301
291
|
|
|
302
292
|
|
|
303
|
-
def draw(self, ax, alpha, rectangle_painter=None,
|
|
304
|
-
color="lightskyblue", y_start: int = None, y_end: int = None):
|
|
293
|
+
def draw(self, ax, alpha, rectangle_painter=None, color="lightskyblue"):
|
|
305
294
|
"""
|
|
306
295
|
Template Method for drawing a unibar:
|
|
307
296
|
1. Draw the background according to display_type
|
|
@@ -312,16 +301,16 @@ class Unibar:
|
|
|
312
301
|
|
|
313
302
|
# Step 1: Draw background based on display_type
|
|
314
303
|
if self.unibar:
|
|
315
|
-
self._draw_background(ax, rectangle_painter
|
|
304
|
+
self._draw_background(ax, rectangle_painter)
|
|
316
305
|
|
|
317
306
|
# Step 2: Draw labels
|
|
318
307
|
if self.label:
|
|
319
|
-
self._draw_labels(ax
|
|
308
|
+
self._draw_labels(ax)
|
|
320
309
|
|
|
321
310
|
return ax
|
|
322
311
|
|
|
323
312
|
# ---------- Template Method ----------
|
|
324
|
-
def _draw_background(self, ax, rectangle_painter
|
|
313
|
+
def _draw_background(self, ax, rectangle_painter):
|
|
325
314
|
"""
|
|
326
315
|
Template Method for drawing the backgrounds in a unibar
|
|
327
316
|
3 types of backgrounds:
|
|
@@ -373,64 +362,78 @@ class Unibar:
|
|
|
373
362
|
right_pts.append((self.pos_x + half_label_space, val.vert_centre))
|
|
374
363
|
weights.append(val.occ_by_colour)
|
|
375
364
|
|
|
376
|
-
rectangle_painter.plot(ax, self.alpha, left_pts, right_pts, heights, self.colors, weights, orientation=self.hi_box,zorder=1,
|
|
377
|
-
check_overlap=True, unibar_name=self.name)
|
|
378
|
-
|
|
365
|
+
rectangle_painter.plot(ax, self.alpha, left_pts, right_pts, heights, self.colors, weights, orientation=self.hi_box,zorder=1,
|
|
366
|
+
check_overlap=True, unibar_name=self.name, min_seg_height=self.min_bar_height)
|
|
367
|
+
|
|
379
368
|
if self.draw_white_dividers and len(values) > 1:
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
369
|
+
# each rectangle's edge is half its own bar height from its centre
|
|
370
|
+
half_heights = [h / 2 for h in heights]
|
|
371
|
+
self._draw_white_dividers(ax, values, rectangle_painter, half_heights, width)
|
|
372
|
+
|
|
373
|
+
def _draw_white_dividers(self, ax, values, rectangle_painter, half_heights, width):
|
|
374
|
+
"""
|
|
375
|
+
Draw thin white lines dividing adjacent bars (used when uni_vfill == 1).
|
|
376
|
+
half_heights[i] is the half-height of values[i], so each divider lands
|
|
377
|
+
midway between the top edge of one bar and the bottom edge of the next.
|
|
378
|
+
"""
|
|
379
|
+
divider_height = Defaults.WHITE_DIVIDER_HEIGHT
|
|
380
|
+
|
|
381
|
+
divider_left_pts = []
|
|
382
|
+
divider_right_pts = []
|
|
383
|
+
divider_heights = []
|
|
384
|
+
divider_weights = []
|
|
385
|
+
|
|
386
|
+
half_label_space = width / 2
|
|
387
|
+
|
|
388
|
+
for i in range(len(values) - 1):
|
|
389
|
+
top_of_i = values[i].vert_centre + half_heights[i]
|
|
390
|
+
bottom_of_next = values[i + 1].vert_centre - half_heights[i + 1]
|
|
391
|
+
divider_y = (top_of_i + bottom_of_next) / 2
|
|
392
|
+
|
|
393
|
+
divider_left_pts.append((self.pos_x - half_label_space, divider_y))
|
|
394
|
+
divider_right_pts.append((self.pos_x + half_label_space, divider_y))
|
|
395
|
+
divider_heights.append(divider_height)
|
|
396
|
+
|
|
397
|
+
# white divider bar (use 2D structure)
|
|
398
|
+
divider_weights.append([1])
|
|
399
|
+
|
|
400
|
+
rectangle_painter.plot(
|
|
401
|
+
ax,
|
|
402
|
+
alpha=1,
|
|
403
|
+
left_center_pts=divider_left_pts,
|
|
404
|
+
right_center_pts=divider_right_pts,
|
|
405
|
+
heights=divider_heights,
|
|
406
|
+
colors=["white"],
|
|
407
|
+
weights=divider_weights,
|
|
408
|
+
orientation=self.hi_box,
|
|
409
|
+
zorder=2, # slightly above bars
|
|
410
|
+
check_overlap=False
|
|
411
|
+
)
|
|
412
|
+
|
|
413
413
|
def _prepare_scaled_data(self, y_start, y_end):
|
|
414
414
|
"""
|
|
415
|
-
Collect the y-positions for the box/violin plots, split by colour
|
|
415
|
+
Collect the y-positions for the box/violin plots, split by colour, along
|
|
416
|
+
with the frequency sitting at each position.
|
|
416
417
|
|
|
417
418
|
Each value's number is mapped onto the [y_start, y_end] span, and that
|
|
418
|
-
position is recorded once per colour it appears in.
|
|
419
|
-
|
|
420
|
-
|
|
419
|
+
position is recorded once per colour it appears in. The matching entry in
|
|
420
|
+
weights_per_color carries how many observations sit there (occurrence
|
|
421
|
+
count, or weight-sum if a weights column is set), so the KDE and quantile
|
|
422
|
+
code can zip the two together entry for entry.
|
|
421
423
|
|
|
422
424
|
Args:
|
|
423
425
|
y_start, y_end: bottom and top of the drawable vertical span.
|
|
424
426
|
|
|
425
|
-
Returns (data_per_color, facecolors, edgecolors): the
|
|
426
|
-
colour, the fill colours, and their
|
|
427
|
-
if there are no non-missing values.
|
|
427
|
+
Returns (data_per_color, weights_per_color, facecolors, edgecolors): the
|
|
428
|
+
y-positions and their frequencies per colour, the fill colours, and their
|
|
429
|
+
matching edge colours. Empty lists if there are no non-missing values.
|
|
428
430
|
"""
|
|
429
431
|
if not self.non_missing_vals:
|
|
430
|
-
return [], [], []
|
|
432
|
+
return [], [], [], []
|
|
431
433
|
|
|
432
434
|
n_colors = len(self.colors)
|
|
433
435
|
data_per_color = [[] for _ in range(n_colors)]
|
|
436
|
+
weights_per_color = [[] for _ in range(n_colors)]
|
|
434
437
|
|
|
435
438
|
all_numeric_vals = [v.numeric for v in self.non_missing_vals]
|
|
436
439
|
min_val, max_val = self.range if self.range else (min(all_numeric_vals), max(all_numeric_vals))
|
|
@@ -448,35 +451,10 @@ class Unibar:
|
|
|
448
451
|
for i, occ in enumerate(occs):
|
|
449
452
|
if occ > 0:
|
|
450
453
|
data_per_color[i].append(scaled)
|
|
451
|
-
|
|
452
|
-
return data_per_color, self.colors, [edge_color_from_face(c) for c in self.colors]
|
|
453
|
-
|
|
454
|
-
def _prepare_weights(self, n_colors):
|
|
455
|
-
"""
|
|
456
|
-
Give the frequency of each y-position from _prepare_scaled_data.
|
|
457
|
-
|
|
458
|
-
Walks the values the same way that method does, but records the
|
|
459
|
-
occurrence count (or weight-sum, if a weights column is set) instead of
|
|
460
|
-
the position. The box/violin code zips the two together so the KDE and
|
|
461
|
-
quantiles know how many observations sit at each spot.
|
|
462
|
-
|
|
463
|
-
Args:
|
|
464
|
-
n_colors: number of colours to split the weights across.
|
|
465
|
-
|
|
466
|
-
Returns weights_per_color: a list of weights per colour, aligned with
|
|
467
|
-
_prepare_scaled_data's output.
|
|
468
|
-
"""
|
|
469
|
-
weights_per_color = [[] for _ in range(n_colors)]
|
|
470
|
-
for v in self.non_missing_vals:
|
|
471
|
-
occs = v.occ_by_colour
|
|
472
|
-
if len(occs) < n_colors:
|
|
473
|
-
occs = occs + [0] * (n_colors - len(occs))
|
|
474
|
-
for i, occ in enumerate(occs):
|
|
475
|
-
if occ > 0:
|
|
476
454
|
# if no weight column, occ is an integer count — use it directly as the weight
|
|
477
455
|
weights_per_color[i].append(float(occ))
|
|
478
456
|
|
|
479
|
-
return weights_per_color
|
|
457
|
+
return data_per_color, weights_per_color, self.colors, [edge_color_from_face(c) for c in self.colors]
|
|
480
458
|
|
|
481
459
|
def _weighted_quantile(self, data, weights, quantiles):
|
|
482
460
|
"""
|
|
@@ -519,8 +497,7 @@ class Unibar:
|
|
|
519
497
|
its occurrence count or weight-sum.
|
|
520
498
|
"""
|
|
521
499
|
|
|
522
|
-
data_per_color, facecolors, edgecolors = self._prepare_scaled_data(y_start, y_end)
|
|
523
|
-
weights_per_color = self._prepare_weights(len(self.colors))
|
|
500
|
+
data_per_color, weights_per_color, facecolors, edgecolors = self._prepare_scaled_data(y_start, y_end)
|
|
524
501
|
|
|
525
502
|
# ---- helpers ----
|
|
526
503
|
|
|
@@ -608,8 +585,7 @@ class Unibar:
|
|
|
608
585
|
return lst[1:] + lst[:1]
|
|
609
586
|
return lst
|
|
610
587
|
|
|
611
|
-
data_per_color, facecolors, edgecolors = self._prepare_scaled_data(y_start, y_end)
|
|
612
|
-
weights_per_color = self._prepare_weights(len(self.colors))
|
|
588
|
+
data_per_color, weights_per_color, facecolors, edgecolors = self._prepare_scaled_data(y_start, y_end)
|
|
613
589
|
|
|
614
590
|
n = len(data_per_color)
|
|
615
591
|
if n == 0:
|
|
@@ -699,7 +675,17 @@ class Unibar:
|
|
|
699
675
|
self.non_missing_vals,
|
|
700
676
|
rectangle_painter)
|
|
701
677
|
|
|
702
|
-
def
|
|
678
|
+
def _weighted_centre(self, weight_fn):
|
|
679
|
+
"""
|
|
680
|
+
Weighted mean of vert_centre across the non-missing values, weighting each
|
|
681
|
+
value by weight_fn(val). Returns None when the weights sum to zero.
|
|
682
|
+
"""
|
|
683
|
+
total = sum(weight_fn(val) for val in self.non_missing_vals)
|
|
684
|
+
if total == 0:
|
|
685
|
+
return None
|
|
686
|
+
return sum(val.vert_centre * weight_fn(val) for val in self.non_missing_vals) / total
|
|
687
|
+
|
|
688
|
+
def _draw_spiky_beanplot(self, ax, y_start, y_end, rectangle_painter):
|
|
703
689
|
# draw violin
|
|
704
690
|
self._draw_violin(ax, y_start, y_end, draw_boxplot=False)
|
|
705
691
|
|
|
@@ -730,11 +716,7 @@ class Unibar:
|
|
|
730
716
|
zorder=1)
|
|
731
717
|
|
|
732
718
|
# draw the mean line
|
|
733
|
-
|
|
734
|
-
mean_y = sum(
|
|
735
|
-
val.vert_centre * val.occurrences
|
|
736
|
-
for val in self.non_missing_vals
|
|
737
|
-
) / total_weight
|
|
719
|
+
mean_y = self._weighted_centre(lambda val: val.occurrences)
|
|
738
720
|
|
|
739
721
|
rectangle_painter.plot(ax, alpha=1,
|
|
740
722
|
left_center_pts=[(self.pos_x - self.width / 2, mean_y)],
|
|
@@ -798,13 +780,8 @@ class Unibar:
|
|
|
798
780
|
|
|
799
781
|
# draw the mean lines
|
|
800
782
|
# LEFT (highlighted)
|
|
801
|
-
|
|
802
|
-
if
|
|
803
|
-
l_mean_y = sum(
|
|
804
|
-
val.vert_centre * val.occ_by_colour[1]
|
|
805
|
-
for val in self.non_missing_vals
|
|
806
|
-
) / l_total
|
|
807
|
-
|
|
783
|
+
l_mean_y = self._weighted_centre(lambda val: val.occ_by_colour[1])
|
|
784
|
+
if l_mean_y is not None:
|
|
808
785
|
rectangle_painter.plot(
|
|
809
786
|
ax,
|
|
810
787
|
alpha=1,
|
|
@@ -817,13 +794,8 @@ class Unibar:
|
|
|
817
794
|
)
|
|
818
795
|
|
|
819
796
|
# RIGHT (non-highlighted)
|
|
820
|
-
|
|
821
|
-
if
|
|
822
|
-
r_mean_y = sum(
|
|
823
|
-
val.vert_centre * val.occ_by_colour[0]
|
|
824
|
-
for val in self.non_missing_vals
|
|
825
|
-
) / r_total
|
|
826
|
-
|
|
797
|
+
r_mean_y = self._weighted_centre(lambda val: val.occ_by_colour[0])
|
|
798
|
+
if r_mean_y is not None:
|
|
827
799
|
rectangle_painter.plot(
|
|
828
800
|
ax,
|
|
829
801
|
alpha=1,
|
|
@@ -866,42 +838,11 @@ class Unibar:
|
|
|
866
838
|
zorder=1)
|
|
867
839
|
|
|
868
840
|
if self.draw_white_dividers and len(values) > 1:
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
divider_right_pts = []
|
|
873
|
-
divider_heights = []
|
|
874
|
-
divider_weights = []
|
|
875
|
-
|
|
876
|
-
space_between_dividers = self.hbar_height / 2
|
|
877
|
-
|
|
878
|
-
for i in range(len(values) - 1):
|
|
879
|
-
top_of_i = values[i].vert_centre + space_between_dividers
|
|
880
|
-
bottom_of_next = values[i + 1].vert_centre - space_between_dividers
|
|
881
|
-
divider_y = (top_of_i + bottom_of_next) / 2
|
|
882
|
-
half_label_space = self.width / 2
|
|
883
|
-
|
|
884
|
-
divider_left_pts.append((self.pos_x - half_label_space, divider_y))
|
|
885
|
-
divider_right_pts.append((self.pos_x + half_label_space, divider_y))
|
|
886
|
-
divider_heights.append(divider_height)
|
|
887
|
-
|
|
888
|
-
# white divider bar (use 2D structure)
|
|
889
|
-
divider_weights.append([1])
|
|
890
|
-
|
|
891
|
-
rectangle_painter.plot(
|
|
892
|
-
ax,
|
|
893
|
-
alpha=1,
|
|
894
|
-
left_center_pts=divider_left_pts,
|
|
895
|
-
right_center_pts=divider_right_pts,
|
|
896
|
-
heights=divider_heights,
|
|
897
|
-
colors=["white"],
|
|
898
|
-
weights=divider_weights,
|
|
899
|
-
orientation=self.hi_box,
|
|
900
|
-
zorder=2, # slightly above bars
|
|
901
|
-
check_overlap=False
|
|
902
|
-
)
|
|
841
|
+
# bar charts draw every value at a constant hbar_height
|
|
842
|
+
half_heights = [self.hbar_height / 2] * len(values)
|
|
843
|
+
self._draw_white_dividers(ax, values, rectangle_painter, half_heights, self.width)
|
|
903
844
|
# ---------- Label Drawing ----------
|
|
904
|
-
def _draw_labels(self, ax
|
|
845
|
+
def _draw_labels(self, ax):
|
|
905
846
|
"""
|
|
906
847
|
Draws labels depending on the display type.
|
|
907
848
|
2 types of labels:
|
|
@@ -913,14 +854,14 @@ class Unibar:
|
|
|
913
854
|
if self.missing:
|
|
914
855
|
for mv in self.missing_vals:
|
|
915
856
|
# don't draw the labels if there are no missing values
|
|
916
|
-
if mv.occurrences > 0:
|
|
857
|
+
if mv.occurrences > 0:
|
|
917
858
|
# Place missing labels just above the bottom with missing_padding
|
|
918
859
|
ax.text(x, mv.vert_centre, self.missing_placeholder, ha='center', va='center', **(self.label_options or {}))
|
|
919
|
-
|
|
860
|
+
|
|
920
861
|
if self.label_type == "values":
|
|
921
862
|
self._draw_value_labels(ax) #draws labels directly according to the values
|
|
922
863
|
elif self.label_type == "levels":
|
|
923
|
-
self._draw_level_labels(ax
|
|
864
|
+
self._draw_level_labels(ax)
|
|
924
865
|
else:
|
|
925
866
|
raise ValueError(f"invalid label_type {self.label_type}")
|
|
926
867
|
|
|
@@ -934,7 +875,7 @@ class Unibar:
|
|
|
934
875
|
ax.text(self.pos_x, val.vert_centre, self._get_formatted_label(val.dtype, val.id), ha='center', va='center', **(self.label_options or {}))
|
|
935
876
|
|
|
936
877
|
# -------- Label drawing - levels (starting from y_start and ending at y_end) ------
|
|
937
|
-
def _draw_level_labels(self, ax
|
|
878
|
+
def _draw_level_labels(self, ax):
|
|
938
879
|
"""
|
|
939
880
|
2 ways to draw levels:
|
|
940
881
|
1. Display type == rug
|
|
@@ -995,10 +936,7 @@ class Unibar:
|
|
|
995
936
|
Returns a Value, given its id
|
|
996
937
|
Assumes that all ids are unique (true)
|
|
997
938
|
"""
|
|
998
|
-
|
|
999
|
-
if v.id == id:
|
|
1000
|
-
return v
|
|
1001
|
-
return None
|
|
939
|
+
return self._values_by_id.get(id)
|
|
1002
940
|
|
|
1003
941
|
def __repr__(self):
|
|
1004
942
|
return f"unibar(name={self.name!r}, x={self.pos_x:.2f}, nvals={len(self.values)})"
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import re
|
|
2
2
|
import colorsys
|
|
3
|
+
import warnings
|
|
3
4
|
import matplotlib.colors as mcolors
|
|
4
5
|
import pandas as pd
|
|
5
6
|
from typing import List, Dict, Any
|
|
@@ -20,7 +21,8 @@ class Defaults:
|
|
|
20
21
|
UNI_VFILL: float = 0.08 # default unibar vertical fill
|
|
21
22
|
CONNECTOR_FRACTION: float = 1 # default proportion fraction of connectors : vfill
|
|
22
23
|
UNI_HFILL: float = 0.3 # default horizontal fill
|
|
23
|
-
|
|
24
|
+
MIN_BAR_HEIGHT_UNIBAR: float = 0.15 # minimum drawn height of a unibar (and its colour segments)
|
|
25
|
+
MIN_BAR_HEIGHT_CONNECTORS: float = 0.12 # minimum drawn thickness of a connector; slightly less than the unibar floor
|
|
24
26
|
BAR_UNIT: float = 1.0 # default bar unit (how many pixels/obs.) is recalculated on init.
|
|
25
27
|
XMARGIN: float = 0.02 # margin on x axis
|
|
26
28
|
YMARGIN: float = 0.04 # margin on y axis
|
|
@@ -92,6 +94,17 @@ def safe_numeric(val):
|
|
|
92
94
|
except (ValueError, TypeError):
|
|
93
95
|
return val
|
|
94
96
|
|
|
97
|
+
def clamp_unit(value, name):
    """Restrict a layout fraction to the closed interval [0, 1].

    Args:
        value: The number to check.
        name: Name of the parameter the value belongs to; used as the
            prefix of the warning message so the user can tell which
            setting was adjusted.

    Returns:
        ``value`` unchanged when it is neither below 0 nor above 1;
        otherwise the violated bound (``0`` or ``1``).

    Warns:
        UserWarning: Whenever the value had to be clamped.
    """
    # Work out which bound (if any) was violated; anything that is neither
    # below 0 nor above 1 is handed back untouched.
    if value < 0:
        bound, relation = 0, "<"
    elif value > 1:
        bound, relation = 1, ">"
    else:
        return value

    warnings.warn(f"{name} {relation} {bound}. Value has been clamped to {bound}.")
    return bound
|
|
107
|
+
|
|
95
108
|
def resolve_ordering(orders):
|
|
96
109
|
"""
|
|
97
110
|
Merge several category orderings (from variables in same_scale) into one
|
|
@@ -235,7 +248,39 @@ def assign_color_index(df: pd.DataFrame, var_list: List[str], hi_missing, missin
|
|
|
235
248
|
if v != hi_var:
|
|
236
249
|
continue
|
|
237
250
|
mask = df["color_index"] == 0
|
|
238
|
-
|
|
251
|
+
col = df.loc[mask, v]
|
|
252
|
+
if isinstance(hi_value, list):
|
|
253
|
+
# Fast path: a list of highlight values is a straight lookup.
|
|
254
|
+
# Build the same mapping _compute_color_index applies — first
|
|
255
|
+
# occurrence in hi_value wins (list.index semantics), exact match
|
|
256
|
+
# first, then a numeric-coercion fallback for non-string values.
|
|
257
|
+
buffer = 1 if hi_missing else 0
|
|
258
|
+
exact_map = {}
|
|
259
|
+
numeric_map = {}
|
|
260
|
+
for i, hv in enumerate(hi_value):
|
|
261
|
+
if hv not in exact_map:
|
|
262
|
+
exact_map[hv] = i + 1 + buffer
|
|
263
|
+
try:
|
|
264
|
+
fk = float(hv)
|
|
265
|
+
except (ValueError, TypeError):
|
|
266
|
+
continue
|
|
267
|
+
if fk not in numeric_map:
|
|
268
|
+
numeric_map[fk] = i + 1 + buffer
|
|
269
|
+
|
|
270
|
+
def _numeric_index(val):
|
|
271
|
+
# numeric fallback only applies to non-string, non-NaN values
|
|
272
|
+
if isinstance(val, str) or pd.isna(val):
|
|
273
|
+
return np.nan
|
|
274
|
+
try:
|
|
275
|
+
return numeric_map.get(float(val), np.nan)
|
|
276
|
+
except (ValueError, TypeError):
|
|
277
|
+
return np.nan
|
|
278
|
+
|
|
279
|
+
result = col.map(exact_map).fillna(col.map(_numeric_index))
|
|
280
|
+
df.loc[mask, "color_index"] = result.fillna(0).astype(int)
|
|
281
|
+
else:
|
|
282
|
+
# regex / numeric-range expressions stay on the row-wise path
|
|
283
|
+
df.loc[mask, "color_index"] = col.apply(lambda val: _compute_color_index(val, hi_missing, hi_value))
|
|
239
284
|
return df
|
|
240
285
|
|
|
241
286
|
def get_formatted_label(datatype, value):
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# value.py
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import List, Optional
|
|
3
3
|
import numpy as np
|
|
4
4
|
|
|
5
5
|
class Value:
|
|
@@ -13,7 +13,6 @@ class Value:
|
|
|
13
13
|
occurrences: how many times the value occurs in the unibar
|
|
14
14
|
occ_by_color: the # occurrences of each of the highlighted groups in the Value
|
|
15
15
|
vert_centre: the vertical coordinate of the Value's centre
|
|
16
|
-
next: the Values in the next unibar that this Value is associated with
|
|
17
16
|
numeric: the numeric value associated with the Value (if it is categorical, there is no numeric value associated.)
|
|
18
17
|
"""
|
|
19
18
|
self.dtype = dtype
|
|
@@ -22,7 +21,6 @@ class Value:
|
|
|
22
21
|
# occ_by_colour: [non_highlight_count, hi_count_1, hi_count_2, ...]
|
|
23
22
|
self.occ_by_colour = occ_by_colour if occ_by_colour is not None else [self.occurrences]
|
|
24
23
|
self.vert_centre: float = 0.0
|
|
25
|
-
self.next: Dict[str, int] = {}
|
|
26
24
|
if dtype != np.str_:
|
|
27
25
|
self.numeric = float(id)
|
|
28
26
|
else:
|
|
@@ -36,22 +34,6 @@ class Value:
|
|
|
36
34
|
self.vert_centre = float(centre)
|
|
37
35
|
return
|
|
38
36
|
|
|
39
|
-
def add_next(self, next_id: str, count: int = 1):
|
|
40
|
-
"""
|
|
41
|
-
Helper function to add the Values in the next unibar that this Value connects to
|
|
42
|
-
"""
|
|
43
|
-
self.next[next_id] = self.next.get(id, 0) + int(count)
|
|
44
|
-
|
|
45
|
-
def set_occurrences(self, total: int, occ_by_colour: Optional[List[int]] = None):
|
|
46
|
-
"""
|
|
47
|
-
Sets the number of occurrences of this Value.
|
|
48
|
-
"""
|
|
49
|
-
self.occurrences = int(total)
|
|
50
|
-
if occ_by_colour is not None:
|
|
51
|
-
self.occ_by_colour = [int(x) for x in occ_by_colour]
|
|
52
|
-
else:
|
|
53
|
-
self.occ_by_colour = [int(total)]
|
|
54
|
-
|
|
55
37
|
def __repr__(self):
|
|
56
38
|
"""
|
|
57
39
|
Debugging statement that print's Value's ID, number of occurrences, and the y-coordinate of the Value
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: hammock-plot
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Hammock plot visualization for categorical and mixed categorical-continuous data
|
|
5
5
|
Author-email: Tiancheng Yang <t77yang@uwaterloo.ca>, Sandra Huang <sandra.huang@uwaterloo.ca>, Matthias Schonlau <schonlau@uwaterloo.ca>
|
|
6
6
|
License: MIT
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "hammock-plot"
|
|
7
|
-
version = "1.
|
|
7
|
+
version = "1.2.0"
|
|
8
8
|
description = "Hammock plot visualization for categorical and mixed categorical-continuous data"
|
|
9
9
|
readme = {text = "For the current project description, documentation, and examples, please see the GitHub repository: https://github.com/TianchengY/hammock_plot", content-type = "text/markdown"}
|
|
10
10
|
license = {text = "MIT"}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|