hammock-plot 1.1.0__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hammock-plot
3
- Version: 1.1.0
3
+ Version: 1.2.0
4
4
  Summary: Hammock plot visualization for categorical and mixed categorical-continuous data
5
5
  Author-email: Tiancheng Yang <t77yang@uwaterloo.ca>, Sandra Huang <sandra.huang@uwaterloo.ca>, Matthias Schonlau <schonlau@uwaterloo.ca>
6
6
  License: MIT
@@ -121,7 +121,7 @@ hammock = hammock_plot.Hammock(data_df = df)
121
121
  ax = hammock.plot(var=var,
122
122
  missing=True,
123
123
  numerical_var_levels={"sataces": None, "satcomm": None, "satrate": None},
124
- min_bar_height=0.2,
124
+ min_bar_height_unibar=0.2,
125
125
  uni_vfill=0.3)
126
126
  ```
127
127
 
@@ -265,7 +265,8 @@ ax = hammock.plot(
265
265
  | | `width` | `float` | Width of the plot in inches. Default is 15. Caution: Width too narrow may distort the plot. |
266
266
  | Other options | `shape` | `str` | Shape of the boxes. "rectangle" or "parallelogram". Default is "rectangle". |
267
267
  | | `same_scale` | `List[str]` | List of variables that have the same scale. Default is `None`. |
268
- | | `min_bar_height` | `float` | Minimal bar height of unibars (connectors are unchanged). Bars representing only a tiny fraction of the data may be so narrow, that they are invisible in a plot. The default value tries to ensure this does not happen. Default is 0.1.
268
+ | | `min_bar_height_unibar` | `float` | Minimal drawn height of a unibar. Bars representing only a tiny fraction of the data may be so narrow that they are invisible in a plot; this sets an absolute floor on their thickness. With `hi_box="stacked"`, each colour segment within a unibar is also kept at least this tall (by trading height with the larger segments, so the bar height and layout are unchanged), keeping a colour visible even when it is a tiny share of the bar. The default value is chosen so that such narrow bars and segments remain visible. Default is 0.15 (0.15% of the entire plot height). |
269
+ | | `min_bar_height_connectors` | `float` | Minimal drawn thickness of a connector (independent of `connector_fraction`). Like `min_bar_height_unibar` but for the connectors between unibars. Default is 0.12 (0.12% of the entire plot height). |
269
270
  | | `display_figure` | `bool` | Whether or not to display the figure. This can be useful if you just want to save the plots. Default is `True`. |
270
271
  | | `save_path` | `str` | If it is not `None`, the figure will be saved to the given path with given name and format. Default is `None`. |
271
272
  | | `violin_bw_method` | `str` or `float` | Specifies the bw method used to plot a violin plot. See https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.violinplot.html for more details. |
@@ -26,7 +26,7 @@ class Figure:
26
26
  var_types:
27
27
  - Dict of the types of each variable. Either: np.str_, np.floating, or np.integer
28
28
 
29
- numerical_var_levels, display_type, missing, missing_placeholder, label, unibar, hi_box, width, height, uni_vfill, connector_fraction, min_bar_height, uni_hfill, label_options, shape_type, same_scale, violin_bw_method: refer to README file
29
+ numerical_var_levels, display_type, missing, missing_placeholder, label, unibar, hi_box, width, height, uni_vfill, connector_fraction, min_bar_height_unibar, min_bar_height_connectors, uni_hfill, label_options, shape_type, same_scale, violin_bw_method: refer to README file
30
30
  """
31
31
  def __init__(self,
32
32
  # general
@@ -50,7 +50,8 @@ class Figure:
50
50
  height: float,
51
51
  uni_vfill: float,
52
52
  connector_fraction: float,
53
- min_bar_height: float,
53
+ min_bar_height_unibar: float,
54
+ min_bar_height_connectors: float,
54
55
  uni_hfill: float,
55
56
 
56
57
  # Other
@@ -79,7 +80,8 @@ class Figure:
79
80
  self.height = height # height of the entire plot
80
81
  self.uni_vfill = uni_vfill
81
82
  self.connector_fraction = connector_fraction
82
- self.min_bar_height = min_bar_height
83
+ self.min_bar_height_unibar = min_bar_height_unibar
84
+ self.min_bar_height_connectors = min_bar_height_connectors
83
85
  self.uni_hfill = uni_hfill
84
86
 
85
87
  self.label_options = label_options
@@ -155,7 +157,7 @@ class Figure:
155
157
  missing=self.missing,
156
158
  missing_placeholder=self.missing_placeholder,
157
159
  val_order=order,
158
- min_bar_height=self.min_bar_height,
160
+ min_bar_height=self.min_bar_height_unibar,
159
161
  colors=self.colors,
160
162
  hi_box=self.hi_box,
161
163
  display_type = uni_display_type,
@@ -235,7 +237,7 @@ class Figure:
235
237
 
236
238
  max_missing_height = max_missing_occ * self.bar_unit
237
239
 
238
- missing_padding = (max(self.min_bar_height, max_missing_height) + Defaults.SPACE_ABOVE_MISSING)
240
+ missing_padding = (max(self.min_bar_height_unibar, max_missing_height) + Defaults.SPACE_ABOVE_MISSING)
239
241
 
240
242
  # if there are horizontal bar charts, calculate the bar unit differently.
241
243
  max_num_categories = 0
@@ -250,7 +252,7 @@ class Figure:
250
252
  max_val_occ = max(max_val_occ, max(val.occurrences for val in uni.values))
251
253
  max_num_categories = max(max_num_categories, len(uni.non_missing_vals))
252
254
  if max_num_categories > 0:
253
- hbar_height = max(max_val_occ * self.bar_unit, self.min_bar_height)
255
+ hbar_height = max(max_val_occ * self.bar_unit, self.min_bar_height_unibar)
254
256
  # if the horizontal bar charts overlap
255
257
  available_height = (self.height - 2 * self.ymargin * self.height) * self.scale
256
258
 
@@ -260,15 +262,14 @@ class Figure:
260
262
  self.bar_unit = (available_height * self.uni_vfill) / (max_val_occ * max_num_categories)
261
263
  if self.missing:
262
264
  self.bar_unit = self.bar_unit / (1 + max_missing_occ * self.uni_vfill)
263
- max_missing_height = max_missing_occ * self.bar_unit
264
-
265
+
265
266
  nonmissing_height = available_height
266
267
  if self.missing:
267
268
  max_missing_height = max_missing_occ * self.bar_unit
268
- missing_padding = (max(self.min_bar_height, max_missing_height) + Defaults.SPACE_ABOVE_MISSING)
269
+ missing_padding = (max(self.min_bar_height_unibar, max_missing_height) + Defaults.SPACE_ABOVE_MISSING)
269
270
  nonmissing_height -= missing_padding
270
271
 
271
- hbar_height = max(nonmissing_height * self.uni_vfill / max_num_categories, self.min_bar_height)
272
+ hbar_height = max(nonmissing_height * self.uni_vfill / max_num_categories, self.min_bar_height_unibar)
272
273
 
273
274
  # set bar_unit in unibars, set missing_padding in unibars, set hbar heights, set unibar widths
274
275
  for uni in self.unibars:
@@ -284,18 +285,20 @@ class Figure:
284
285
  # Determine ranges for unibars that should use same_scale
285
286
  global_range = None
286
287
  if same_scale:
287
- # Collect all numeric values across the same_scale group
288
- combined_vals = []
288
+ # Take the min/max across the same_scale group column-by-column
289
+ # (vectorised) rather than gathering every value into one list.
290
+ global_min, global_max = None, None
289
291
  for uni_name in same_scale:
290
- uni_series = self.data_df[uni_name]
291
- numeric_vals = pd.to_numeric(uni_series, errors="coerce").dropna()
292
- combined_vals.extend(numeric_vals.tolist())
292
+ numeric_vals = pd.to_numeric(self.data_df[uni_name], errors="coerce").dropna()
293
+ if numeric_vals.empty:
294
+ continue
295
+ col_min, col_max = numeric_vals.min(), numeric_vals.max()
296
+ global_min = col_min if global_min is None else min(global_min, col_min)
297
+ global_max = col_max if global_max is None else max(global_max, col_max)
293
298
 
294
- if combined_vals:
295
- global_min, global_max = min(combined_vals), max(combined_vals)
299
+ if global_min is not None:
296
300
  # Assign the same global range to all unibars in same_scale
297
- for uni_name in same_scale:
298
- global_range = (global_min, global_max)
301
+ global_range = (global_min, global_max)
299
302
 
300
303
  # set variables so that same_scale variables align with each other.
301
304
  # Only unibars whose display draws a *value-specific bar* at min/max
@@ -406,11 +409,9 @@ class Figure:
406
409
  uni.draw(
407
410
  ax,
408
411
  rectangle_painter=rect_painter,
409
- y_start=self.y_start,
410
- y_end=self.y_end,
411
412
  alpha=alpha,
412
413
  )
413
-
414
+
414
415
  return ax
415
416
 
416
417
  def draw_connections(self, alpha, color, ax=None):
@@ -538,7 +539,11 @@ class Figure:
538
539
 
539
540
  left_center_pts.append((lx, ly))
540
541
  right_center_pts.append((rx, ry))
541
- heights.append(total_cnt * self.bar_unit * self.connector_fraction)
542
+ # min_bar_height_connectors floors the drawn thickness only (not the
543
+ # stacking math above), so a too-thin connector stays centred but renders
544
+ # visibly. The floor is absolute - independent of connector_fraction.
545
+ h = total_cnt * self.bar_unit * self.connector_fraction
546
+ heights.append(max(h, self.min_bar_height_connectors))
542
547
  weights.append(wts)
543
548
 
544
549
  if left_center_pts:
@@ -5,9 +5,10 @@ import matplotlib.pyplot as plt
5
5
  from hammock_plot.figure import Figure
6
6
  from hammock_plot.utils import Defaults
7
7
  import numpy as np
8
- from hammock_plot.utils import safe_numeric, validate_expression, resolve_ordering, assign_color_index, get_formatted_label
8
+ from hammock_plot.utils import safe_numeric, validate_expression, resolve_ordering, assign_color_index, get_formatted_label, clamp_unit
9
9
  import warnings
10
10
 
11
+
11
12
  class Hammock:
12
13
  """
13
14
  Initializes a Hammock plot with dataframe
@@ -45,7 +46,8 @@ class Hammock:
45
46
  label_options: dict = None,
46
47
  height: float = 10,
47
48
  width: float = 15,
48
- min_bar_height: float = Defaults.MIN_BAR_HEIGHT,
49
+ min_bar_height_unibar: float = Defaults.MIN_BAR_HEIGHT_UNIBAR,
50
+ min_bar_height_connectors: float = Defaults.MIN_BAR_HEIGHT_CONNECTORS,
49
51
  alpha: float = Defaults.ALPHA,
50
52
 
51
53
  # Other
@@ -94,36 +96,20 @@ class Hammock:
94
96
  f'The weight variable {weights} must be numeric.'
95
97
  )
96
98
 
97
- for idx, val in self.data_df[weights].items():
98
- if val <= 0:
99
- raise ValueError(
100
- f'There is a nonpositive variable in {weights}. This is not allowed.'
101
- )
99
+ if (self.data_df[weights] <= 0).any():
100
+ raise ValueError(
101
+ f'There is a nonpositive variable in {weights}. This is not allowed.'
102
+ )
102
103
 
103
104
 
104
- if uni_hfill < 0:
105
- warnings.warn("uni_hfill < 0. Value has been clamped to 0.")
106
- uni_hfill = 0
107
- elif uni_hfill > 1:
108
- warnings.warn("uni_hfill > 1. Value has been clamped to 1.")
109
- uni_hfill = 1
105
+ uni_hfill = clamp_unit(uni_hfill, "uni_hfill")
110
106
 
111
107
  if uni_hfill == 1:
112
108
  warnings.warn("Tip: To leave a bit of a gap between the univariate bars, set uni_hfill to something close to 1 but not quite one (ex 0.9)")
113
-
114
- if uni_vfill < 0:
115
- warnings.warn("uni_vfill < 0. Value has been clamped to 0.")
116
- uni_vfill = 0
117
- elif uni_vfill > 1:
118
- warnings.warn("uni_vfill > 1. Value has been clamped to 1.")
119
- uni_vfill = 1
120
-
121
- if alpha < 0:
122
- warnings.warn("alpha < 0. Value has been clamped to 0.")
123
- alpha = 0
124
- elif alpha > 1:
125
- warnings.warn("alpha > 1. Value has been clamped to 1.")
126
- alpha = 1
109
+
110
+ uni_vfill = clamp_unit(uni_vfill, "uni_vfill")
111
+
112
+ alpha = clamp_unit(alpha, "alpha")
127
113
 
128
114
  # drop missing values if missing values should not be plotted
129
115
  if not missing:
@@ -144,7 +130,7 @@ class Hammock:
144
130
  var_types[varname] = np.integer
145
131
  else:
146
132
  var_types[varname] = np.floating
147
- elif pd.api.types.is_categorical_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
133
+ elif isinstance(dtype, pd.CategoricalDtype) or pd.api.types.is_string_dtype(dtype):
148
134
  var_types[varname] = np.str_
149
135
  else:
150
136
  raise RuntimeError("Invalid dtype detected - logic error in code. dtype: ", dtype)
@@ -461,7 +447,8 @@ class Hammock:
461
447
  height=height,
462
448
  uni_vfill=uni_vfill,
463
449
  connector_fraction=connector_fraction,
464
- min_bar_height=min_bar_height,
450
+ min_bar_height_unibar=min_bar_height_unibar,
451
+ min_bar_height_connectors=min_bar_height_connectors,
465
452
  uni_hfill=uni_hfill,
466
453
 
467
454
  # Other
@@ -17,6 +17,45 @@ class FigureBase(ABC):
17
17
  order = np.argsort(np.arctan2(y - y.mean(), x - x.mean()))
18
18
  return x[order], y[order]
19
19
 
20
+ @staticmethod
21
+ def _floor_fractions(fracs, min_frac):
22
+ """
23
+ Raise every non-zero colour fraction to at least ``min_frac`` by shrinking
24
+ the larger fractions to compensate, keeping the total at 1 (so the bar's
25
+ height and position are unchanged). Zero fractions stay zero - a colour
26
+ absent from this bar is never given a sliver. Used to keep each colour
27
+ segment at least ``min_bar_height`` tall even when a colour is a tiny
28
+ share of the bar. If the minimum cannot be met for every colour (the bar
29
+ is too short to fit them all), the segments are split equally as a best
30
+ effort.
31
+ """
32
+ f = np.array(fracs, dtype=float)
33
+ nz = f > 0
34
+ k = int(nz.sum())
35
+ if k == 0 or min_frac <= 0:
36
+ return f
37
+ if min_frac * k >= 1.0:
38
+ out = np.zeros_like(f)
39
+ out[nz] = 1.0 / k
40
+ return out
41
+ out = f.copy()
42
+ # Water-filling: lift deficient segments to min_frac, draw the shortfall
43
+ # from segments still above min_frac in proportion to their surplus.
44
+ for _ in range(k + 1):
45
+ deficient = nz & (out < min_frac)
46
+ if not deficient.any():
47
+ break
48
+ out[deficient] = min_frac
49
+ shortfall = out.sum() - 1.0
50
+ donors = nz & (out > min_frac)
51
+ surplus = out[donors] - min_frac
52
+ total_surplus = surplus.sum()
53
+ if total_surplus <= 0:
54
+ out[nz] = 1.0 / k
55
+ break
56
+ out[donors] -= shortfall * (surplus / total_surplus)
57
+ return out
58
+
20
59
  def plot(self, ax,
21
60
  alpha: float,
22
61
  left_center_pts: List[Tuple[float, float]],
@@ -27,7 +66,8 @@ class FigureBase(ABC):
27
66
  orientation: str = "side-by-side",
28
67
  zorder: int = 0,
29
68
  check_overlap: bool = False,
30
- unibar_name: str = None):
69
+ unibar_name: str = None,
70
+ min_seg_height: float = 0.0):
31
71
  """
32
72
  Draw polygons (rectangles or parallelograms) with segmented coloring.
33
73
 
@@ -77,6 +117,13 @@ class FigureBase(ABC):
77
117
  right_top = np.array([poly_x[2], poly_y[2]])
78
118
  right_bot = np.array([poly_x[3], poly_y[3]])
79
119
 
120
+ # Keep each colour segment at least min_seg_height tall (absolute),
121
+ # by trading height between segments within this fixed-height bar.
122
+ if min_seg_height > 0:
123
+ bar_h = abs(poly_y[1] - poly_y[0])
124
+ if bar_h > 0:
125
+ fracs = self._floor_fractions(fracs, min_seg_height / bar_h)
126
+
80
127
  cum = 0.0
81
128
  for f, col in zip(fracs, colors):
82
129
  f0, f1 = cum, cum + f
@@ -1,11 +1,10 @@
1
1
  # unibar.py
2
- from typing import List, Dict, Tuple, Optional
2
+ from typing import List
3
3
  import numpy as np
4
4
  import matplotlib.pyplot as plt
5
5
  from hammock_plot.value import Value
6
6
  from hammock_plot.utils import edge_color_from_face
7
7
  from .utils import Defaults, get_formatted_label
8
- from fractions import Fraction
9
8
  from scipy.stats import gaussian_kde
10
9
 
11
10
  class Unibar:
@@ -65,8 +64,6 @@ class Unibar:
65
64
  Create Value objects for this unibar from self.df.
66
65
  Each Value has total occurrences and breakdown by colour_index.
67
66
  """
68
- uni_series = self.df[self.name]
69
- counts = uni_series.value_counts()
70
67
  values: List[Value] = []
71
68
 
72
69
  dtype = self.val_type
@@ -77,34 +74,26 @@ class Unibar:
77
74
  # Determine order
78
75
  order = self.val_order
79
76
 
77
+ # Count occurrences per (value, colour) in one grouped pass rather than
78
+ # scanning the whole frame once per value. Rows = each value, columns =
79
+ # each colour index; weighted sums the weight column, else just counts.
80
+ grouped = self.df.groupby([self.name, "color_index"], observed=True)
81
+ if self.weights is None:
82
+ occ_table = grouped.size().unstack("color_index", fill_value=0)
83
+ else:
84
+ occ_table = grouped[self.weights].sum().unstack("color_index", fill_value=0)
85
+ occ_table = occ_table.reindex(columns=all_colors, fill_value=0)
86
+
80
87
  for val in order:
81
- # get number of occurrences of the value, and assign to the Value object.
82
- if self.weights is None:
83
- cnt = int(counts.get(val, 0))
84
- else:
85
- mask = self.df[self.name] == val
86
- cnt = self.df.loc[mask, self.weights].sum()
87
-
88
- # assigns a list of # occurrences which corresponds to each of the highlight colours.
89
- if cnt > 0:
90
- subset = self.df[self.df[self.name] == val]
91
-
92
- if self.weights is None:
93
- occ_by_colour = (
94
- subset["color_index"]
95
- .value_counts()
96
- .reindex(all_colors, fill_value=0)
97
- .tolist()
98
- )
99
- else:
100
- occ_by_colour = (
101
- subset.groupby("color_index")[self.weights]
102
- .sum()
103
- .reindex(all_colors, fill_value=0)
104
- .tolist()
105
- )
88
+ # look up this value's per-colour occurrences. A value that's in the
89
+ # order but absent from the data (e.g. an empty same_scale slot) isn't
90
+ # in the table, so it gets all zeros.
91
+ if val in occ_table.index:
92
+ occ_by_colour = occ_table.loc[val].tolist()
93
+ cnt = sum(occ_by_colour)
106
94
  else:
107
95
  occ_by_colour = [0] * len(all_colors)
96
+ cnt = 0
108
97
 
109
98
  # puts the constructed Value in a list associated with the Unibar.
110
99
  values.append(Value(
@@ -128,6 +117,9 @@ class Unibar:
128
117
  # sort values before separating missing and non-missing values
129
118
  self._sort_values()
130
119
 
120
+ # id -> Value lookup so get_value_by_id doesn't rescan the list each call
121
+ self._values_by_id = {v.id: v for v in self.values}
122
+
131
123
  # Separate missing and non-missing values
132
124
  self.missing_vals = [v for v in self.values
133
125
  if self.missing_placeholder is not None and str(v.id) == str(self.missing_placeholder)]
@@ -135,7 +127,7 @@ class Unibar:
135
127
 
136
128
 
137
129
  def set_measurements(self, pos_x=None, width=None, bar_unit=None, missing_padding=None,
138
- scale_ypos: Tuple[float, float] = None, hbar_height=None):
130
+ hbar_height=None):
139
131
  if pos_x is not None:
140
132
  self.pos_x = pos_x
141
133
  if width is not None:
@@ -144,8 +136,6 @@ class Unibar:
144
136
  self.bar_unit = bar_unit
145
137
  if missing_padding is not None:
146
138
  self.missing_padding = missing_padding
147
- if scale_ypos is not None:
148
- self.scale_ypos = scale_ypos
149
139
  if hbar_height is not None:
150
140
  self.hbar_height = hbar_height
151
141
 
@@ -300,8 +290,7 @@ class Unibar:
300
290
  self.values.sort(key=lambda v: order_map.get(v.id, len(order_map)))
301
291
 
302
292
 
303
- def draw(self, ax, alpha, rectangle_painter=None,
304
- color="lightskyblue", y_start: int = None, y_end: int = None):
293
+ def draw(self, ax, alpha, rectangle_painter=None, color="lightskyblue"):
305
294
  """
306
295
  Template Method for drawing a unibar:
307
296
  1. Draw the background according to display_type
@@ -312,16 +301,16 @@ class Unibar:
312
301
 
313
302
  # Step 1: Draw background based on display_type
314
303
  if self.unibar:
315
- self._draw_background(ax, rectangle_painter, y_start, y_end)
304
+ self._draw_background(ax, rectangle_painter)
316
305
 
317
306
  # Step 2: Draw labels
318
307
  if self.label:
319
- self._draw_labels(ax, y_start, y_end)
308
+ self._draw_labels(ax)
320
309
 
321
310
  return ax
322
311
 
323
312
  # ---------- Template Method ----------
324
- def _draw_background(self, ax, rectangle_painter, y_start, y_end):
313
+ def _draw_background(self, ax, rectangle_painter):
325
314
  """
326
315
  Template Method for drawing the backgrounds in a unibar
327
316
  3 types of backgrounds:
@@ -373,64 +362,78 @@ class Unibar:
373
362
  right_pts.append((self.pos_x + half_label_space, val.vert_centre))
374
363
  weights.append(val.occ_by_colour)
375
364
 
376
- rectangle_painter.plot(ax, self.alpha, left_pts, right_pts, heights, self.colors, weights, orientation=self.hi_box,zorder=1,
377
- check_overlap=True, unibar_name=self.name)
378
-
365
+ rectangle_painter.plot(ax, self.alpha, left_pts, right_pts, heights, self.colors, weights, orientation=self.hi_box,zorder=1,
366
+ check_overlap=True, unibar_name=self.name, min_seg_height=self.min_bar_height)
367
+
379
368
  if self.draw_white_dividers and len(values) > 1:
380
- divider_height = Defaults.WHITE_DIVIDER_HEIGHT
381
-
382
- divider_left_pts = []
383
- divider_right_pts = []
384
- divider_heights = []
385
- divider_weights = []
386
-
387
- for i in range(len(values) - 1):
388
- top_of_i = values[i].vert_centre + heights[i] / 2
389
- bottom_of_next = values[i + 1].vert_centre - heights[i + 1] / 2
390
- divider_y = (top_of_i + bottom_of_next) / 2
391
- half_label_space = width / 2
392
-
393
- divider_left_pts.append((self.pos_x - half_label_space, divider_y))
394
- divider_right_pts.append((self.pos_x + half_label_space, divider_y))
395
- divider_heights.append(divider_height)
396
-
397
- # white divider bar (use 2D structure)
398
- divider_weights.append([1])
399
-
400
- rectangle_painter.plot(
401
- ax,
402
- alpha=1,
403
- left_center_pts=divider_left_pts,
404
- right_center_pts=divider_right_pts,
405
- heights=divider_heights,
406
- colors=["white"],
407
- weights=divider_weights,
408
- orientation=self.hi_box,
409
- zorder=2, # slightly above bars
410
- check_overlap=False
411
- )
412
-
369
+ # each rectangle's edge is half its own bar height from its centre
370
+ half_heights = [h / 2 for h in heights]
371
+ self._draw_white_dividers(ax, values, rectangle_painter, half_heights, width)
372
+
373
+ def _draw_white_dividers(self, ax, values, rectangle_painter, half_heights, width):
374
+ """
375
+ Draw thin white lines dividing adjacent bars (used when uni_vfill == 1).
376
+ half_heights[i] is the half-height of values[i], so each divider lands
377
+ midway between the top edge of one bar and the bottom edge of the next.
378
+ """
379
+ divider_height = Defaults.WHITE_DIVIDER_HEIGHT
380
+
381
+ divider_left_pts = []
382
+ divider_right_pts = []
383
+ divider_heights = []
384
+ divider_weights = []
385
+
386
+ half_label_space = width / 2
387
+
388
+ for i in range(len(values) - 1):
389
+ top_of_i = values[i].vert_centre + half_heights[i]
390
+ bottom_of_next = values[i + 1].vert_centre - half_heights[i + 1]
391
+ divider_y = (top_of_i + bottom_of_next) / 2
392
+
393
+ divider_left_pts.append((self.pos_x - half_label_space, divider_y))
394
+ divider_right_pts.append((self.pos_x + half_label_space, divider_y))
395
+ divider_heights.append(divider_height)
396
+
397
+ # white divider bar (use 2D structure)
398
+ divider_weights.append([1])
399
+
400
+ rectangle_painter.plot(
401
+ ax,
402
+ alpha=1,
403
+ left_center_pts=divider_left_pts,
404
+ right_center_pts=divider_right_pts,
405
+ heights=divider_heights,
406
+ colors=["white"],
407
+ weights=divider_weights,
408
+ orientation=self.hi_box,
409
+ zorder=2, # slightly above bars
410
+ check_overlap=False
411
+ )
412
+
413
413
  def _prepare_scaled_data(self, y_start, y_end):
414
414
  """
415
- Collect the y-positions for the box/violin plots, split by colour.
415
+ Collect the y-positions for the box/violin plots, split by colour, along
416
+ with the frequency sitting at each position.
416
417
 
417
418
  Each value's number is mapped onto the [y_start, y_end] span, and that
418
- position is recorded once per colour it appears in. Repetition by count
419
- is left to _prepare_weights, which lines up with this output entry for
420
- entry.
419
+ position is recorded once per colour it appears in. The matching entry in
420
+ weights_per_color carries how many observations sit there (occurrence
421
+ count, or weight-sum if a weights column is set), so the KDE and quantile
422
+ code can zip the two together entry for entry.
421
423
 
422
424
  Args:
423
425
  y_start, y_end: bottom and top of the drawable vertical span.
424
426
 
425
- Returns (data_per_color, facecolors, edgecolors): the y-positions per
426
- colour, the fill colours, and their matching edge colours. Empty lists
427
- if there are no non-missing values.
427
+ Returns (data_per_color, weights_per_color, facecolors, edgecolors): the
428
+ y-positions and their frequencies per colour, the fill colours, and their
429
+ matching edge colours. Empty lists if there are no non-missing values.
428
430
  """
429
431
  if not self.non_missing_vals:
430
- return [], [], []
432
+ return [], [], [], []
431
433
 
432
434
  n_colors = len(self.colors)
433
435
  data_per_color = [[] for _ in range(n_colors)]
436
+ weights_per_color = [[] for _ in range(n_colors)]
434
437
 
435
438
  all_numeric_vals = [v.numeric for v in self.non_missing_vals]
436
439
  min_val, max_val = self.range if self.range else (min(all_numeric_vals), max(all_numeric_vals))
@@ -448,35 +451,10 @@ class Unibar:
448
451
  for i, occ in enumerate(occs):
449
452
  if occ > 0:
450
453
  data_per_color[i].append(scaled)
451
-
452
- return data_per_color, self.colors, [edge_color_from_face(c) for c in self.colors]
453
-
454
- def _prepare_weights(self, n_colors):
455
- """
456
- Give the frequency of each y-position from _prepare_scaled_data.
457
-
458
- Walks the values the same way that method does, but records the
459
- occurrence count (or weight-sum, if a weights column is set) instead of
460
- the position. The box/violin code zips the two together so the KDE and
461
- quantiles know how many observations sit at each spot.
462
-
463
- Args:
464
- n_colors: number of colours to split the weights across.
465
-
466
- Returns weights_per_color: a list of weights per colour, aligned with
467
- _prepare_scaled_data's output.
468
- """
469
- weights_per_color = [[] for _ in range(n_colors)]
470
- for v in self.non_missing_vals:
471
- occs = v.occ_by_colour
472
- if len(occs) < n_colors:
473
- occs = occs + [0] * (n_colors - len(occs))
474
- for i, occ in enumerate(occs):
475
- if occ > 0:
476
454
  # if no weight column, occ is an integer count — use it directly as the weight
477
455
  weights_per_color[i].append(float(occ))
478
456
 
479
- return weights_per_color
457
+ return data_per_color, weights_per_color, self.colors, [edge_color_from_face(c) for c in self.colors]
480
458
 
481
459
  def _weighted_quantile(self, data, weights, quantiles):
482
460
  """
@@ -519,8 +497,7 @@ class Unibar:
519
497
  its occurrence count or weight-sum.
520
498
  """
521
499
 
522
- data_per_color, facecolors, edgecolors = self._prepare_scaled_data(y_start, y_end)
523
- weights_per_color = self._prepare_weights(len(self.colors))
500
+ data_per_color, weights_per_color, facecolors, edgecolors = self._prepare_scaled_data(y_start, y_end)
524
501
 
525
502
  # ---- helpers ----
526
503
 
@@ -608,8 +585,7 @@ class Unibar:
608
585
  return lst[1:] + lst[:1]
609
586
  return lst
610
587
 
611
- data_per_color, facecolors, edgecolors = self._prepare_scaled_data(y_start, y_end)
612
- weights_per_color = self._prepare_weights(len(self.colors))
588
+ data_per_color, weights_per_color, facecolors, edgecolors = self._prepare_scaled_data(y_start, y_end)
613
589
 
614
590
  n = len(data_per_color)
615
591
  if n == 0:
@@ -699,7 +675,17 @@ class Unibar:
699
675
  self.non_missing_vals,
700
676
  rectangle_painter)
701
677
 
702
- def _draw_spiky_beanplot(self, ax, y_start, y_end, rectangle_painter, bins=14):
678
+ def _weighted_centre(self, weight_fn):
679
+ """
680
+ Weighted mean of vert_centre across the non-missing values, weighting each
681
+ value by weight_fn(val). Returns None when the weights sum to zero.
682
+ """
683
+ total = sum(weight_fn(val) for val in self.non_missing_vals)
684
+ if total == 0:
685
+ return None
686
+ return sum(val.vert_centre * weight_fn(val) for val in self.non_missing_vals) / total
687
+
688
+ def _draw_spiky_beanplot(self, ax, y_start, y_end, rectangle_painter):
703
689
  # draw violin
704
690
  self._draw_violin(ax, y_start, y_end, draw_boxplot=False)
705
691
 
@@ -730,11 +716,7 @@ class Unibar:
730
716
  zorder=1)
731
717
 
732
718
  # draw the mean line
733
- total_weight = sum(val.occurrences for val in self.non_missing_vals)
734
- mean_y = sum(
735
- val.vert_centre * val.occurrences
736
- for val in self.non_missing_vals
737
- ) / total_weight
719
+ mean_y = self._weighted_centre(lambda val: val.occurrences)
738
720
 
739
721
  rectangle_painter.plot(ax, alpha=1,
740
722
  left_center_pts=[(self.pos_x - self.width / 2, mean_y)],
@@ -798,13 +780,8 @@ class Unibar:
798
780
 
799
781
  # draw the mean lines
800
782
  # LEFT (highlighted)
801
- l_total = sum(val.occ_by_colour[1] for val in self.non_missing_vals)
802
- if l_total > 0:
803
- l_mean_y = sum(
804
- val.vert_centre * val.occ_by_colour[1]
805
- for val in self.non_missing_vals
806
- ) / l_total
807
-
783
+ l_mean_y = self._weighted_centre(lambda val: val.occ_by_colour[1])
784
+ if l_mean_y is not None:
808
785
  rectangle_painter.plot(
809
786
  ax,
810
787
  alpha=1,
@@ -817,13 +794,8 @@ class Unibar:
817
794
  )
818
795
 
819
796
  # RIGHT (non-highlighted)
820
- r_total = sum(val.occ_by_colour[0] for val in self.non_missing_vals)
821
- if r_total > 0:
822
- r_mean_y = sum(
823
- val.vert_centre * val.occ_by_colour[0]
824
- for val in self.non_missing_vals
825
- ) / r_total
826
-
797
+ r_mean_y = self._weighted_centre(lambda val: val.occ_by_colour[0])
798
+ if r_mean_y is not None:
827
799
  rectangle_painter.plot(
828
800
  ax,
829
801
  alpha=1,
@@ -866,42 +838,11 @@ class Unibar:
866
838
  zorder=1)
867
839
 
868
840
  if self.draw_white_dividers and len(values) > 1:
869
- divider_height = Defaults.WHITE_DIVIDER_HEIGHT
870
-
871
- divider_left_pts = []
872
- divider_right_pts = []
873
- divider_heights = []
874
- divider_weights = []
875
-
876
- space_between_dividers = self.hbar_height / 2
877
-
878
- for i in range(len(values) - 1):
879
- top_of_i = values[i].vert_centre + space_between_dividers
880
- bottom_of_next = values[i + 1].vert_centre - space_between_dividers
881
- divider_y = (top_of_i + bottom_of_next) / 2
882
- half_label_space = self.width / 2
883
-
884
- divider_left_pts.append((self.pos_x - half_label_space, divider_y))
885
- divider_right_pts.append((self.pos_x + half_label_space, divider_y))
886
- divider_heights.append(divider_height)
887
-
888
- # white divider bar (use 2D structure)
889
- divider_weights.append([1])
890
-
891
- rectangle_painter.plot(
892
- ax,
893
- alpha=1,
894
- left_center_pts=divider_left_pts,
895
- right_center_pts=divider_right_pts,
896
- heights=divider_heights,
897
- colors=["white"],
898
- weights=divider_weights,
899
- orientation=self.hi_box,
900
- zorder=2, # slightly above bars
901
- check_overlap=False
902
- )
841
+ # bar charts draw every value at a constant hbar_height
842
+ half_heights = [self.hbar_height / 2] * len(values)
843
+ self._draw_white_dividers(ax, values, rectangle_painter, half_heights, self.width)
903
844
  # ---------- Label Drawing ----------
904
- def _draw_labels(self, ax, y_start, y_end):
845
+ def _draw_labels(self, ax):
905
846
  """
906
847
  Draws labels depending on the display type.
907
848
  2 types of labels:
@@ -913,14 +854,14 @@ class Unibar:
913
854
  if self.missing:
914
855
  for mv in self.missing_vals:
915
856
  # don't draw the labels if there are no missing values
916
- if mv.occurrences > 0:
857
+ if mv.occurrences > 0:
917
858
  # Place missing labels just above the bottom with missing_padding
918
859
  ax.text(x, mv.vert_centre, self.missing_placeholder, ha='center', va='center', **(self.label_options or {}))
919
-
860
+
920
861
  if self.label_type == "values":
921
862
  self._draw_value_labels(ax) #draws labels directly according to the values
922
863
  elif self.label_type == "levels":
923
- self._draw_level_labels(ax, y_start, y_end)
864
+ self._draw_level_labels(ax)
924
865
  else:
925
866
  raise ValueError(f"invalid label_type {self.label_type}")
926
867
 
@@ -934,7 +875,7 @@ class Unibar:
934
875
  ax.text(self.pos_x, val.vert_centre, self._get_formatted_label(val.dtype, val.id), ha='center', va='center', **(self.label_options or {}))
935
876
 
936
877
  # -------- Label drawing - levels (starting from y_start and ending at y_end) ------
937
- def _draw_level_labels(self, ax, y_start, y_end):
878
+ def _draw_level_labels(self, ax):
938
879
  """
939
880
  2 ways to draw levels:
940
881
  1. Display type == rug
@@ -995,10 +936,7 @@ class Unibar:
995
936
  Returns a Value, given its id
996
937
  Assumes that all ids are unique (true)
997
938
  """
998
- for v in self.values:
999
- if v.id == id:
1000
- return v
1001
- return None
939
+ return self._values_by_id.get(id)
1002
940
 
1003
941
  def __repr__(self):
1004
942
  return f"unibar(name={self.name!r}, x={self.pos_x:.2f}, nvals={len(self.values)})"
@@ -1,5 +1,6 @@
1
1
  import re
2
2
  import colorsys
3
+ import warnings
3
4
  import matplotlib.colors as mcolors
4
5
  import pandas as pd
5
6
  from typing import List, Dict, Any
@@ -20,7 +21,8 @@ class Defaults:
20
21
  UNI_VFILL: float = 0.08 # default unibar vertical fill
21
22
  CONNECTOR_FRACTION: float = 1 # default proportion fraction of connectors : vfill
22
23
  UNI_HFILL: float = 0.3 # default horizontal fill
23
- MIN_BAR_HEIGHT: float = 0.15 # minimum bar height
24
+ MIN_BAR_HEIGHT_UNIBAR: float = 0.15 # minimum drawn height of a unibar (and its colour segments)
25
+ MIN_BAR_HEIGHT_CONNECTORS: float = 0.12 # minimum drawn thickness of a connector; slightly less than the unibar floor
24
26
  BAR_UNIT: float = 1.0 # default bar unit (how many pixels/obs.) is recalculated on init.
25
27
  XMARGIN: float = 0.02 # margin on x axis
26
28
  YMARGIN: float = 0.04 # margin on y axis
@@ -92,6 +94,17 @@ def safe_numeric(val):
92
94
  except (ValueError, TypeError):
93
95
  return val
94
96
 
97
+ def clamp_unit(value, name):
98
+ """Clamp a 0-1 layout fraction, warning (with the parameter's name) when the
99
+ given value falls outside the range."""
100
+ if value < 0:
101
+ warnings.warn(f"{name} < 0. Value has been clamped to 0.")
102
+ return 0
103
+ elif value > 1:
104
+ warnings.warn(f"{name} > 1. Value has been clamped to 1.")
105
+ return 1
106
+ return value
107
+
95
108
  def resolve_ordering(orders):
96
109
  """
97
110
  Merge several category orderings (from variables in same_scale) into one
@@ -235,7 +248,39 @@ def assign_color_index(df: pd.DataFrame, var_list: List[str], hi_missing, missin
235
248
  if v != hi_var:
236
249
  continue
237
250
  mask = df["color_index"] == 0
238
- df.loc[mask, "color_index"] = df.loc[mask, v].apply(lambda val: _compute_color_index(val, hi_missing, hi_value))
251
+ col = df.loc[mask, v]
252
+ if isinstance(hi_value, list):
253
+ # Fast path: a list of highlight values is a straight lookup.
254
+ # Build the same mapping _compute_color_index applies — first
255
+ # occurrence in hi_value wins (list.index semantics), exact match
256
+ # first, then a numeric-coercion fallback for non-string values.
257
+ buffer = 1 if hi_missing else 0
258
+ exact_map = {}
259
+ numeric_map = {}
260
+ for i, hv in enumerate(hi_value):
261
+ if hv not in exact_map:
262
+ exact_map[hv] = i + 1 + buffer
263
+ try:
264
+ fk = float(hv)
265
+ except (ValueError, TypeError):
266
+ continue
267
+ if fk not in numeric_map:
268
+ numeric_map[fk] = i + 1 + buffer
269
+
270
+ def _numeric_index(val):
271
+ # numeric fallback only applies to non-string, non-NaN values
272
+ if isinstance(val, str) or pd.isna(val):
273
+ return np.nan
274
+ try:
275
+ return numeric_map.get(float(val), np.nan)
276
+ except (ValueError, TypeError):
277
+ return np.nan
278
+
279
+ result = col.map(exact_map).fillna(col.map(_numeric_index))
280
+ df.loc[mask, "color_index"] = result.fillna(0).astype(int)
281
+ else:
282
+ # regex / numeric-range expressions stay on the row-wise path
283
+ df.loc[mask, "color_index"] = col.apply(lambda val: _compute_color_index(val, hi_missing, hi_value))
239
284
  return df
240
285
 
241
286
  def get_formatted_label(datatype, value):
@@ -1,5 +1,5 @@
1
1
  # value.py
2
- from typing import Dict, List, Optional
2
+ from typing import List, Optional
3
3
  import numpy as np
4
4
 
5
5
  class Value:
@@ -13,7 +13,6 @@ class Value:
13
13
  occurrences: how many times the value occurs in the unibar
14
14
  occ_by_colour: the number of occurrences of each colour group in the Value ([non_highlight_count, hi_count_1, hi_count_2, ...])
15
15
  vert_centre: the vertical coordinate of the Value's centre
16
- next: the Values in the next unibar that this Value is associated with
17
16
  numeric: the numeric value associated with the Value (if it is categorical, there is no numeric value associated.)
18
17
  """
19
18
  self.dtype = dtype
@@ -22,7 +21,6 @@ class Value:
22
21
  # occ_by_colour: [non_highlight_count, hi_count_1, hi_count_2, ...]
23
22
  self.occ_by_colour = occ_by_colour if occ_by_colour is not None else [self.occurrences]
24
23
  self.vert_centre: float = 0.0
25
- self.next: Dict[str, int] = {}
26
24
  if dtype != np.str_:
27
25
  self.numeric = float(id)
28
26
  else:
@@ -36,22 +34,6 @@ class Value:
36
34
  self.vert_centre = float(centre)
37
35
  return
38
36
 
39
- def add_next(self, next_id: str, count: int = 1):
40
- """
41
- Helper function to add the Values in the next unibar that this Value connects to
42
- """
43
- self.next[next_id] = self.next.get(id, 0) + int(count)
44
-
45
- def set_occurrences(self, total: int, occ_by_colour: Optional[List[int]] = None):
46
- """
47
- Sets the number of occurrences of this Value.
48
- """
49
- self.occurrences = int(total)
50
- if occ_by_colour is not None:
51
- self.occ_by_colour = [int(x) for x in occ_by_colour]
52
- else:
53
- self.occ_by_colour = [int(total)]
54
-
55
37
  def __repr__(self):
56
38
  """
57
39
  Debugging statement that prints the Value's ID, number of occurrences, and the y-coordinate of the Value
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hammock-plot
3
- Version: 1.1.0
3
+ Version: 1.2.0
4
4
  Summary: Hammock plot visualization for categorical and mixed categorical-continuous data
5
5
  Author-email: Tiancheng Yang <t77yang@uwaterloo.ca>, Sandra Huang <sandra.huang@uwaterloo.ca>, Matthias Schonlau <schonlau@uwaterloo.ca>
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "hammock-plot"
7
- version = "1.1.0"
7
+ version = "1.2.0"
8
8
  description = "Hammock plot visualization for categorical and mixed categorical-continuous data"
9
9
  readme = {text = "For the current project description, documentation, and examples, please see the GitHub repository: https://github.com/TianchengY/hammock_plot", content-type = "text/markdown"}
10
10
  license = {text = "MIT"}
File without changes
File without changes