plot-misc 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. plot_misc/__init__.py +1 -0
  2. plot_misc/_version.py +1 -0
  3. plot_misc/barchart.py +523 -0
  4. plot_misc/constants.py +118 -0
  5. plot_misc/errors.py +328 -0
  6. plot_misc/example_data/__init__.py +1 -0
  7. plot_misc/example_data/example_datasets/bar_points.tsv.gz +0 -0
  8. plot_misc/example_data/example_datasets/barchart.tsv.gz +0 -0
  9. plot_misc/example_data/example_datasets/calibration_bins.tsv.gz +0 -0
  10. plot_misc/example_data/example_datasets/calibration_data.tsv.gz +0 -0
  11. plot_misc/example_data/example_datasets/forest_data.tsv.gz +0 -0
  12. plot_misc/example_data/example_datasets/group_bar.tsv.gz +0 -0
  13. plot_misc/example_data/example_datasets/heatmap_data.tsv.gz +0 -0
  14. plot_misc/example_data/example_datasets/incidence_matrix_data.tsv.gz +0 -0
  15. plot_misc/example_data/example_datasets/lollipop_data.tsv.gz +0 -0
  16. plot_misc/example_data/example_datasets/mace_associations.tsv.gz +0 -0
  17. plot_misc/example_data/example_datasets/net_benefit.tsv.gz +0 -0
  18. plot_misc/example_data/example_datasets/string_data.txt +1 -0
  19. plot_misc/example_data/example_datasets/volcano.tsv.gz +0 -0
  20. plot_misc/example_data/examples.py +637 -0
  21. plot_misc/forest.py +1478 -0
  22. plot_misc/heatmap.py +369 -0
  23. plot_misc/incidencematrix.py +394 -0
  24. plot_misc/machine_learning.py +1143 -0
  25. plot_misc/piechart.py +197 -0
  26. plot_misc/utils/__init__.py +1 -0
  27. plot_misc/utils/colour.py +171 -0
  28. plot_misc/utils/formatting.py +369 -0
  29. plot_misc/utils/utils.py +1151 -0
  30. plot_misc/volcano.py +203 -0
  31. plot_misc-2.0.2.dist-info/METADATA +107 -0
  32. plot_misc-2.0.2.dist-info/RECORD +35 -0
  33. plot_misc-2.0.2.dist-info/WHEEL +5 -0
  34. plot_misc-2.0.2.dist-info/licenses/LICENSE +18 -0
  35. plot_misc-2.0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,394 @@
1
+ """
2
+ Incidence matrix plotting for categorical heatmaps and set visualisation.
3
+
4
+ This module provides a plotting function for drawing incidence matrices,
5
+ where each cell in a 2D grid is populated with a marker (dot) based on the
6
+ underlying matrix value. This is useful for visualising categorical
7
+ presence/absence patterns, binary annotations, or simplified heatmaps
8
+ without continuous shading.
9
+
10
+ The visual output is a grid of vertical and horizontal lines forming an
11
+ n-by-m lattice, with overlaid points coloured and sized according to
12
+ user-defined thresholds and formatting options.
13
+
14
+ Functions
15
+ ---------
16
+ draw_incidencematrix(data, fsize=(3, 4), ...)
17
+ Draws a categorical incidence matrix, customising grid lines, dot styles,
18
+ and axis labels using a DataFrame as input.
19
+
20
+ Notes
21
+ -----
22
+ Each dot is rendered using `matplotlib.pyplot.scatter`, and horizontal/vertical
23
+ lines define the grid. The mapping of dot appearance to values is governed by
24
+ user-supplied breakpoints and style parameters. Optional keyword dictionaries
25
+ enable fine-grained customisation of scatter and line elements.
26
+ """
27
+
28
+ # importing
29
+ import pandas as pd
30
+ import numpy as np
31
+ import matplotlib.pyplot as plt
32
+ from matplotlib.transforms import Bbox
33
+ from typing import (
34
+ Any,
35
+ Literal,
36
+ )
37
+ from plot_misc.constants import (
38
+ NamesIncidenceMatrix as NamesIM,
39
+ Real,
40
+ )
41
+ from plot_misc.utils.utils import _update_kwargs
42
+ from plot_misc.errors import (
43
+ is_type,
44
+ is_df,
45
+ are_columns_in_df,
46
+ # same_len,
47
+ Error_MSG,
48
+ )
49
+
50
+ # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
51
def draw_incidencematrix(
    data: pd.DataFrame, fsize: tuple[Real, Real] = (3, 4),
    dot_colour: list[tuple[str, Real]] = [('grey', 0), ('black', 1)],
    dot_size: list[Real] | list[tuple[Real, Real]] = [4, 8],
    dot_transparency: list[Real] | list[tuple[Real, Real]] = [0.9, 1.0],
    line_colour: tuple[str, str] = ('lightgrey', 'lightgrey'),
    lw: tuple[float, float] = (0.3, 0.3),
    tick_lab_size: tuple[float, float] = (4.5, 4.5),
    tick_len: tuple[float, float] = (2.0, 2.0),
    tick_w: tuple[float, float] = (0.3, 0.3),
    margins: tuple[float, float] | None = None,
    grid_position: Literal['outline', 'centre'] | None = 'centre',
    ax: plt.Axes | None = None,
    break_limits: tuple[float, float] = (-np.inf, np.inf),
    size_data: pd.DataFrame | None = None,
    transparency_data: pd.DataFrame | None = None,
    kwargs_scatter_dict: dict[Any, Any] | None = None,
    kwargs_vline_dict: dict[Any, Any] | None = None,
    kwargs_hline_dict: dict[Any, Any] | None = None,
) -> tuple[plt.Figure, plt.Axes]:
    """
    Draw a categorical heatmap to visualise an incidence matrix.

    Every cell of `data` is represented by a dot on an n-by-m lattice. The
    dot colour, size, and transparency are looked up from user-defined
    break points, which allows binary or ordinal heatmap-style figures for
    presence/absence or category membership.

    Parameters
    ----------
    data : `pd.DataFrame`
        A matrix of shape (n_rows, n_columns). The column labels are used for
        the x-axis ticks and the index labels for the y-axis ticks. Values
        are mapped to dot attributes based on `dot_colour`.
    fsize : `tuple` [`float`, `float`], default (3, 4)
        Width and height of the figure in inches. Only used when `ax` is
        `None`.
    dot_colour : `list` [`tuple` [`str`, `float`]], default [('grey', 0), ('black', 1)]
        A list of (colour, upper bound) tuples. A dot gets the colour of the
        interval (previous upper bound, upper bound] its value falls in.

        The default, [('grey', 0), ('black', 1)], colours dots grey for
        values in (-inf, 0] and black for values in (0, 1].
    dot_size : `list` [`float`] or `list` [`tuple` [`float`, `float`]], default [4, 8]
        Dot sizes. A list of numbers is paired, in order, with the upper
        bounds in `dot_colour`; a list of (size, upper bound) tuples defines
        its own break points. The break points are applied to `size_data`
        when supplied and to `data` otherwise.
    dot_transparency : `list` [`float`] or `list` [`tuple` [`float`, `float`]], default [0.9, 1.0]
        Alpha values for the dots, specified in the same way as `dot_size`.
        The break points are applied to `transparency_data` when supplied and
        to `data` otherwise.
    line_colour : `tuple` [`str`, `str`], default ('lightgrey', 'lightgrey')
        Colours of the vertical and horizontal grid lines.
    lw : `tuple` [`float`, `float`], default (0.3, 0.3)
        Line widths of the vertical and horizontal grid lines.
    tick_lab_size : `tuple` [`float`, `float`], default (4.5, 4.5)
        Font size of the x- and y-axis tick labels.
    tick_len : `tuple` [`float`, `float`], default (2.0, 2.0)
        Tick length for the x- and y-axis.
    tick_w : `tuple` [`float`, `float`], default (0.3, 0.3)
        Tick width for the x- and y-axis.
    margins : `tuple` [`float`, `float`] or `None`, default `None`
        Margins passed to `ax.margins` for the x- and y-axis. Left untouched
        when `None`.
    grid_position : {'centre', 'outline'} or `None`, default 'centre'
        Whether to draw the grid lines through the dot centres or between the
        dots. No grid is drawn when `None`.
    ax : `plt.Axes` or `None`, default `None`
        If provided, the plot is drawn on this axis. Otherwise a new figure
        and axis are created.
    break_limits : `tuple` [`float`, `float`], default (-np.inf, np.inf)
        Forwarded to `_map_attributes`, where the first element is the
        exclusive lower bound of the first interval.
    size_data : `pd.DataFrame` or `None`, default `None`
        Optional matrix, of the same shape as `data`, whose values select the
        dot size.
    transparency_data : `pd.DataFrame` or `None`, default `None`
        Optional matrix, of the same shape as `data`, whose values select the
        dot transparency.
    kwargs_*_dict : `dict` [`any`, `any`] or `None`, default `None`
        Optional arguments merged, through `_update_kwargs`, with the
        defaults of the plotting calls:
            kwargs_scatter_dict --> ax.scatter
            kwargs_vline_dict   --> ax.axvline (vertical grid lines)
            kwargs_hline_dict   --> ax.axhline (horizontal grid lines)

    Returns
    -------
    fig : `matplotlib.figure.Figure`
        The matplotlib figure containing the plot.
    ax : `matplotlib.axes.Axes`
        The axis containing the plotted incidence matrix.

    Raises
    ------
    ValueError
        If `grid_position` is neither `None` nor one of the expected strings.
    IndexError
        If `size_data` or `transparency_data` has a different shape than
        `data`.

    Notes
    -----
    A `dot_size` or `dot_transparency` list holding a single value is
    repeated for every entry of `dot_colour`. Longer lists of numbers are
    paired with the `dot_colour` break points position by position, so any
    break point without a partner has no size/transparency assigned.

    Cells that are missing, or for which no colour, size, or transparency
    could be assigned (for example because the value lies outside all break
    points), are not drawn.

    The mutable list defaults are only read, never modified in place.
    """
    SHAPE_ERR = ('`data` and `{0}` should have the same shapes '
                 'not: {1} and {2}, respectively.')
    # check inputs
    is_type(dot_size, list)
    is_type(dot_colour, list)
    is_type(dot_transparency, list)
    is_type(ax, (type(None), plt.Axes))
    is_df(data)
    is_type(grid_position, (str, type(None)))
    is_type(size_data, (type(None), pd.DataFrame))
    is_type(transparency_data, (type(None), pd.DataFrame))
    # check literals
    EXP_GRID = [NamesIM.GRID_POS_B, NamesIM.GRID_POS_O]
    if grid_position is not None and grid_position not in EXP_GRID:
        raise ValueError(
            Error_MSG.INVALID_STRING.format('grid_position', EXP_GRID))
    # make sure all the data have the same shape
    if size_data is not None and data.shape != size_data.shape:
        raise IndexError(
            SHAPE_ERR.format('size_data', data.shape, size_data.shape))
    if (transparency_data is not None
            and data.shape != transparency_data.shape):
        raise IndexError(
            SHAPE_ERR.format('transparency_data', data.shape,
                             transparency_data.shape))
    # reverse the rows and transpose: the input columns end up on the x-axis
    # and, on a default (non-inverted) axis, the first input row at the top.
    data = data.iloc[::-1].T
    if transparency_data is not None:
        transparency_data = transparency_data.iloc[::-1].T
    if size_data is not None:
        size_data = size_data.iloc[::-1].T
    # map None to dict
    kwargs_scatter_dict = kwargs_scatter_dict or {}
    kwargs_vline_dict = kwargs_vline_dict or {}
    kwargs_hline_dict = kwargs_hline_dict or {}
    # a single value is repeated for each dot_colour element (this creates a
    # new list, the caller's list is left untouched)
    ndots = len(dot_colour)
    if len(dot_size) == 1:
        dot_size = dot_size * ndots
    if len(dot_transparency) == 1:
        dot_transparency = dot_transparency * ndots
    # do we need to make an axis
    if ax is None:
        f, ax = plt.subplots(figsize=(fsize[0], fsize[1]))
    else:
        f = ax.figure
    # get colour maps
    dot_colours_arr = _map_attributes(
        data, dot_colour, break_limits=break_limits)
    # get size maps
    size_input = size_data if size_data is not None else data
    dot_size_arr = _map_attributes(
        size_input, _pair_with_colour_cuts(dot_size, dot_colour),
        break_limits=break_limits)
    # get transparency maps
    transparency_input = (
        transparency_data if transparency_data is not None else data)
    dot_transparency_arr = _map_attributes(
        transparency_input,
        _pair_with_colour_cuts(dot_transparency, dot_colour),
        break_limits=break_limits)
    # the x and y coordinates, flattened in the same (row-major) order as
    # the attribute arrays
    M, N = data.shape
    x, y = np.meshgrid(np.arange(M), np.arange(N))
    xv = x.T.ravel()
    yv = y.T.ravel()
    col_flat = dot_colours_arr.ravel()
    size_flat = dot_size_arr.ravel().astype(float)
    alpha_flat = dot_transparency_arr.ravel().astype(float)
    # cells without a colour, size, or alpha are skipped. Leaving the NaN
    # placeholders in `col_flat` would make np.unique fail, because colour
    # strings and float NaN cannot be ordered.
    drawable = ~(
        pd.isna(col_flat) | np.isnan(size_flat) | np.isnan(alpha_flat))
    ################
    # plot dots, size, and alpha - one scatter call per colour
    for col in np.unique(col_flat[drawable]):
        mask = drawable & (col_flat == col)
        # sort out kwargs
        # NOTE(review): presumably entries in kwargs_scatter_dict override
        # these defaults - confirm against `_update_kwargs`.
        new_scatter_kwargs = _update_kwargs(
            update_dict=kwargs_scatter_dict,
            edgecolor='black',
            linewidths=0.0,
            s=size_flat[mask],
            alpha=alpha_flat[mask],
            c=col, zorder=3,
        )
        ax.scatter(xv[mask], yv[mask], **new_scatter_kwargs)
    ################
    # adding grid lines
    if grid_position is not None:
        # axis='y' draws the horizontal lines (ax.axhline), so these take the
        # hline kwargs; previously the vline kwargs were applied here.
        new_hline_kwargs = _update_kwargs(
            update_dict=kwargs_hline_dict,
            c=line_colour[1], linestyle='-',
            linewidth=lw[1], zorder=1,
        )
        _draw_grid(x, ax, axis='y', grid_position=grid_position,
                   **new_hline_kwargs)
        # axis='x' draws the vertical lines (ax.axvline)
        new_vline_kwargs = _update_kwargs(
            update_dict=kwargs_vline_dict,
            c=line_colour[0], linestyle='-',
            linewidth=lw[0], zorder=1,
        )
        _draw_grid(x, ax, axis='x', grid_position=grid_position,
                   **new_vline_kwargs)
    # ticks
    ax.set(xticks=np.arange(x.shape[1]), yticks=np.arange(x.shape[0]),
           xticklabels=data.index, yticklabels=data.columns)
    ax.tick_params(axis="x", labelsize=tick_lab_size[0], length=tick_len[0],
                   width=tick_w[0], rotation=90)
    ax.tick_params(axis="y", labelsize=tick_lab_size[1], length=tick_len[1],
                   width=tick_w[1])
    # trim margin
    if margins is not None:
        ax.margins(x=margins[0], y=margins[1])
    # return the figure and axes
    return f, ax


def _pair_with_colour_cuts(values: list[Any],
                           dot_colour: list[tuple[str, Real]],
                           ) -> list[Any]:
    """
    Pair plain attribute values with the `dot_colour` upper bounds.

    Parameters
    ----------
    values : `list` [`any`]
        Either plain numbers or ready-made (attribute, upper bound) tuples.
    dot_colour : `list` [`tuple` [`str`, `float`]]
        The colour rules whose upper bounds are borrowed.

    Returns
    -------
    list
        When every entry of `values` is a real number: a list of
        (value, upper bound) tuples, paired position by position and
        truncated to the shorter of the two inputs. Otherwise `values` is
        returned unchanged.
    """
    if all(isinstance(value, Real) for value in values):
        return [(value, rule[1]) for value, rule in zip(values, dot_colour)]
    return values
277
+
278
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
279
def _map_attributes(data: pd.DataFrame, list_map: list[tuple[Any, Real]],
                    break_limits: tuple[float, float] = (-np.inf, np.inf),
                    ) -> np.ndarray:
    """
    Translate the values of a DataFrame into discrete attributes.

    Each rule in `list_map` is an `(attribute, upper_bound)` tuple. After
    ordering the rules by `upper_bound`, consecutive bounds define half-open
    intervals and every element of `data` receives the attribute (for example
    a colour, size, or alpha) of the interval it belongs to.

    Parameters
    ----------
    data : `pd.DataFrame`
        A numeric matrix of shape (N, M) to be mapped.
    list_map : `list` [`tuple` [`any`, `real`]]
        The (attribute, upper_bound) rules. A value is assigned `attribute`
        when it lies in (previous upper_bound, upper_bound]. The rules do not
        need to be supplied in order.
    break_limits : `tuple` [`float`, `float`], default (-np.inf, np.inf)
        The first element is the exclusive lower bound of the first interval.
        The second element is not used.

    Returns
    -------
    np.ndarray
        An object array with the shape of `data` holding the mapped
        attributes.

    Notes
    -----
    Elements that do not fall inside any interval keep the `np.nan` the
    output array is initialised with.

    Examples
    --------
    >>> data = pd.DataFrame([[0.2, 0.6], [1.2, 2.5]])
    >>> rules = [('grey', 0.5), ('black', 1.5), ('red', 3)]
    >>> _map_attributes(data, rules)
    array([['grey', 'black'],
           ['black', 'red']], dtype=object)
    """
    # check input
    is_type(list_map, list)
    is_type(break_limits, tuple)
    is_df(data)
    # order the rules on their upper bound
    ordered_rules = sorted(list_map, key=lambda rule: rule[1])
    # each interval starts at the upper bound of the preceding rule, the very
    # first one at the supplied lower limit
    lower_bounds = [break_limits[0]]
    lower_bounds.extend(upper for _, upper in ordered_rules[:-1])
    # start from an all-NaN object array and fill it interval by interval
    values = data.to_numpy()
    mapped = np.full_like(values, np.nan, dtype=object)
    for lower, (attribute, upper) in zip(lower_bounds, ordered_rules):
        in_interval = (values > lower) & (values <= upper)
        mapped[in_interval] = attribute
    return mapped
341
+
342
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
343
def _draw_grid(arr: np.ndarray, ax: plt.Axes,
               axis: Literal['x', 'y', 'both'] = 'both',
               grid_position: Literal['centre', 'outline'] = 'centre',
               **kwargs,) -> None:
    """
    Add grid lines through, or around, the cells of a 2D coordinate array.

    Parameters
    ----------
    arr : `np.ndarray`
        A 2D array; only its shape is used. The number of columns sets the
        number of vertical line positions and the number of rows the number
        of horizontal line positions.
    ax : `matplotlib.axes.Axes`
        The axis on which to draw the grid lines.
    axis : {'x', 'y', 'both'}, default 'both'
        'x' draws the vertical lines (`ax.axvline`), 'y' the horizontal lines
        (`ax.axhline`), and 'both' draws the two sets.
    grid_position : {'centre', 'outline'}, default 'centre'
        With 'centre' the lines sit on the integer positions 0, 1, ...;
        otherwise they are shifted to -0.5, 0.5, ... so that they enclose the
        cells.
    **kwargs
        Additional keyword arguments passed to `ax.axvline` and/or
        `ax.axhline`.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If `grid_position` or `axis` is not one of the expected strings.
    """
    # check input
    is_type(arr, np.ndarray)
    is_type(ax, plt.Axes)
    valid_positions = [NamesIM.GRID_POS_B, NamesIM.GRID_POS_O]
    if not (grid_position is None or grid_position in valid_positions):
        raise ValueError(
            Error_MSG.INVALID_STRING.format(
                'grid_position', valid_positions))
    valid_axes = [NamesIM.AXIS_X, NamesIM.AXIS_Y, NamesIM.AXIS_B]
    if axis not in valid_axes:
        raise ValueError(
            Error_MSG.INVALID_STRING.format(
                'axis', valid_axes))
    # line positions: on the dot centres, or half a unit off so the lines
    # surround the dots
    n_cols = arr.shape[1]
    n_rows = arr.shape[0]
    if grid_position == 'centre':
        vertical_at = np.arange(n_cols)
        horizontal_at = np.arange(n_rows)
    else:
        vertical_at = np.arange(-0.5, n_cols, 1.0)
        horizontal_at = np.arange(-0.5, n_rows, 1.0)
    # finally draw the requested lines
    if axis in ('x', 'both'):
        for position in vertical_at:
            ax.axvline(x=position, **kwargs)
    if axis in ('y', 'both'):
        for position in horizontal_at:
            ax.axhline(y=position, **kwargs)
394
+