densitty 0.8.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
densitty/binning.py CHANGED
@@ -3,14 +3,37 @@
3
3
  import math
4
4
  from bisect import bisect_right
5
5
  from decimal import Decimal
6
+ from fractions import Fraction
6
7
  from typing import Optional, Sequence
7
8
 
8
9
  from .axis import Axis
9
10
  from .util import FloatLike, ValueRange
10
- from .util import clamp, decimal_value_range, most_round, round_up_ish
11
+ from .util import clamp, make_decimal, make_value_range, most_round, round_up_ish
11
12
 
13
+ # Following MatPlotLib, the 'bins' argument for functions can be:
14
+ # int: number of bins for both X and Y
15
+ # Sequence[FloatLike]: bin edges for both X and Y
16
+ # tuple(int, int): number of bins for X, number of bins for Y
17
+ # tuple(Sequence[FloatLike], Sequence[FloatLike]): bin edges for X, bin edges for Y
12
18
 
13
- def bin_edges(
19
+ CountArg = int
20
+ EdgesArg = Sequence[FloatLike]
21
+ # a type for the "for both X and Y" variants:
22
+ SingleBinsArg = CountArg | EdgesArg
23
+
24
+ # a type for the tuple (X,Y) variants:
25
+ DoubleCountArg = tuple[CountArg, CountArg]
26
+ DoubleEdgesArg = tuple[EdgesArg, EdgesArg]
27
+ ExpandedBinsArg = DoubleCountArg | DoubleEdgesArg
28
+
29
+ FullBinsArg = Optional[SingleBinsArg | ExpandedBinsArg]
30
+
31
+ RangesArg = tuple[Optional[ValueRange], Optional[ValueRange]]
32
+
33
+ DEFAULT_NUM_BINS = (10, 10)
34
+
35
+
36
+ def bin_by_edges(
14
37
  points: Sequence[tuple[FloatLike, FloatLike]],
15
38
  x_edges: Sequence[FloatLike],
16
39
  y_edges: Sequence[FloatLike],
@@ -45,17 +68,42 @@ def calc_value_range(values: Sequence[FloatLike]) -> ValueRange:
45
68
  """Calculate a value range from data values"""
46
69
  if not values:
47
70
  # Could raise an exception here, but for now just return _something_
48
- return ValueRange(0, 1)
71
+ return make_value_range((0, 1))
49
72
 
50
73
  # bins are closed on left and open on right: i.e. left_edge <= values < right_edge
51
74
  # so, the right-most bin edge needs to be larger than the largest data value:
52
- max_value = max(values)
53
- range_top = max_value + math.ulp(max_value) # increase by smallest representable amount
54
- return ValueRange(min(values), range_top)
55
-
56
-
57
- def pick_edges(
58
- num_bins: int,
75
+ min_value = make_decimal(min(values))
76
+ max_value = make_decimal(max(values))
77
+
78
+ range_top = max_value + Decimal(
79
+ math.ulp(max_value)
80
+ ) # increase by smallest float-representable amount
81
+ return ValueRange(min_value, range_top)
82
+
83
+
84
+ def align_value_range(vr: ValueRange, alignment_arg: FloatLike) -> ValueRange:
85
+ """Shift the provided ValueRange up or down to the specified alignment.
86
+ up/down choice based on which will shift it less"""
87
+ alignment = make_decimal(alignment_arg)
88
+ width = vr.max - vr.min
89
+ aligned_min = math.floor(vr.min / alignment) * alignment
90
+ aligned_max = math.ceil(vr.max / alignment) * alignment
91
+ shift_for_min = vr.min - aligned_min # how far down did 'min' get shifted?
92
+ shift_for_max = aligned_max - vr.max # how far up did 'max' get shifted?
93
+ if shift_for_min < shift_for_max:
94
+ return ValueRange(aligned_min, aligned_min + width)
95
+ return ValueRange(aligned_max - width, aligned_max)
96
+
97
+
98
+ def force_value_range_width(vr: ValueRange, width: FloatLike) -> ValueRange:
99
+ """Return a ValueRange with specified width, centered on an existing ValueRange"""
100
+ half_width = make_decimal(width) / 2
101
+ midpoint = (vr.max + vr.min) / 2
102
+ return ValueRange(midpoint - half_width, midpoint + half_width)
103
+
104
+
105
+ def segment_interval(
106
+ num_outputs: int,
59
107
  value_range: ValueRange,
60
108
  align=True,
61
109
  ) -> Sequence[FloatLike]:
@@ -64,49 +112,63 @@ def pick_edges(
64
112
  Parameters
65
113
  ----------
66
114
  values: Sequence of data values
67
- num_bins: int
68
- Number of bins to partition into
115
+ num_outputs: int
116
+ Number of output values
69
117
  value_range: ValueRange
70
- Min/Max of the values to be binned
118
+ Min/Max of the output values
71
119
  align: bool
72
120
  Adjust the range somewhat to put bin size & edges on "round" values
73
121
  """
74
- value_range = decimal_value_range(value_range) # coerce into Decimal if not already
122
+ value_range = make_value_range(value_range) # coerce into Decimal if not already
123
+ assert isinstance(value_range.min, Decimal) # make the type-checker happy
124
+ assert isinstance(value_range.max, Decimal)
125
+ num_steps = num_outputs - 1
75
126
 
76
- min_step_size = (value_range.max - value_range.min) / num_bins
127
+ min_step_size = (value_range.max - value_range.min) / num_steps
77
128
  if align:
78
129
  step_size = round_up_ish(min_step_size)
79
- first_edge = math.floor(Decimal(value_range.min) / step_size) * step_size
80
- if first_edge + num_bins * step_size < value_range.max:
130
+ first_edge = math.floor(Fraction(value_range.min) / step_size) * step_size
131
+ if first_edge + num_steps * step_size < value_range.max:
81
132
  # Uh oh: even though we rounded up the bin size, shifting the first edge
82
133
  # down to a multiple has shifted the last edge down too far. Bump up the step size:
83
- step_size = round_up_ish(step_size * Decimal(1.015625))
84
- first_edge = math.floor(Decimal(value_range.min) / step_size) * step_size
134
+ step_size = round_up_ish(step_size * Fraction(65, 64))
135
+ first_edge = math.floor(Fraction(value_range.min) / step_size) * step_size
85
136
  # we now have a round step size, and a first edge that is the highest possible multiple of it
86
137
  # Test to see if any lower multiples of it will still include the whole ranges,
87
138
  # and be "nicer" i.e. if data is all in 1.1..9.5 range with 10 bins, we now have bins
88
139
  # covering 1-11, but could have 0-10
89
- last_edge = first_edge + step_size * num_bins
90
- num_trials = int((last_edge - value_range.max) // step_size + 1)
91
- offsets = (step_size * i for i in range(num_trials))
92
- edge_pairs = ((first_edge - offset, last_edge - offset) for offset in offsets)
93
- first_edge = most_round(edge_pairs)[0]
94
-
140
+ last_edge = first_edge + step_size * num_steps
141
+ edge_pairs = []
142
+ max_step_slop = int((last_edge - Fraction(value_range.max)) // step_size)
143
+ for step_shift in range(-max_step_slop, 1):
144
+ for end_step_shift in range(-max_step_slop, step_shift + 1):
145
+ edge_pairs += [
146
+ (first_edge + step_shift * step_size, last_edge + end_step_shift * step_size)
147
+ ]
148
+ first_edge, last_edge = most_round(edge_pairs)
95
149
  else:
96
150
  step_size = min_step_size
97
151
  first_edge = value_range.min
152
+ last_edge = value_range.max
153
+
154
+ stepped_values = tuple(first_edge + step_size * i for i in range(num_outputs))
98
155
 
99
- num_edges = num_bins + 1
100
- return tuple(first_edge + step_size * i for i in range(num_edges))
156
+ # The values may have overrun the end of the desired output range. Trim if so:
157
+ return tuple(v for v in stepped_values if v <= last_edge)
101
158
 
102
159
 
103
- def edge_range(start: FloatLike, end: FloatLike, step: FloatLike, align: bool):
104
- """Similar to range/np.arange, but includes "end" in the output if appropriate"""
160
+ def edge_range(rng: ValueRange, step_arg: FloatLike, align: bool):
161
+ """Generator providing values containing range, by step.
162
+ The first value will be rng.min, or rng.min rounded down to nearest 'step'
163
+ The last value will be equal to or larger than rng.max"""
164
+
165
+ step = make_decimal(step_arg) # turn into decimal if it isn't already
105
166
  if align:
106
- v = math.floor(start / step) * step
167
+ v = math.floor(rng.min / step) * step
107
168
  else:
108
- v = start
109
- while v < end + step:
169
+ v = rng.min
170
+
171
+ while v < (rng.max + step).next_minus():
110
172
  if align:
111
173
  yield round(v / step) * step
112
174
  else:
@@ -114,80 +176,103 @@ def edge_range(start: FloatLike, end: FloatLike, step: FloatLike, align: bool):
114
176
  v += step
115
177
 
116
178
 
117
- def bin_with_size(
118
- points: Sequence[tuple[FloatLike, FloatLike]],
119
- bin_sizes: FloatLike | tuple[FloatLike, FloatLike],
120
- ranges: Optional[tuple[ValueRange, ValueRange]] = None,
121
- align=True,
122
- drop_outside=True,
123
- **axis_args,
124
- ) -> tuple[Sequence[Sequence[int]], Axis, Axis]:
125
- """Bin points into a 2-D histogram, given bin sizes
179
+ def make_edges(rng: ValueRange, step_arg: FloatLike, align: bool):
180
+ """Return the edges as from 'edge_range', as a tuple for convenience"""
181
+ return tuple(edge_range(rng, step_arg, align))
126
182
 
127
- Parameters
128
- ----------
129
- points: Sequence of (X,Y) tuples: the points to bin
130
- bin_sizes: float or tuple(float, float)
131
- Size(s) of (X,Y) bins to partition into
132
- ranges: Optional (ValueRange, ValueRange)
133
- ((x_min, x_max), (y_min, y_max)) for the bins. Default: take from data.
134
- align: bool (default: True)
135
- Force bin edges to be at a multiple of the bin size
136
- drop_outside: bool (default: True)
137
- True: Drop any data points outside the ranges
138
- False: Put any outside points in closest bin (i.e. edge bins include outliers)
139
- axis_args: Extra arguments to pass through to Axis constructor
140
183
 
141
- returns: Sequence[Sequence[int]], (x-)Axis, (y-)Axis
184
+ def expand_bins_arg(
185
+ bins: FullBinsArg,
186
+ ) -> tuple[bool, DoubleCountArg, Optional[DoubleEdgesArg]]:
187
+ """Deal with 'bins' that may be
188
+ - None
189
+ - an integer indicating number of bins
190
+ - a list of edges/centers for the bins
191
+ - a 2-tuple of either of those
192
+ Returns a 3-tuple:
193
+ - specified/not-default (bool),
194
+ - 2-tuple of number of bins,
195
+ - optional 2-tuple of lists of edges/centers
142
196
  """
143
-
144
- if ranges is None:
145
- x_range = calc_value_range(tuple(x for x, _ in points))
146
- y_range = calc_value_range(tuple(y for _, y in points))
197
+ if bins is None:
198
+ return (False, DEFAULT_NUM_BINS, None)
199
+ if isinstance(bins, int):
200
+ num_bins = (bins, bins)
201
+ bin_positions = None
202
+ elif len(bins) > 2:
203
+ # we were given a single list of bin edges
204
+ num = len(bins) - 1
205
+ num_bins = (num, num)
206
+ bin_positions = (bins, bins)
147
207
  else:
148
- x_range, y_range = ValueRange(*ranges[0]), ValueRange(*ranges[1])
208
+ if not isinstance(bins, tuple):
209
+ raise ValueError("Invalid 'bins' argument")
210
+ # we either have a tuple of int/int or Sequence/Sequence
211
+ if isinstance(bins[0], int):
212
+ num_bins = bins
213
+ bin_positions = None
214
+ else:
215
+ num_bins = (len(bins[0]) - 1, len(bins[1]) - 1)
216
+ bin_positions = bins
217
+ return True, num_bins, bin_positions
149
218
 
150
- if not isinstance(bin_sizes, tuple):
151
- # given just a single bin size: replicate it for both axes:
152
- bin_sizes = (bin_sizes, bin_sizes)
153
219
 
154
- x_edges = tuple(edge_range(x_range.min, x_range.max, bin_sizes[0], align))
155
- y_edges = tuple(edge_range(y_range.min, y_range.max, bin_sizes[1], align))
220
+ def expand_bin_size_arg(
221
+ bin_size: Optional[FloatLike | tuple[FloatLike, FloatLike]],
222
+ ) -> Optional[tuple[FloatLike, FloatLike]]:
223
+ """If bin_size arg is not a 2-tuple, replicate it into one"""
224
+ if bin_size is None:
225
+ return None
226
+ if isinstance(bin_size, tuple):
227
+ return bin_size
228
+ return (bin_size, bin_size)
156
229
 
157
- x_axis = Axis(x_range, values_are_edges=True, **axis_args)
158
- y_axis = Axis(y_range, values_are_edges=True, **axis_args)
159
230
 
160
- return (bin_edges(points, x_edges, y_edges, drop_outside=drop_outside), x_axis, y_axis)
231
+ def range_from_arg_or_data(range_arg, points):
232
+ """Return range arg if given, or calculate a range from the data"""
233
+ if range_arg:
234
+ return make_value_range(range_arg)
235
+ return calc_value_range(tuple(points))
161
236
 
162
237
 
163
- def histogram2d(
238
+ def histogram2d( # pylint: disable=too-many-arguments,too-many-positional-arguments,too-many-locals
164
239
  points: Sequence[tuple[FloatLike, FloatLike]],
165
- bins: (
166
- int
167
- | tuple[int, int]
168
- | Sequence[FloatLike]
169
- | tuple[Sequence[FloatLike], Sequence[FloatLike]]
170
- ) = 10,
171
- ranges: Optional[tuple[Optional[ValueRange], Optional[ValueRange]]] = None,
240
+ bins: FullBinsArg = None,
241
+ ranges: Optional[RangesArg] = None,
242
+ bin_size: Optional[FloatLike | tuple[FloatLike, FloatLike]] = None,
172
243
  align=True,
173
244
  drop_outside=True,
174
245
  **axis_args,
175
246
  ) -> tuple[Sequence[Sequence[int]], Axis, Axis]:
176
- """Bin points into a 2-D histogram, given number of bins, or bin edges
247
+ """Bin points into a 2-D histogram, given number of bins, bin edges, or bin sizes
248
+
249
+ Parameters can be combined in the following ways:
250
+ - bin_size with optional ranges
251
+ - bins (as edges) with no ranges
252
+ - bins (as count) with optional ranges
253
+ - bins (as count) + bin_size with no ranges: Fixed number and size of bins, centered on data
177
254
 
178
255
  Parameters
179
256
  ----------
180
257
  points: Sequence of (X,Y) tuples: the points to bin
181
- bins: int or (int, int) or [float,...] or ([float,...], [float,...])
182
- int: number of bins for both X & Y (default: 10)
258
+ bins: int or (int, int) or [float,...] or ([float,...], [float,...]) or None
259
+ int: number of bins for both X & Y
183
260
  (int,int): number of bins in X, number of bins in Y
184
261
  list[float]: bin edges for both X & Y
185
262
  (list[float], list[float]): bin edges for X, bin edges for Y
263
+ None: defaults to DEFAULT_NUM_BINS if bin_size is not provided
186
264
  ranges: Optional (ValueRange, ValueRange)
187
265
  ((x_min, x_max), (y_min, y_max)) for the bins if # of bins is provided
188
- Default: take from data.
266
+ Cannot be specified with bins (as count) + bin_size, or bins (as edges)
267
+ Default if allowed: take from data
268
+ bin_size: Optional float or (float, float)
269
+ Size(s) of (X,Y) bins to partition into.
270
+ Cannot be combined with bins (as edges) since edge spacing already determines size.
271
+ float: bin size for both X & Y
272
+ (float, float): bin size for X, bin size for Y
189
273
  align: bool (default: True)
190
- pick bin edges at 'round' values if # of bins is provided
274
+ pick bin edges at 'round' values if # of bins is provided, or force bin edges
275
+ to be at multiples of bin_size if bin_size is provided
191
276
  drop_outside: bool (default: True)
192
277
  True: Drop any data points outside the ranges
193
278
  False: Put any outside points in closest bin (i.e. edge bins include outliers)
@@ -196,45 +281,45 @@ def histogram2d(
196
281
  returns: Sequence[Sequence[int]], (x-)Axis, (y-)Axis
197
282
  """
198
283
 
199
- if isinstance(bins, int):
200
- # we were given a single # of bins
201
- bins = (bins, bins)
284
+ bins_specified, num_bins, bin_edges = expand_bins_arg(bins)
202
285
 
203
- if isinstance(bins, Sequence) and len(bins) > 2:
204
- # we were given a single list of bin edges: replicate it
205
- bins = (bins, bins)
286
+ bin_sizes = expand_bin_size_arg(bin_size)
287
+ if ranges is None:
288
+ ranges = (None, None)
289
+
290
+ if bin_edges and any(ranges):
291
+ raise ValueError("Cannot specify both bin edges and plot range")
292
+ if bins_specified and bin_sizes and any(ranges):
293
+ # The number of bins and bin size imply a size of plot range, so this
294
+ # is overconstrained.
295
+ raise ValueError("Cannot specify number of bins and bin size and plot range")
296
+
297
+ x_range = range_from_arg_or_data(ranges[0], (x for x, _ in points))
298
+ y_range = range_from_arg_or_data(ranges[1], (y for _, y in points))
299
+
300
+ if bins_specified and bin_sizes:
301
+ # range width must be num_bins * bin_sizes, so take the data's range
302
+ # and force the width, aligning as needed
303
+ x_range = force_value_range_width(x_range, num_bins[0] * bin_sizes[0])
304
+ if align:
305
+ x_range = align_value_range(x_range, bin_sizes[0])
206
306
 
207
- if isinstance(bins[0], int):
208
- # we were given the number of bins for X. Calculate the edges:
209
- if ranges is None or ranges[0] is None:
210
- x_range = calc_value_range(tuple(x for x, _ in points))
211
- else:
212
- x_range = ValueRange(*ranges[0])
307
+ y_range = force_value_range_width(y_range, num_bins[1] * bin_sizes[1])
308
+ if align:
309
+ y_range = align_value_range(y_range, bin_sizes[1])
213
310
 
214
- x_edges = pick_edges(bins[0], x_range, align)
311
+ # Handle different parameter combinations
312
+ if bin_edges:
313
+ x_edges, y_edges = bin_edges
215
314
  else:
216
- # we were given the bin edges already
217
- if ranges is not None and ranges[0] is not None:
218
- raise ValueError("Both bin edges and bin ranges provided, pick one or the other")
219
- assert isinstance(bins[0], Sequence)
220
- x_edges = bins[0]
221
-
222
- if isinstance(bins[1], int):
223
- # we were given the number of bins. Calculate the edges:
224
- if ranges is None or ranges[1] is None:
225
- y_range = calc_value_range(tuple(y for _, y in points))
315
+ if bin_sizes:
316
+ x_edges = make_edges(x_range, bin_sizes[0], align)
317
+ y_edges = make_edges(y_range, bin_sizes[1], align)
226
318
  else:
227
- y_range = ValueRange(*ranges[1])
228
-
229
- y_edges = pick_edges(bins[1], y_range, align)
230
- else:
231
- # we were given the bin edges already
232
- if ranges is not None and ranges[1] is not None:
233
- raise ValueError("Both bin edges and bin ranges provided, pick one or the other")
234
- assert isinstance(bins[1], Sequence)
235
- y_edges = bins[1]
319
+ # Only number of bins provided, if that
320
+ x_edges = segment_interval(num_bins[0] + 1, x_range, align)
321
+ y_edges = segment_interval(num_bins[1] + 1, y_range, align)
236
322
 
237
323
  x_axis = Axis((x_edges[0], x_edges[-1]), values_are_edges=True, **axis_args)
238
324
  y_axis = Axis((y_edges[0], y_edges[-1]), values_are_edges=True, **axis_args)
239
-
240
- return (bin_edges(points, x_edges, y_edges, drop_outside), x_axis, y_axis)
325
+ return (bin_by_edges(points, x_edges, y_edges, drop_outside), x_axis, y_axis)
densitty/colorbar.py ADDED
@@ -0,0 +1,84 @@
1
+ """Colorbar generation for density plots."""
2
+
3
+ from .axis import Axis
4
+ from .plotting import Plot
5
+
6
+
7
+ def make_colorbar(
8
+ source_plot: Plot,
9
+ label_fmt: str = "{}",
10
+ vertical: bool = False,
11
+ ) -> Plot:
12
+ """Create a colorbar Plot object from an existing Plot.
13
+
14
+ Parameters
15
+ ----------
16
+ source_plot : Plot
17
+ The Plot object to create a colorbar for.
18
+ label_fmt : str
19
+ Format string for min/max labels (e.g., "{:.2f}").
20
+ vertical : bool
21
+ Vertical/Columnnar bar rather than horizontal/row.
22
+
23
+ Returns
24
+ -------
25
+ Plot
26
+ A new Plot object representing the colorbar.
27
+ """
28
+ min_value, max_value = source_plot.data_limits()
29
+
30
+ color_map = source_plot.color_map
31
+
32
+ labels = {
33
+ min_value: label_fmt.format(min_value),
34
+ max_value: label_fmt.format(max_value),
35
+ }
36
+ axis = Axis(
37
+ value_range=(min_value, max_value),
38
+ labels=labels,
39
+ values_are_edges=False,
40
+ border_line=False,
41
+ )
42
+
43
+ if vertical:
44
+ size = len(source_plot.data) # num rows => height
45
+ gradient_data = [[i / (size - 1)] for i in range(size)] if size > 1 else [[0.5]]
46
+ else:
47
+ size = len(source_plot.data[0]) # num cols => width
48
+ gradient_data = (
49
+ [
50
+ [i / (size - 1) for i in range(size)],
51
+ ]
52
+ if size > 1
53
+ else [[0.5]]
54
+ )
55
+
56
+ if vertical:
57
+ return Plot(
58
+ data=gradient_data,
59
+ color_map=color_map,
60
+ render_halfheight=source_plot.render_halfheight,
61
+ font_mapping=source_plot.font_mapping,
62
+ y_axis=axis,
63
+ min_data=0,
64
+ max_data=1,
65
+ flip_y=True,
66
+ )
67
+
68
+ return Plot(
69
+ data=gradient_data,
70
+ color_map=color_map,
71
+ render_halfheight=source_plot.render_halfheight,
72
+ font_mapping=source_plot.font_mapping,
73
+ x_axis=axis,
74
+ min_data=0,
75
+ max_data=1,
76
+ flip_y=False,
77
+ )
78
+
79
+
80
+ def add_colorbar(source_plot: Plot, label_fmt: str = "{}", padding: str = " ") -> Plot:
81
+ """Add a vertical colorbar to an existing Plot."""
82
+ cb = make_colorbar(source_plot, label_fmt, vertical=True)
83
+ source_plot.glue_on(cb, padding)
84
+ return source_plot