densitty 0.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- densitty/__init__.py +0 -0
- densitty/ansi.py +109 -0
- densitty/ascii_art.py +24 -0
- densitty/axis.py +265 -0
- densitty/binning.py +240 -0
- densitty/detect.py +465 -0
- densitty/lineart.py +130 -0
- densitty/plot.py +201 -0
- densitty/truecolor.py +170 -0
- densitty/util.py +234 -0
- densitty-0.8.2.dist-info/METADATA +36 -0
- densitty-0.8.2.dist-info/RECORD +15 -0
- densitty-0.8.2.dist-info/WHEEL +5 -0
- densitty-0.8.2.dist-info/licenses/LICENSE +21 -0
- densitty-0.8.2.dist-info/top_level.txt +1 -0
densitty/__init__.py
ADDED
|
File without changes
|
densitty/ansi.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""ANSI code/color support"""
|
|
2
|
+
|
|
3
|
+
# <pedantic> Note that the 256-color support here is not actually ANSI X3.64, though it uses ANSI-ish
|
|
4
|
+
# escape sequences. I believe it was originally done in Xterm. And 4b colors (16-color)
|
|
5
|
+
# are really an aixterm extension to the ANSI-specified 8-color standard. </pedantic>
|
|
6
|
+
|
|
7
|
+
from typing import Optional, Sequence
|
|
8
|
+
|
|
9
|
+
from .util import nearest
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
RESET = "\033[0m"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def compose(codes: Sequence[str]) -> str:
    """Build one escape sequence out of individual color-code strings.

    The codes are joined with ';' and wrapped between the ``ESC [`` prefix and
    the terminating ``m``. An empty sequence of codes yields ``ESC [ m``.
    """
    joined = ";".join(codes)
    return "\033[" + joined + "m"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def colormap_16(colors):
    """Produce a function that returns closest 4b/16color ANSI color codes from colormap.

    Parameters
    ----------
    colors: Sequence[int]
            Ordered 16-color ANSI colors corresponding to the 0.0..1.0 range.
            Values 0..7 select the standard colors, 8..15 the bright variants.
    """

    def sgr_code(base: int, color: int) -> str:
        """Return the SGR parameter for `color`, where `base` is 30 (fg) or 40 (bg)."""
        if color >= 8:
            # Bright colors are not contiguous with the standard ones: they sit 60
            # above the standard block (90..97 foreground, 100..107 background).
            return f"{base + 60 + (color - 8)}"
        return f"{base + color}"

    def as_colorcodes(bg_frac: Optional[float], fg_frac: Optional[float]) -> str:
        """Return ANSI color code for 16-color value(s)

        Note the argument order: background first, then foreground.

        Parameters
        ----------
        bg_frac: Optional[float]
                 Value 0.0..1.0 for background, or None for foreground-only
        fg_frac: Optional[float]
                 Value 0.0..1.0 for foreground, or None if background-only
        """
        codes = []
        # `nearest` is imported from .util; it is used here as returning the entry
        # of `colors` selected by the fraction -- TODO confirm against util.nearest
        if fg_frac is not None:
            codes.append(sgr_code(30, nearest(colors, fg_frac)))
        if bg_frac is not None:
            codes.append(sgr_code(40, nearest(colors, bg_frac)))
        return compose(codes)

    return as_colorcodes
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def colormap_256(colors):
    """Produce a function that returns closest 8b/256color ANSI color codes from colormap.

    Parameters
    ----------
    colors: Sequence[int]
            Ordered 256-color ANSI colors corresponding to the 0.0..1.0 range
    """

    def as_colorcodes(bg_frac: Optional[float], fg_frac: Optional[float]):
        """Return ANSI color code for 256-color value(s)

        Note the argument order: background first, then foreground.

        Parameters
        ----------
        bg_frac: Optional[float]
                 Value 0.0..1.0 for background, or None for foreground-only
        fg_frac: Optional[float]
                 Value 0.0..1.0 for foreground, or None if background-only
        """
        # Foreground is emitted before background; a None fraction is skipped.
        selectors = (("38;5;", fg_frac), ("48;5;", bg_frac))
        codes = [
            f"{prefix}{nearest(colors, frac)}"
            for prefix, frac in selectors
            if frac is not None
        ]
        return compose(codes)

    return as_colorcodes
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
########################################################
|
|
77
|
+
# Colormaps. Assumed 256-color unless suffixed with _16
|
|
78
|
+
# pylint: disable=invalid-name
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# ANSI 16-color map colors in rainbow-like order: Magenta, Red, Yellow, Green, Cyan
|
|
82
|
+
RAINBOW_16 = colormap_16((5, 1, 3, 2, 6))
|
|
83
|
+
|
|
84
|
+
# ANSI 16-color 'rainbow', Reversed:
|
|
85
|
+
REV_RAINBOW_16 = colormap_16((6, 2, 3, 1, 5))
|
|
86
|
+
|
|
87
|
+
# ANSI 16-color map colors: Black, Blue, Cyan, Green, Yellow, Red, Magenta, White
|
|
88
|
+
FADE_IN_16 = colormap_16((0, 4, 6, 2, 3, 1, 5, 7))
|
|
89
|
+
|
|
90
|
+
# ANSI 256-color map colors in a grayscale black->white
|
|
91
|
+
GRAYSCALE = colormap_256([0] + list(range(232, 256)) + [15])
|
|
92
|
+
|
|
93
|
+
rainbow_256_colors = (
|
|
94
|
+
# fmt: off
|
|
95
|
+
(196, 202, 208, 214, 220, 190, 154, 118, 82, 46, 47, 48, 43, 80, 81, 39, 27, 21, 56, 91)
|
|
96
|
+
# fmt: on
|
|
97
|
+
)
|
|
98
|
+
RAINBOW = colormap_256(rainbow_256_colors)
|
|
99
|
+
REV_RAINBOW = colormap_256(tuple(reversed(rainbow_256_colors)))
|
|
100
|
+
|
|
101
|
+
BLUE_RED = colormap_256((21, 56, 91, 126, 161, 196))
|
|
102
|
+
FADE_IN = colormap_256(
|
|
103
|
+
# fmt: off
|
|
104
|
+
(16, 53, 54, 55, 56, 57, 21, 21, 27, 39, 81, 80, 43, 48, 47,
|
|
105
|
+
46, 82, 118, 154, 190, 220, 214, 208, 202, 196)
|
|
106
|
+
# fmt: on
|
|
107
|
+
)
|
|
108
|
+
HOT = colormap_256((16, 52, 88, 124, 160, 196, 202, 208, 214, 220, 226, 227, 228, 229, 230, 231))
|
|
109
|
+
COOL = colormap_256((50, 81, 111, 141, 171, 201))
|
densitty/ascii_art.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""ASCII-art support"""
|
|
2
|
+
|
|
3
|
+
from typing import Callable, Sequence
|
|
4
|
+
|
|
5
|
+
from .util import nearest
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def color_map(values: Sequence[str]) -> Callable:
    """Build a pixel function that picks an ascii-art glyph from `values`.

    The returned callable takes a fraction plus an optional, ignored second
    argument, and returns whatever `nearest(values, frac)` selects.
    """

    def compute_pixel_value(frac: float, _=None) -> str:
        glyph = nearest(values, frac)
        return glyph

    return compute_pixel_value
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
#
|
|
18
|
+
# Some example/useful color scales
|
|
19
|
+
# Character (glyph) density is dependent on font choice, unfortunately
|
|
20
|
+
|
|
21
|
+
# Allow the all-caps colormap names:
|
|
22
|
+
# pylint: disable=invalid-name
|
|
23
|
+
DEFAULT = color_map(" .:-=+*#%@")
|
|
24
|
+
EXTENDED = color_map(" .'`^\",:;Il!i>~+?[{1(|/o*#MW&8%B$@")
|
densitty/axis.py
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
"""Axis-generation support."""
|
|
2
|
+
|
|
3
|
+
import dataclasses
|
|
4
|
+
from decimal import Decimal
|
|
5
|
+
import itertools
|
|
6
|
+
import math
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
from . import lineart
|
|
10
|
+
from .util import FloatLike, ValueRange, pick_step_size
|
|
11
|
+
|
|
12
|
+
MIN_X_TICKS_PER_LABEL = 4
|
|
13
|
+
MIN_Y_TICKS_PER_LABEL = 2
|
|
14
|
+
DEFAULT_X_COLS_PER_TICK = 4
|
|
15
|
+
DEFAULT_Y_ROWS_PER_TICK = 2
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclasses.dataclass
class BorderChars:
    """Characters to use for X/Y border"""

    first: str  # character used at the starting end of the border
    middle: str  # character repeated along the body of the border
    last: str  # character used at the finishing end of the border
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
y_border = {False: BorderChars(" ", " ", " "), True: BorderChars("╷", "│", "╵")}
|
|
28
|
+
x_border = {False: BorderChars(" ", " ", " "), True: BorderChars("╶", "─", "╴")}
|
|
29
|
+
|
|
30
|
+
###############################################
|
|
31
|
+
# Helper functions used by the Axis class below
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def add_label(line: list[str], label: str, ctr_pos: int):
    """Write `label` into `line` in place, centered on column `ctr_pos`.

    `line` is a list of single-character strings. The label's left edge is
    clamped to column 0; a label that runs past the end of `line` makes the
    list longer rather than being truncated.
    """
    left = ctr_pos - len(label) // 2
    if left < 0:
        left = 0
    line[left : left + len(label)] = [*label]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def gen_tick_values(value_range, tick_step):
    """Yield the multiples of `tick_step` that lie within `value_range`.

    The first value is the smallest multiple of `tick_step` that is >=
    `value_range.min`; each following value is obtained by adding `tick_step`,
    and generation stops once a value exceeds `value_range.max`.
    """
    upper = value_range.max
    current = math.ceil(value_range.min / tick_step) * tick_step
    while True:
        if not current <= upper:
            return
        yield current
        current = current + tick_step
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def gen_labels(
    value_range: ValueRange, num_ticks, min_ticks_per_label, fmt, label_end_ticks=False
):
    """Generate positions for labels (plain ticks & ticks with value)

    Parameters
    ----------
    value_range: ValueRange
                 Range of axis values to produce ticks for
    num_ticks, min_ticks_per_label:
                 Passed through to `pick_step_size`, which chooses the tick step
                 and the label step
    fmt: str
                 Format string applied (via `fmt.format(value)`) to labeled values
    label_end_ticks: bool
                 Force the first and last ticks to carry a label

    Returns
    -------
    dict mapping each tick value to its label text ("" for an unlabeled tick).
    """
    tick_step, label_step = pick_step_size(value_range, num_ticks, min_ticks_per_label)

    ticks = list(gen_tick_values(value_range, tick_step))
    label_values = list(gen_tick_values(value_range, label_step))
    # `ticks` is checked first so an empty tick list cannot be indexed below.
    if ticks and (label_end_ticks or len(label_values) <= 2):
        # ensure that first & last ticks have labels.
        # label_values may be empty here (no multiple of label_step inside the
        # range), so test for that before indexing it.
        if not label_values or label_values[0] != ticks[0]:
            label_values = ticks[:1] + label_values
        if label_values[-1] != ticks[-1]:
            label_values += ticks[-1:]

    # sanity: if all but one ticks have labels, just label them all
    if len(label_values) >= len(ticks) - 1:
        label_values = ticks

    ticks_only = {value: "" for value in ticks}
    labeled_ticks = {value: fmt.format(value) for value in label_values}

    # labeled entries override the empty-string entries for the same value
    return ticks_only | labeled_ticks
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def calc_edges(value_range, num_bins, values_are_edges):
|
|
79
|
+
"""Calculate the top/bottom or left/right values for each of 'num_bins' bins
|
|
80
|
+
|
|
81
|
+
Parameters
|
|
82
|
+
----------
|
|
83
|
+
value_range: util.ValueRange
|
|
84
|
+
Coordinate values for first/last bin
|
|
85
|
+
Can be center of bin, or outside edge (see values_are_edges)
|
|
86
|
+
num_bins: int
|
|
87
|
+
Number of bins/intervals to produce edges for
|
|
88
|
+
values_are_edges: bool
|
|
89
|
+
Indicates that value_range specifies outside edges rather than bin centers
|
|
90
|
+
"""
|
|
91
|
+
if values_are_edges:
|
|
92
|
+
bin_delta = (value_range.max - value_range.min) / num_bins
|
|
93
|
+
first_bin_min = value_range.min
|
|
94
|
+
else:
|
|
95
|
+
bin_delta = (value_range.max - value_range.min) / (num_bins - 1)
|
|
96
|
+
first_bin_min = value_range.min - (bin_delta / 2)
|
|
97
|
+
bin_edges = tuple(first_bin_min + i * bin_delta for i in range(num_bins + 1))
|
|
98
|
+
return itertools.pairwise(bin_edges)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
###############################################
|
|
102
|
+
# The User-facing interface: the Axis class
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@dataclasses.dataclass
class Axis:
    """Options for axis generation.

    Holds the value range covered by one plot dimension plus tick/label
    options, and renders it as a Y axis (`render_as_y`) or an X axis
    (`render_as_x`).
    """

    value_range: ValueRange  # can also specify as a tuple of (min, max)
    labels: Optional[dict[float, str]] = None  # map axis value to label (plus tick) at that value
    label_fmt: str = "{}"  # format for generated labels
    border_line: bool = False  # embed ticks in a horizontal X-axis or vertical Y-axis line
    values_are_edges: bool = False  # N+1 values, indicating boundaries between pixels, not centers
    fractional_tick_pos: bool = False  # Use "▔", "▁", or "╱╲" for non-centered ticks

    def __init__(
        self,
        value_range: ValueRange | tuple[FloatLike, FloatLike],
        labels: Optional[dict[float, str]] = None,
        label_fmt: str = "{}",
        border_line: bool = False,
        values_are_edges: bool = False,
        fractional_tick_pos: bool = False,
        # pylint: disable=too-many-arguments,too-many-positional-arguments
    ):
        # Sanitize value_range: allow user to provide it as a tuple of FloatLike (without
        # needing to import ValueRange), and convert to ValueRange(Decimal, Decimal)
        self.value_range = ValueRange(
            Decimal(float(value_range[0])), Decimal(float(value_range[1]))
        )
        self.labels = labels
        self.label_fmt = label_fmt
        self.border_line = border_line
        self.values_are_edges = values_are_edges
        self.fractional_tick_pos = fractional_tick_pos

    def _labels_by_value(self, num_ticks: int, min_ticks_per_label: int) -> dict:
        """Return the label map to render, keyed by Decimal axis value.

        Uses the user-supplied `labels` if present, otherwise generates them with
        `gen_labels`. Keys that are not already Decimal (e.g. the floats a user
        passes in) are converted, because bin edges are Decimal and Python does
        not allow float/Decimal arithmetic.
        """
        if self.labels is None:
            labels = gen_labels(self.value_range, num_ticks, min_ticks_per_label, self.label_fmt)
        else:
            labels = self.labels
        return {
            (value if isinstance(value, Decimal) else Decimal(float(value))): text
            for value, text in labels.items()
        }

    def _unjustified_y_axis(self, num_rows: int):
        """Returns the Y axis string for each line of the plot"""
        labels = self._labels_by_value(num_rows // DEFAULT_Y_ROWS_PER_TICK, MIN_Y_TICKS_PER_LABEL)
        label_values = sorted(labels)
        next_label = 0  # index of the first label not yet placed or discarded
        bins = calc_edges(self.value_range, num_rows, self.values_are_edges)

        use_combining = self.border_line and self.fractional_tick_pos
        for row_min, row_max in bins:
            # Discard labels that lie below this row (out of range, or sharing a row
            # with an already-placed label); otherwise they would never match and
            # would block every label after them.
            while next_label < len(label_values) and label_values[next_label] < row_min:
                next_label += 1

            if next_label < len(label_values) and row_min <= label_values[next_label] <= row_max:
                value = label_values[next_label]
                next_label += 1
                label_str = labels[value]

                # Where in the row the tick falls: 0 at row_min, 1 at row_max
                offset_frac = (value - row_min) / (row_max - row_min)
                if offset_frac < 0.25 and self.fractional_tick_pos:
                    tick_char = "▔"
                elif offset_frac > 0.75 and self.fractional_tick_pos:
                    tick_char = "▁"
                else:
                    tick_char = "─"
                label_str += lineart.merge_chars(
                    tick_char,
                    y_border[self.border_line].middle,
                    use_combining_unicode=use_combining,
                )
                yield label_str
            else:
                yield y_border[self.border_line].middle

    def render_as_y(self, num_rows: int, pad_top: bool, pad_bot: bool, flip: bool):
        """Create a Y axis as a list of strings for the left margin of a plot

        Parameters
        ----------
        num_rows: int
                  Number of data rows
        pad_top: bool
                  Emit a line for an X axis line/row at the top
        pad_bot: bool
                  Emit a line for an X axis line/row at the bottom
        flip: bool
                  Put the minimum Y on the last line rather than the first

        Returns
        -------
        list of str, left-padded with spaces to a common display width.
        """
        unpadded_labels = list(self._unjustified_y_axis(num_rows))
        if flip:
            # Reversing the row order also requires swapping glyphs via the
            # lineart.flip_vertical translation table.
            unpadded_labels = [
                s.translate(lineart.flip_vertical) for s in reversed(unpadded_labels)
            ]

        if pad_top:
            unpadded_labels = [y_border[self.border_line].first] + unpadded_labels
        if pad_bot:
            unpadded_labels = unpadded_labels + [y_border[self.border_line].last]

        lengths = [lineart.display_len(label_str) for label_str in unpadded_labels]
        max_width = max(lengths, default=0)  # default: no rows and no padding lines
        return [
            " " * (max_width - length) + label_str
            for label_str, length in zip(unpadded_labels, lengths)
        ]

    def render_as_x(self, num_cols: int, left_margin: int):
        """Generate X tick line and X label line.

        Parameters
        ----------
        num_cols: int
                  Number of data columns
        left_margin: int
                  chars to the left of leftmost data col. May have Labels/border-line.

        Returns
        -------
        (tick_line, label_line) as two strings.
        """
        labels = self._labels_by_value(num_cols // DEFAULT_X_COLS_PER_TICK, MIN_X_TICKS_PER_LABEL)
        label_values = sorted(labels)
        next_label = 0  # index of the first label not yet placed or discarded

        bins = calc_edges(self.value_range, num_cols, self.values_are_edges)

        tick_line = list(
            " " * (left_margin - 1)
            + x_border[self.border_line].first
            + x_border[self.border_line].middle * num_cols
            + x_border[self.border_line].last
        )

        label_line = [" "] * len(tick_line)  # labels under the ticks

        for col_idx, (col_min, col_max) in enumerate(bins):
            # Discard labels that lie below this column; they can never match and
            # would otherwise block every label after them.
            while next_label < len(label_values) and label_values[next_label] < col_min:
                next_label += 1
            if next_label >= len(label_values):
                break

            # use Decimal.next_plus to accommodate rounding error/truncation
            if col_min <= label_values[next_label] <= col_max.next_plus():
                value = label_values[next_label]
                next_label += 1  # this label is now placed

                add_label(label_line, labels[value], col_idx + left_margin)
                tick_idx = left_margin + col_idx
                # Where in the column the tick falls: 0 at col_min, 1 at col_max
                offset_frac = (value - col_min) / (col_max - col_min)
                if self.fractional_tick_pos and offset_frac < 0.25:
                    # Tick near the left edge: draw it straddling this column and the
                    # one before (a plain bar in the border cell for the first column)
                    if col_idx == 0:
                        tick_line[tick_idx - 1] = lineart.merge_chars("│", tick_line[tick_idx - 1])
                    else:
                        tick_line[tick_idx - 1] = "╱"
                    tick_line[tick_idx] = "╲"
                elif self.fractional_tick_pos and offset_frac > 0.75:
                    # Tick near the right edge: straddle this column and the next
                    tick_line[tick_idx] = "╱"
                    if col_idx < num_cols - 1:
                        tick_line[tick_idx + 1] = "╲"
                    else:
                        tick_line[tick_idx + 1] = lineart.merge_chars("│", tick_line[tick_idx + 1])
                else:
                    tick_line[tick_idx] = lineart.merge_chars("│", tick_line[tick_idx])

        return "".join(tick_line), "".join(label_line)
|
densitty/binning.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""Bin point data for a 2-D histogram"""
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
from bisect import bisect_right
|
|
5
|
+
from decimal import Decimal
|
|
6
|
+
from typing import Optional, Sequence
|
|
7
|
+
|
|
8
|
+
from .axis import Axis
|
|
9
|
+
from .util import FloatLike, ValueRange
|
|
10
|
+
from .util import clamp, decimal_value_range, most_round, round_up_ish
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def bin_edges(
    points: Sequence[tuple[FloatLike, FloatLike]],
    x_edges: Sequence[FloatLike],
    y_edges: Sequence[FloatLike],
    drop_outside: bool = True,
) -> Sequence[Sequence[int]]:
    """Count points into a 2-D grid of bins delimited by the given edges.

    Parameters
    ----------
    points: Sequence of (X,Y) tuples: the points to bin
    x_edges: Sequence of values: Edges of the bins in X (N+1 values for N bins)
    y_edges: Sequence of values: Edges of the bins in Y (N+1 values for N bins)
    drop_outside: bool (default: True)
                  True: Drop any data points outside the ranges
                  False: Put any outside points in closest bin (i.e. edge bins include outliers)

    Returns
    -------
    List of rows (one per Y bin), each a list of counts (one per X bin).
    """
    num_x_bins = len(x_edges) - 1
    num_y_bins = len(y_edges) - 1
    counts = [[0] * num_x_bins for _ in range(num_y_bins)]
    for px, py in points:
        # bisect_right puts a value equal to an edge into the bin that starts
        # at that edge, i.e. bins are closed on the left and open on the right.
        col = bisect_right(x_edges, px) - 1
        row = bisect_right(y_edges, py) - 1
        if not drop_outside:
            counts[clamp(row, 0, num_y_bins - 1)][clamp(col, 0, num_x_bins - 1)] += 1
        elif 0 <= col < num_x_bins and 0 <= row < num_y_bins:
            counts[row][col] += 1
    return counts
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def calc_value_range(values: Sequence[FloatLike]) -> ValueRange:
    """Derive a ValueRange from data values.

    For empty input a placeholder range of (0, 1) is returned instead of
    raising. Otherwise the range runs from the smallest value to just above
    the largest one.
    """
    if values:
        largest = max(values)
        # Bins are closed on the left and open on the right (left_edge <= value <
        # right_edge), so the top of the range has to be strictly greater than the
        # largest value: add math.ulp of that value to it.
        return ValueRange(min(values), largest + math.ulp(largest))

    # Could raise an exception here, but for now just return _something_
    return ValueRange(0, 1)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def pick_edges(
    num_bins: int,
    value_range: ValueRange,
    align=True,
) -> Sequence[FloatLike]:
    """Pick bin edges based on data values.

    Parameters
    ----------
    num_bins: int
              Number of bins to partition into
    value_range: ValueRange
              Min/Max of the values to be binned
    align: bool
              Adjust the range somewhat to put bin size & edges on "round" values

    Returns
    -------
    Tuple of num_bins + 1 edge values, evenly spaced by the chosen step size.
    """
    value_range = decimal_value_range(value_range)  # coerce into Decimal if not already

    # Smallest step that still spans the whole range with num_bins bins
    min_step_size = (value_range.max - value_range.min) / num_bins
    if align:
        # NOTE(review): round_up_ish lives in .util; presumably it rounds up to a
        # "round" step size -- confirm there.
        step_size = round_up_ish(min_step_size)
        first_edge = math.floor(Decimal(value_range.min) / step_size) * step_size
        if first_edge + num_bins * step_size < value_range.max:
            # Uh oh: even though we rounded up the bin size, shifting the first edge
            # down to a multiple has shifted the last edge down too far. Bump up the step size:
            step_size = round_up_ish(step_size * Decimal(1.015625))
            first_edge = math.floor(Decimal(value_range.min) / step_size) * step_size
        # we now have a round step size, and a first edge that is the highest possible
        # multiple of it. Test to see if any lower multiples of it will still include the
        # whole range, and be "nicer" i.e. if data is all in 1.1..9.5 range with 10 bins,
        # we now have bins covering 1-11, but could have 0-10
        last_edge = first_edge + step_size * num_bins
        # Number of whole steps the window can slide down while still covering the max
        num_trials = int((last_edge - value_range.max) // step_size + 1)
        offsets = (step_size * i for i in range(num_trials))
        edge_pairs = ((first_edge - offset, last_edge - offset) for offset in offsets)
        # NOTE(review): most_round (from .util) picks one (first, last) candidate pair;
        # only its first element is used here.
        first_edge = most_round(edge_pairs)[0]

    else:
        # No alignment: divide the given range evenly
        step_size = min_step_size
        first_edge = value_range.min

    num_edges = num_bins + 1
    return tuple(first_edge + step_size * i for i in range(num_edges))
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def edge_range(start: FloatLike, end: FloatLike, step: FloatLike, align: bool):
    """Similar to range/np.arange, but includes "end" in the output if appropriate

    Yields edge values spaced `step` apart, beginning at `start` (or, with
    `align`, at the multiple of `step` at or below `start`) and continuing until
    an edge at or beyond `end` has been produced.

    Parameters
    ----------
    start, end: FloatLike
                Range that the edges must cover
    step: FloatLike
                Spacing between edges; must be positive
    align: bool
                Put every edge on a multiple of `step`

    Raises
    ------
    ValueError
        If `step` is not positive (the sequence would never terminate).
    """
    if step <= 0:
        raise ValueError(f"step must be positive, got {step!r}")

    if align:
        v = math.floor(start / step) * step
    else:
        v = start
    # Going one step past `end` guarantees the last edge is >= end
    limit = end + step
    while v < limit:
        if align:
            # re-snap to an exact multiple so accumulated addition error doesn't leak out
            yield round(v / step) * step
        else:
            yield v
        v += step
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def bin_with_size(
    points: Sequence[tuple[FloatLike, FloatLike]],
    bin_sizes: FloatLike | tuple[FloatLike, FloatLike],
    ranges: Optional[tuple[ValueRange, ValueRange]] = None,
    align=True,
    drop_outside=True,
    **axis_args,
) -> tuple[Sequence[Sequence[int]], Axis, Axis]:
    """Bin points into a 2-D histogram, given bin sizes

    Parameters
    ----------
    points: Sequence of (X,Y) tuples: the points to bin
    bin_sizes: float or tuple(float, float)
                Size(s) of (X,Y) bins to partition into
    ranges: Optional (ValueRange, ValueRange)
                ((x_min, x_max), (y_min, y_max)) for the bins. Default: take from data.
    align: bool (default: True)
                Force bin edges to be at a multiple of the bin size
    drop_outside: bool (default: True)
                True: Drop any data points outside the ranges
                False: Put any outside points in closest bin (i.e. edge bins include outliers)
    axis_args: Extra arguments to pass through to Axis constructor

    returns: Sequence[Sequence[int]], (x-)Axis, (y-)Axis
    """

    if ranges is None:
        x_range = calc_value_range(tuple(x for x, _ in points))
        y_range = calc_value_range(tuple(y for _, y in points))
    else:
        x_range, y_range = ValueRange(*ranges[0]), ValueRange(*ranges[1])

    if not isinstance(bin_sizes, tuple):
        # given just a single bin size: replicate it for both axes:
        bin_sizes = (bin_sizes, bin_sizes)

    x_edges = tuple(edge_range(x_range.min, x_range.max, bin_sizes[0], align))
    y_edges = tuple(edge_range(y_range.min, y_range.max, bin_sizes[1], align))

    # The bins span first edge..last edge, which generally extends beyond the
    # requested/data range (edges are whole bin sizes apart, and may be aligned
    # downward). Describe the axes by the edges so ticks line up with the bins.
    x_axis = Axis((x_edges[0], x_edges[-1]), values_are_edges=True, **axis_args)
    y_axis = Axis((y_edges[0], y_edges[-1]), values_are_edges=True, **axis_args)

    return (bin_edges(points, x_edges, y_edges, drop_outside=drop_outside), x_axis, y_axis)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _resolve_axis_edges(bin_spec, axis_range, points, coord_idx, align):
    """Return the bin edges for one axis, from either a bin count or explicit edges."""
    if isinstance(bin_spec, int):
        # we were given the number of bins. Calculate the edges:
        if axis_range is None:
            value_range = calc_value_range(tuple(point[coord_idx] for point in points))
        else:
            value_range = ValueRange(*axis_range)
        return pick_edges(bin_spec, value_range, align)

    # we were given the bin edges already
    if axis_range is not None:
        raise ValueError("Both bin edges and bin ranges provided, pick one or the other")
    if not isinstance(bin_spec, Sequence):
        # explicit check rather than `assert`, so it is not stripped under -O
        raise TypeError(f"bins must be an int or a sequence of bin edges, got {bin_spec!r}")
    return bin_spec


def histogram2d(
    points: Sequence[tuple[FloatLike, FloatLike]],
    bins: (
        int
        | tuple[int, int]
        | Sequence[FloatLike]
        | tuple[Sequence[FloatLike], Sequence[FloatLike]]
    ) = 10,
    ranges: Optional[tuple[Optional[ValueRange], Optional[ValueRange]]] = None,
    align=True,
    drop_outside=True,
    **axis_args,
) -> tuple[Sequence[Sequence[int]], Axis, Axis]:
    """Bin points into a 2-D histogram, given number of bins, or bin edges

    Parameters
    ----------
    points: Sequence of (X,Y) tuples: the points to bin
    bins: int or (int, int) or [float,...] or ([float,...], [float,...])
                int: number of bins for both X & Y (default: 10)
                (int,int): number of bins in X, number of bins in Y
                list[float]: bin edges for both X & Y
                (list[float], list[float]): bin edges for X, bin edges for Y
                A sequence is only treated as a single list of edges when it has
                more than two entries; a two-entry sequence is read as (X, Y).
    ranges: Optional (ValueRange, ValueRange)
                ((x_min, x_max), (y_min, y_max)) for the bins if # of bins is provided
                Default: take from data.
    align: bool (default: True)
                pick bin edges at 'round' values if # of bins is provided
    drop_outside: bool (default: True)
                True: Drop any data points outside the ranges
                False: Put any outside points in closest bin (i.e. edge bins include outliers)
    axis_args: Extra arguments to pass through to Axis constructor

    returns: Sequence[Sequence[int]], (x-)Axis, (y-)Axis

    Raises
    ------
    ValueError
        If an axis is given both explicit bin edges and a range.
    TypeError
        If a per-axis bin specification is neither an int nor a sequence.
    """

    if isinstance(bins, int):
        # we were given a single # of bins
        bins = (bins, bins)

    if isinstance(bins, Sequence) and len(bins) > 2:
        # we were given a single list of bin edges: replicate it
        bins = (bins, bins)

    x_limits, y_limits = (None, None) if ranges is None else (ranges[0], ranges[1])
    x_edges = _resolve_axis_edges(bins[0], x_limits, points, 0, align)
    y_edges = _resolve_axis_edges(bins[1], y_limits, points, 1, align)

    # The axes describe the outer bin edges, so ticks line up with the bins
    x_axis = Axis((x_edges[0], x_edges[-1]), values_are_edges=True, **axis_args)
    y_axis = Axis((y_edges[0], y_edges[-1]), values_are_edges=True, **axis_args)

    return (bin_edges(points, x_edges, y_edges, drop_outside), x_axis, y_axis)
|