plothist 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plothist/__init__.py +5 -5
- plothist/_version.py +2 -2
- plothist/comparison.py +170 -120
- plothist/examples/1d_hist/1d_comparison_asymmetry.py +37 -0
- plothist/examples/1d_hist/1d_comparison_difference.py +40 -0
- plothist/examples/1d_hist/1d_comparison_efficiency.py +37 -0
- plothist/examples/1d_hist/1d_comparison_only_efficiency.py +33 -0
- plothist/examples/1d_hist/1d_comparison_pull.py +37 -0
- plothist/examples/1d_hist/1d_comparison_ratio.py +37 -0
- plothist/examples/1d_hist/1d_comparison_relative_difference.py +37 -0
- plothist/examples/1d_hist/1d_comparison_split_ratio.py +37 -0
- plothist/examples/1d_hist/1d_elt1.py +38 -0
- plothist/examples/1d_hist/1d_elt1_stacked.py +45 -0
- plothist/examples/1d_hist/1d_elt2.py +33 -0
- plothist/examples/1d_hist/1d_hist_simple.py +28 -0
- plothist/examples/1d_hist/1d_int_category.py +41 -0
- plothist/examples/1d_hist/1d_profile.py +33 -0
- plothist/examples/1d_hist/1d_side_by_side.py +58 -0
- plothist/examples/1d_hist/1d_str_category.py +41 -0
- plothist/examples/1d_hist/README.rst +4 -0
- plothist/examples/2d_hist/2d_hist_correlations.py +65 -0
- plothist/examples/2d_hist/2d_hist_simple.py +28 -0
- plothist/examples/2d_hist/2d_hist_simple_discrete_colormap.py +42 -0
- plothist/examples/2d_hist/2d_hist_uneven.py +28 -0
- plothist/examples/2d_hist/2d_hist_with_projections.py +36 -0
- plothist/examples/2d_hist/README.rst +4 -0
- plothist/examples/README.rst +7 -0
- plothist/examples/advanced/1d_comparison_advanced.py +87 -0
- plothist/examples/advanced/1d_side_by_side_with_numbers.py +81 -0
- plothist/examples/advanced/README.rst +4 -0
- plothist/examples/advanced/asymmetry_comparison_advanced.py +133 -0
- plothist/examples/advanced/model_examples_flatten2D.py +86 -0
- plothist/examples/func_1d/README.rst +4 -0
- plothist/examples/func_1d/fct_1d.py +27 -0
- plothist/examples/func_1d/fct_1d_stacked.py +42 -0
- plothist/examples/model_ex/README.rst +4 -0
- plothist/examples/model_ex/model_all_comparisons.py +103 -0
- plothist/examples/model_ex/model_all_comparisons_no_model_unc.py +115 -0
- plothist/examples/model_ex/model_examples_pull.py +56 -0
- plothist/examples/model_ex/model_examples_pull_no_model_unc.py +59 -0
- plothist/examples/model_ex/model_examples_stacked.py +74 -0
- plothist/examples/model_ex/model_examples_stacked_unstacked.py +60 -0
- plothist/examples/model_ex/model_examples_unstacked.py +57 -0
- plothist/examples/model_ex/model_with_stacked_and_unstacked_function_components.py +50 -0
- plothist/examples/model_ex/model_with_stacked_and_unstacked_histograms_components.py +69 -0
- plothist/examples/model_ex/ratio_data_vs_model_with_stacked_and_unstacked_function_components.py +61 -0
- plothist/examples/utility/README.rst +4 -0
- plothist/examples/utility/add_text_example.py +39 -0
- plothist/examples/utility/color_palette_hists.py +94 -0
- plothist/examples/utility/color_palette_squares.py +100 -0
- plothist/examples/utility/matplotlib_vs_plothist_style.py +63 -0
- plothist/examples/utility/uncertainty_types.py +120 -0
- plothist/histogramming.py +60 -39
- plothist/plothist_style.py +56 -59
- plothist/plotters.py +210 -195
- plothist/test_helpers.py +43 -0
- plothist/variable_registry.py +46 -30
- {plothist-1.4.0.dist-info → plothist-1.6.0.dist-info}/METADATA +1 -1
- plothist-1.6.0.dist-info/RECORD +64 -0
- plothist/scripts/__init__.py +0 -3
- plothist/scripts/make_examples.py +0 -209
- plothist-1.4.0.dist-info/RECORD +0 -17
- plothist-1.4.0.dist-info/entry_points.txt +0 -2
- {plothist-1.4.0.dist-info → plothist-1.6.0.dist-info}/WHEEL +0 -0
- {plothist-1.4.0.dist-info → plothist-1.6.0.dist-info}/licenses/AUTHORS.md +0 -0
- {plothist-1.4.0.dist-info → plothist-1.6.0.dist-info}/licenses/LICENSE +0 -0
plothist/__init__.py
CHANGED
|
@@ -85,18 +85,19 @@ __all__ = [
|
|
|
85
85
|
]
|
|
86
86
|
|
|
87
87
|
|
|
88
|
-
|
|
88
|
+
from importlib import resources
|
|
89
89
|
from importlib.resources import files
|
|
90
90
|
|
|
91
|
+
import boost_histogram as bh
|
|
92
|
+
import matplotlib.font_manager as fm
|
|
91
93
|
import matplotlib.pyplot as plt
|
|
92
94
|
|
|
95
|
+
# Get style file and use it
|
|
96
|
+
|
|
93
97
|
style_file = files("plothist").joinpath("default_style.mplstyle")
|
|
94
98
|
plt.style.use(style_file)
|
|
95
99
|
|
|
96
100
|
# Install fonts
|
|
97
|
-
from importlib import resources
|
|
98
|
-
|
|
99
|
-
import matplotlib.font_manager as fm
|
|
100
101
|
|
|
101
102
|
with resources.as_file(resources.files("plothist_utils") / "fonts") as font_path:
|
|
102
103
|
font_files = fm.findSystemFonts(fontpaths=[str(font_path)])
|
|
@@ -104,7 +105,6 @@ with resources.as_file(resources.files("plothist_utils") / "fonts") as font_path
|
|
|
104
105
|
fm.fontManager.addfont(font)
|
|
105
106
|
|
|
106
107
|
# Check version of boost_histogram
|
|
107
|
-
import boost_histogram as bh
|
|
108
108
|
|
|
109
109
|
if tuple(int(part) for part in bh.__version__.split(".")) < (1, 4, 0):
|
|
110
110
|
raise ImportError(
|
plothist/_version.py
CHANGED
plothist/comparison.py
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import boost_histogram as bh
|
|
3
4
|
import numpy as np
|
|
4
5
|
from scipy import stats
|
|
5
6
|
|
|
6
7
|
from plothist.histogramming import _check_counting_histogram
|
|
7
8
|
|
|
8
9
|
|
|
9
|
-
def _check_uncertainty_type(uncertainty_type):
|
|
10
|
+
def _check_uncertainty_type(uncertainty_type: str) -> None:
|
|
10
11
|
"""
|
|
11
12
|
Check that the uncertainty type is valid.
|
|
12
13
|
|
|
@@ -21,19 +22,26 @@ def _check_uncertainty_type(uncertainty_type):
|
|
|
21
22
|
If the uncertainty type is not valid.
|
|
22
23
|
|
|
23
24
|
"""
|
|
24
|
-
|
|
25
|
+
_valid_uncertainty_types = [
|
|
26
|
+
"symmetrical",
|
|
27
|
+
"asymmetrical",
|
|
28
|
+
"asymmetrical_double_sided_zeros",
|
|
29
|
+
"asymmetrical_one_sided_zeros",
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
if uncertainty_type not in _valid_uncertainty_types:
|
|
25
33
|
raise ValueError(
|
|
26
|
-
f"Uncertainty type {uncertainty_type} not valid. Must be
|
|
34
|
+
f"Uncertainty type {uncertainty_type} not valid. Must be in {_valid_uncertainty_types}."
|
|
27
35
|
)
|
|
28
36
|
|
|
29
37
|
|
|
30
|
-
def _is_unweighted(hist):
|
|
38
|
+
def _is_unweighted(hist: bh.Histogram) -> bool:
|
|
31
39
|
"""
|
|
32
40
|
Check whether a histogram is unweighted.
|
|
33
41
|
|
|
34
42
|
Parameters
|
|
35
43
|
----------
|
|
36
|
-
hist :
|
|
44
|
+
hist : bh.Histogram
|
|
37
45
|
The histogram to check.
|
|
38
46
|
|
|
39
47
|
Returns
|
|
@@ -41,10 +49,13 @@ def _is_unweighted(hist):
|
|
|
41
49
|
bool
|
|
42
50
|
True if the histogram is unweighted, False otherwise.
|
|
43
51
|
"""
|
|
44
|
-
return np.allclose(hist.variances(), hist.values())
|
|
52
|
+
return np.allclose(hist.variances(), hist.values(), equal_nan=True)
|
|
45
53
|
|
|
46
54
|
|
|
47
|
-
def get_asymmetrical_uncertainties(
|
|
55
|
+
def get_asymmetrical_uncertainties(
|
|
56
|
+
hist: bh.Histogram,
|
|
57
|
+
uncertainty_type: str = "asymmetrical",
|
|
58
|
+
) -> tuple[np.ndarray, np.ndarray]:
|
|
48
59
|
"""
|
|
49
60
|
Get Poisson asymmetrical uncertainties for a histogram via a frequentist approach based on a confidence-interval computation.
|
|
50
61
|
Asymmetrical uncertainties can only be computed for an unweighted histogram, because the bin contents of a weighted histogram do not follow a Poisson distribution.
|
|
@@ -52,14 +63,16 @@ def get_asymmetrical_uncertainties(hist):
|
|
|
52
63
|
|
|
53
64
|
Parameters
|
|
54
65
|
----------
|
|
55
|
-
hist :
|
|
66
|
+
hist : bh.Histogram
|
|
56
67
|
The histogram.
|
|
68
|
+
uncertainty_type : str, optional
|
|
69
|
+
The type of uncertainty to compute for bins with 0 entry. Default is "asymmetrical" (= "asymmetrical_one_sided_zeros"). Use "asymmetrical_double_sided_zeros" to have the double-sided definition. More information in :ref:`documentation-statistics-label`.
|
|
57
70
|
|
|
58
71
|
Returns
|
|
59
72
|
-------
|
|
60
|
-
uncertainties_low :
|
|
73
|
+
uncertainties_low : np.ndarray
|
|
61
74
|
The lower uncertainties.
|
|
62
|
-
uncertainties_high :
|
|
75
|
+
uncertainties_high : np.ndarray
|
|
63
76
|
The upper uncertainties.
|
|
64
77
|
|
|
65
78
|
Raise
|
|
@@ -69,27 +82,55 @@ def get_asymmetrical_uncertainties(hist):
|
|
|
69
82
|
|
|
70
83
|
"""
|
|
71
84
|
_check_counting_histogram(hist)
|
|
85
|
+
_check_uncertainty_type(uncertainty_type)
|
|
72
86
|
|
|
73
87
|
if not _is_unweighted(hist):
|
|
74
88
|
raise ValueError(
|
|
75
89
|
"Asymmetrical uncertainties can only be computed for an unweighted histogram."
|
|
76
90
|
)
|
|
77
|
-
|
|
78
|
-
alpha = 1.0 -
|
|
91
|
+
|
|
92
|
+
alpha = 1.0 - 0.682689492
|
|
93
|
+
tail_probability = alpha / 2
|
|
94
|
+
|
|
79
95
|
n = hist.values()
|
|
80
|
-
|
|
81
|
-
|
|
96
|
+
|
|
97
|
+
lower_bound = np.zeros_like(n, dtype=float)
|
|
98
|
+
upper_bound = np.zeros_like(n, dtype=float)
|
|
99
|
+
|
|
100
|
+
# Two-sided Garwood intervals for n > 0
|
|
101
|
+
lower_bound[n > 0] = stats.gamma.ppf(q=tail_probability, a=n[n > 0], scale=1)
|
|
102
|
+
upper_bound[n > 0] = stats.gamma.ppf(
|
|
103
|
+
q=1 - tail_probability, a=n[n > 0] + 1, scale=1
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
if uncertainty_type == "asymmetrical_double_sided_zeros":
|
|
107
|
+
# Two-sided Garwood intervals for n == 0
|
|
108
|
+
upper_bound[n == 0] = stats.gamma.ppf(q=1 - tail_probability, a=1, scale=1)
|
|
109
|
+
elif uncertainty_type in ["asymmetrical_one_sided_zeros", "asymmetrical"]:
|
|
110
|
+
# One-sided upper limit for n == 0
|
|
111
|
+
upper_bound[n == 0] = stats.gamma.ppf(q=1 - 2 * tail_probability, a=1, scale=1)
|
|
112
|
+
else:
|
|
113
|
+
raise ValueError(
|
|
114
|
+
f"Invalid uncertainty type '{uncertainty_type}' for asymmetrical uncertainties."
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
# Compute asymmetric uncertainties
|
|
118
|
+
uncertainties_low = n - lower_bound
|
|
119
|
+
uncertainties_high = upper_bound - n
|
|
120
|
+
|
|
121
|
+
uncertainties_low = np.nan_to_num(uncertainties_low, nan=0.0)
|
|
122
|
+
uncertainties_high = np.nan_to_num(uncertainties_high, nan=0.0)
|
|
82
123
|
|
|
83
124
|
return uncertainties_low, uncertainties_high
|
|
84
125
|
|
|
85
126
|
|
|
86
|
-
def _check_binning_consistency(hist_list):
|
|
127
|
+
def _check_binning_consistency(hist_list: list[bh.Histogram]) -> None:
|
|
87
128
|
"""
|
|
88
129
|
Check that all the histograms in the provided list share the same definition of their bins.
|
|
89
130
|
|
|
90
131
|
Parameters
|
|
91
132
|
----------
|
|
92
|
-
hist_list : list
|
|
133
|
+
hist_list : list[bh.Histogram]
|
|
93
134
|
The list of histograms to check.
|
|
94
135
|
|
|
95
136
|
Raise
|
|
@@ -106,15 +147,15 @@ def _check_binning_consistency(hist_list):
|
|
|
106
147
|
raise ValueError("The bins of the histograms must be equal.")
|
|
107
148
|
|
|
108
149
|
|
|
109
|
-
def get_ratio_variances(h1, h2):
|
|
150
|
+
def get_ratio_variances(h1: bh.Histogram, h2: bh.Histogram) -> np.ndarray:
|
|
110
151
|
"""
|
|
111
152
|
Calculate the variances of the ratio of two uncorrelated histograms (h1/h2).
|
|
112
153
|
|
|
113
154
|
Parameters
|
|
114
155
|
----------
|
|
115
|
-
h1 :
|
|
156
|
+
h1 : bh.Histogram
|
|
116
157
|
The first histogram.
|
|
117
|
-
h2 :
|
|
158
|
+
h2 : bh.Histogram
|
|
118
159
|
The second histogram.
|
|
119
160
|
|
|
120
161
|
Returns
|
|
@@ -131,38 +172,39 @@ def get_ratio_variances(h1, h2):
|
|
|
131
172
|
_check_counting_histogram(h1)
|
|
132
173
|
_check_counting_histogram(h2)
|
|
133
174
|
|
|
134
|
-
np.
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
np.seterr(divide="warn", invalid="warn")
|
|
142
|
-
|
|
143
|
-
return ratio_variances
|
|
175
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
176
|
+
return np.where(
|
|
177
|
+
h2.values() != 0,
|
|
178
|
+
h1.variances() / h2.values() ** 2
|
|
179
|
+
+ h2.variances() * h1.values() ** 2 / h2.values() ** 4,
|
|
180
|
+
np.nan,
|
|
181
|
+
)
|
|
144
182
|
|
|
145
183
|
|
|
146
|
-
def get_pull(
|
|
184
|
+
def get_pull(
|
|
185
|
+
h1: bh.Histogram,
|
|
186
|
+
h2: bh.Histogram,
|
|
187
|
+
h1_uncertainty_type: str = "symmetrical",
|
|
188
|
+
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
|
|
147
189
|
"""
|
|
148
190
|
Compute the pull between two histograms.
|
|
149
191
|
|
|
150
192
|
Parameters
|
|
151
193
|
----------
|
|
152
|
-
h1 :
|
|
194
|
+
h1 : bh.Histogram
|
|
153
195
|
The first histogram.
|
|
154
|
-
h2 :
|
|
196
|
+
h2 : bh.Histogram
|
|
155
197
|
The second histogram.
|
|
156
198
|
h1_uncertainty_type : str, optional
|
|
157
199
|
What kind of bin uncertainty to use for h1: "symmetrical" for the Poisson standard deviation derived from the variance stored in the histogram object, "asymmetrical" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "symmetrical".
|
|
158
200
|
|
|
159
201
|
Returns
|
|
160
202
|
-------
|
|
161
|
-
pull_values :
|
|
203
|
+
pull_values : np.ndarray
|
|
162
204
|
The pull values.
|
|
163
|
-
pull_uncertainties_low :
|
|
205
|
+
pull_uncertainties_low : np.ndarray
|
|
164
206
|
The lower uncertainties on the pull. Always ones.
|
|
165
|
-
pull_uncertainties_high :
|
|
207
|
+
pull_uncertainties_high : np.ndarray
|
|
166
208
|
The upper uncertainties on the pull. Always ones.
|
|
167
209
|
"""
|
|
168
210
|
_check_uncertainty_type(h1_uncertainty_type)
|
|
@@ -170,8 +212,10 @@ def get_pull(h1, h2, h1_uncertainty_type="symmetrical"):
|
|
|
170
212
|
_check_counting_histogram(h1)
|
|
171
213
|
_check_counting_histogram(h2)
|
|
172
214
|
|
|
173
|
-
if
|
|
174
|
-
uncertainties_low, uncertainties_high = get_asymmetrical_uncertainties(
|
|
215
|
+
if "asymmetrical" in h1_uncertainty_type:
|
|
216
|
+
uncertainties_low, uncertainties_high = get_asymmetrical_uncertainties(
|
|
217
|
+
h1, h1_uncertainty_type
|
|
218
|
+
)
|
|
175
219
|
h1_variances = np.where(
|
|
176
220
|
h1.values() >= h2.values(),
|
|
177
221
|
uncertainties_low**2,
|
|
@@ -195,26 +239,30 @@ def get_pull(h1, h2, h1_uncertainty_type="symmetrical"):
|
|
|
195
239
|
)
|
|
196
240
|
|
|
197
241
|
|
|
198
|
-
def get_difference(
|
|
242
|
+
def get_difference(
|
|
243
|
+
h1: bh.Histogram,
|
|
244
|
+
h2: bh.Histogram,
|
|
245
|
+
h1_uncertainty_type: str = "symmetrical",
|
|
246
|
+
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
|
|
199
247
|
"""
|
|
200
248
|
Compute the difference between two histograms.
|
|
201
249
|
|
|
202
250
|
Parameters
|
|
203
251
|
----------
|
|
204
|
-
h1 :
|
|
252
|
+
h1 : bh.Histogram
|
|
205
253
|
The first histogram.
|
|
206
|
-
h2 :
|
|
254
|
+
h2 : bh.Histogram
|
|
207
255
|
The second histogram.
|
|
208
256
|
h1_uncertainty_type : str, optional
|
|
209
257
|
What kind of bin uncertainty to use for h1: "symmetrical" for the Poisson standard deviation derived from the variance stored in the histogram object, "asymmetrical" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "symmetrical".
|
|
210
258
|
|
|
211
259
|
Returns
|
|
212
260
|
-------
|
|
213
|
-
difference_values :
|
|
261
|
+
difference_values : np.ndarray
|
|
214
262
|
The difference values.
|
|
215
|
-
difference_uncertainties_low :
|
|
263
|
+
difference_uncertainties_low : np.ndarray
|
|
216
264
|
The lower uncertainties on the difference.
|
|
217
|
-
difference_uncertainties_high :
|
|
265
|
+
difference_uncertainties_high : np.ndarray
|
|
218
266
|
The upper uncertainties on the difference.
|
|
219
267
|
"""
|
|
220
268
|
_check_uncertainty_type(h1_uncertainty_type)
|
|
@@ -224,8 +272,10 @@ def get_difference(h1, h2, h1_uncertainty_type="symmetrical"):
|
|
|
224
272
|
|
|
225
273
|
difference_values = h1.values() - h2.values()
|
|
226
274
|
|
|
227
|
-
if
|
|
228
|
-
uncertainties_low, uncertainties_high = get_asymmetrical_uncertainties(
|
|
275
|
+
if "asymmetrical" in h1_uncertainty_type:
|
|
276
|
+
uncertainties_low, uncertainties_high = get_asymmetrical_uncertainties(
|
|
277
|
+
h1, h1_uncertainty_type
|
|
278
|
+
)
|
|
229
279
|
|
|
230
280
|
difference_uncertainties_low = np.sqrt(uncertainties_low**2 + h2.variances())
|
|
231
281
|
difference_uncertainties_high = np.sqrt(uncertainties_high**2 + h2.variances())
|
|
@@ -240,7 +290,7 @@ def get_difference(h1, h2, h1_uncertainty_type="symmetrical"):
|
|
|
240
290
|
)
|
|
241
291
|
|
|
242
292
|
|
|
243
|
-
def
|
|
293
|
+
def get_efficiency(h1: bh.Histogram, h2: bh.Histogram) -> tuple[np.ndarray, np.ndarray]:
|
|
244
294
|
"""
|
|
245
295
|
Calculate the ratio of two correlated histograms (h1/h2), in which the entries of h1 are a subsample of the entries of h2.
|
|
246
296
|
The variances are calculated according to the formula given in :ref:`documentation-statistics-label`.
|
|
@@ -253,16 +303,16 @@ def get_efficency(h1, h2):
|
|
|
253
303
|
|
|
254
304
|
Parameters
|
|
255
305
|
----------
|
|
256
|
-
h1 :
|
|
306
|
+
h1 : bh.Histogram
|
|
257
307
|
The first histogram.
|
|
258
|
-
h2 :
|
|
308
|
+
h2 : bh.Histogram
|
|
259
309
|
The second histogram.
|
|
260
310
|
|
|
261
311
|
Returns
|
|
262
312
|
-------
|
|
263
|
-
efficiency_values :
|
|
313
|
+
efficiency_values : np.ndarray
|
|
264
314
|
The efficiency values.
|
|
265
|
-
efficiency_uncertainties :
|
|
315
|
+
efficiency_uncertainties : np.ndarray
|
|
266
316
|
The uncertainties on the efficiency values.
|
|
267
317
|
|
|
268
318
|
Raises
|
|
@@ -303,23 +353,23 @@ def get_efficency(h1, h2):
|
|
|
303
353
|
return efficiency_values, np.sqrt(efficiency_variances)
|
|
304
354
|
|
|
305
355
|
|
|
306
|
-
def get_asymmetry(h1, h2):
|
|
356
|
+
def get_asymmetry(h1: bh.Histogram, h2: bh.Histogram) -> tuple[np.ndarray, np.ndarray]:
|
|
307
357
|
"""
|
|
308
358
|
Get the asymmetry between two histograms h1 and h2, defined as (h1 - h2) / (h1 + h2).
|
|
309
359
|
Only symmetrical uncertainties are supported.
|
|
310
360
|
|
|
311
361
|
Parameters
|
|
312
362
|
----------
|
|
313
|
-
h1 :
|
|
363
|
+
h1 : bh.Histogram
|
|
314
364
|
The first histogram.
|
|
315
|
-
h2 :
|
|
365
|
+
h2 : bh.Histogram
|
|
316
366
|
The second histogram.
|
|
317
367
|
|
|
318
368
|
Returns
|
|
319
369
|
-------
|
|
320
|
-
asymmetry_values :
|
|
370
|
+
asymmetry_values : np.ndarray
|
|
321
371
|
The asymmetry values.
|
|
322
|
-
asymmetry_uncertainties :
|
|
372
|
+
asymmetry_uncertainties : np.ndarray
|
|
323
373
|
The uncertainties on the asymmetry.
|
|
324
374
|
"""
|
|
325
375
|
_check_binning_consistency([h1, h2])
|
|
@@ -339,19 +389,19 @@ def get_asymmetry(h1, h2):
|
|
|
339
389
|
|
|
340
390
|
|
|
341
391
|
def get_ratio(
|
|
342
|
-
h1,
|
|
343
|
-
h2,
|
|
344
|
-
h1_uncertainty_type="symmetrical",
|
|
345
|
-
ratio_uncertainty_type="uncorrelated",
|
|
346
|
-
):
|
|
392
|
+
h1: bh.Histogram,
|
|
393
|
+
h2: bh.Histogram,
|
|
394
|
+
h1_uncertainty_type: str = "symmetrical",
|
|
395
|
+
ratio_uncertainty_type: str = "uncorrelated",
|
|
396
|
+
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
|
|
347
397
|
"""
|
|
348
398
|
Compute the ratio h1/h2 between two uncorrelated histograms h1 and h2.
|
|
349
399
|
|
|
350
400
|
Parameters
|
|
351
401
|
----------
|
|
352
|
-
h1 :
|
|
402
|
+
h1 : bh.Histogram
|
|
353
403
|
The numerator histogram.
|
|
354
|
-
h2 :
|
|
404
|
+
h2 : bh.Histogram
|
|
355
405
|
The denominator histogram.
|
|
356
406
|
h1_uncertainty_type : str, optional
|
|
357
407
|
What kind of bin uncertainty to use for h1: "symmetrical" for the Poisson standard deviation derived from the variance stored in the histogram object, "asymmetrical" for asymmetrical uncertainties based on a Poisson confidence interval. Default is "symmetrical".
|
|
@@ -363,11 +413,11 @@ def get_ratio(
|
|
|
363
413
|
|
|
364
414
|
Returns
|
|
365
415
|
-------
|
|
366
|
-
ratio_values :
|
|
416
|
+
ratio_values : np.ndarray
|
|
367
417
|
The ratio values.
|
|
368
|
-
ratio_uncertainties_low :
|
|
418
|
+
ratio_uncertainties_low : np.ndarray
|
|
369
419
|
The lower uncertainties on the ratio.
|
|
370
|
-
ratio_uncertainties_high :
|
|
420
|
+
ratio_uncertainties_high : np.ndarray
|
|
371
421
|
The upper uncertainties on the ratio.
|
|
372
422
|
|
|
373
423
|
Raises
|
|
@@ -382,11 +432,13 @@ def get_ratio(
|
|
|
382
432
|
|
|
383
433
|
ratio_values = np.where(h2.values() != 0, h1.values() / h2.values(), np.nan)
|
|
384
434
|
|
|
385
|
-
if
|
|
386
|
-
uncertainties_low, uncertainties_high = get_asymmetrical_uncertainties(
|
|
435
|
+
if "asymmetrical" in h1_uncertainty_type:
|
|
436
|
+
uncertainties_low, uncertainties_high = get_asymmetrical_uncertainties(
|
|
437
|
+
h1, h1_uncertainty_type
|
|
438
|
+
)
|
|
387
439
|
|
|
388
440
|
if ratio_uncertainty_type == "uncorrelated":
|
|
389
|
-
if
|
|
441
|
+
if "asymmetrical" in h1_uncertainty_type:
|
|
390
442
|
h1_high = h1.copy()
|
|
391
443
|
h1_high[:] = np.c_[h1_high.values(), uncertainties_high**2]
|
|
392
444
|
h1_low = h1.copy()
|
|
@@ -397,7 +449,7 @@ def get_ratio(
|
|
|
397
449
|
ratio_uncertainties_low = np.sqrt(get_ratio_variances(h1, h2))
|
|
398
450
|
ratio_uncertainties_high = ratio_uncertainties_low
|
|
399
451
|
elif ratio_uncertainty_type == "split":
|
|
400
|
-
if
|
|
452
|
+
if "asymmetrical" in h1_uncertainty_type:
|
|
401
453
|
ratio_uncertainties_low = uncertainties_low / h2.values()
|
|
402
454
|
ratio_uncertainties_high = uncertainties_high / h2.values()
|
|
403
455
|
else:
|
|
@@ -419,19 +471,19 @@ def get_ratio(
|
|
|
419
471
|
|
|
420
472
|
|
|
421
473
|
def get_comparison(
|
|
422
|
-
h1,
|
|
423
|
-
h2,
|
|
424
|
-
comparison,
|
|
425
|
-
h1_uncertainty_type="symmetrical",
|
|
426
|
-
):
|
|
474
|
+
h1: bh.Histogram,
|
|
475
|
+
h2: bh.Histogram,
|
|
476
|
+
comparison: str,
|
|
477
|
+
h1_uncertainty_type: str = "symmetrical",
|
|
478
|
+
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
|
|
427
479
|
"""
|
|
428
480
|
Compute the comparison between two histograms.
|
|
429
481
|
|
|
430
482
|
Parameters
|
|
431
483
|
----------
|
|
432
|
-
h1 :
|
|
484
|
+
h1 : bh.Histogram
|
|
433
485
|
The first histogram for comparison.
|
|
434
|
-
h2 :
|
|
486
|
+
h2 : bh.Histogram
|
|
435
487
|
The second histogram for comparison.
|
|
436
488
|
comparison : str
|
|
437
489
|
The type of comparison ("ratio", "split_ratio", "pull", "difference", "relative_difference", "efficiency", or "asymmetry").
|
|
@@ -443,11 +495,11 @@ def get_comparison(
|
|
|
443
495
|
|
|
444
496
|
Returns
|
|
445
497
|
-------
|
|
446
|
-
values :
|
|
498
|
+
values : np.ndarray
|
|
447
499
|
The comparison values.
|
|
448
|
-
lower_uncertainties :
|
|
500
|
+
lower_uncertainties : np.ndarray
|
|
449
501
|
The lower uncertainties on the comparison values.
|
|
450
|
-
upper_uncertainties :
|
|
502
|
+
upper_uncertainties : np.ndarray
|
|
451
503
|
The upper uncertainties on the comparison values.
|
|
452
504
|
|
|
453
505
|
Raises
|
|
@@ -462,49 +514,47 @@ def get_comparison(
|
|
|
462
514
|
_check_counting_histogram(h1)
|
|
463
515
|
_check_counting_histogram(h2)
|
|
464
516
|
|
|
465
|
-
np.
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
h1, h2, h1_uncertainty_type, "uncorrelated"
|
|
470
|
-
)
|
|
471
|
-
elif comparison == "split_ratio":
|
|
472
|
-
values, lower_uncertainties, upper_uncertainties = get_ratio(
|
|
473
|
-
h1, h2, h1_uncertainty_type, "split"
|
|
474
|
-
)
|
|
475
|
-
elif comparison == "relative_difference":
|
|
476
|
-
values, lower_uncertainties, upper_uncertainties = get_ratio(
|
|
477
|
-
h1, h2, h1_uncertainty_type, "uncorrelated"
|
|
478
|
-
)
|
|
479
|
-
values -= 1 # relative difference is ratio-1
|
|
480
|
-
elif comparison == "pull":
|
|
481
|
-
values, lower_uncertainties, upper_uncertainties = get_pull(
|
|
482
|
-
h1, h2, h1_uncertainty_type
|
|
483
|
-
)
|
|
484
|
-
elif comparison == "difference":
|
|
485
|
-
values, lower_uncertainties, upper_uncertainties = get_difference(
|
|
486
|
-
h1, h2, h1_uncertainty_type
|
|
487
|
-
)
|
|
488
|
-
elif comparison == "asymmetry":
|
|
489
|
-
if h1_uncertainty_type == "asymmetrical":
|
|
490
|
-
raise ValueError(
|
|
491
|
-
"Asymmetrical uncertainties are not supported for the asymmetry comparison."
|
|
517
|
+
with np.errstate(divide="ignore", invalid="ignore"):
|
|
518
|
+
if comparison == "ratio":
|
|
519
|
+
values, lower_uncertainties, upper_uncertainties = get_ratio(
|
|
520
|
+
h1, h2, h1_uncertainty_type, "uncorrelated"
|
|
492
521
|
)
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
522
|
+
elif comparison == "split_ratio":
|
|
523
|
+
values, lower_uncertainties, upper_uncertainties = get_ratio(
|
|
524
|
+
h1, h2, h1_uncertainty_type, "split"
|
|
525
|
+
)
|
|
526
|
+
elif comparison == "relative_difference":
|
|
527
|
+
values, lower_uncertainties, upper_uncertainties = get_ratio(
|
|
528
|
+
h1, h2, h1_uncertainty_type, "uncorrelated"
|
|
529
|
+
)
|
|
530
|
+
values -= 1 # relative difference is ratio-1
|
|
531
|
+
elif comparison == "pull":
|
|
532
|
+
values, lower_uncertainties, upper_uncertainties = get_pull(
|
|
533
|
+
h1, h2, h1_uncertainty_type
|
|
534
|
+
)
|
|
535
|
+
elif comparison == "difference":
|
|
536
|
+
values, lower_uncertainties, upper_uncertainties = get_difference(
|
|
537
|
+
h1, h2, h1_uncertainty_type
|
|
538
|
+
)
|
|
539
|
+
elif comparison == "asymmetry":
|
|
540
|
+
if "asymmetrical" in h1_uncertainty_type:
|
|
541
|
+
raise ValueError(
|
|
542
|
+
"Asymmetrical uncertainties are not supported for the asymmetry comparison."
|
|
543
|
+
)
|
|
544
|
+
values, uncertainties = get_asymmetry(h1, h2)
|
|
545
|
+
lower_uncertainties = uncertainties
|
|
546
|
+
upper_uncertainties = uncertainties
|
|
547
|
+
elif comparison == "efficiency":
|
|
548
|
+
if "asymmetrical" in h1_uncertainty_type:
|
|
549
|
+
raise ValueError(
|
|
550
|
+
"Asymmetrical uncertainties are not supported in an efficiency computation."
|
|
551
|
+
)
|
|
552
|
+
values, uncertainties = get_efficiency(h1, h2)
|
|
553
|
+
lower_uncertainties = uncertainties
|
|
554
|
+
upper_uncertainties = uncertainties
|
|
555
|
+
else:
|
|
498
556
|
raise ValueError(
|
|
499
|
-
"
|
|
557
|
+
f"{comparison} not available as a comparison ('ratio', 'split_ratio', 'pull', 'difference', 'relative_difference', 'asymmetry' or 'efficiency')."
|
|
500
558
|
)
|
|
501
|
-
values, uncertainties = get_efficency(h1, h2)
|
|
502
|
-
lower_uncertainties = uncertainties
|
|
503
|
-
upper_uncertainties = uncertainties
|
|
504
|
-
else:
|
|
505
|
-
raise ValueError(
|
|
506
|
-
f"{comparison} not available as a comparison ('ratio', 'split_ratio', 'pull', 'difference', 'relative_difference', 'asymmetry' or 'efficiency')."
|
|
507
|
-
)
|
|
508
|
-
np.seterr(divide="warn", invalid="warn")
|
|
509
559
|
|
|
510
560
|
return values, lower_uncertainties, upper_uncertainties
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Asymmetry
|
|
3
|
+
=========
|
|
4
|
+
|
|
5
|
+
Compare two 1D histograms using the asymmetry comparison [(h1-h2)/(h1+h2)].
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from plothist_utils import get_dummy_data
|
|
9
|
+
|
|
10
|
+
df = get_dummy_data()
|
|
11
|
+
|
|
12
|
+
name = "variable_1"
|
|
13
|
+
|
|
14
|
+
x1 = df[name][df["category"] == 2]
|
|
15
|
+
x2 = df[name][df["category"] == 3]
|
|
16
|
+
|
|
17
|
+
x_range = (min(*x1, *x2), max(*x1, *x2))
|
|
18
|
+
|
|
19
|
+
from plothist import make_hist
|
|
20
|
+
|
|
21
|
+
h1 = make_hist(x1, bins=50, range=x_range)
|
|
22
|
+
h2 = make_hist(x2, bins=50, range=x_range)
|
|
23
|
+
|
|
24
|
+
###
|
|
25
|
+
from plothist import plot_two_hist_comparison
|
|
26
|
+
|
|
27
|
+
fig, ax_main, ax_comparison = plot_two_hist_comparison(
|
|
28
|
+
h1,
|
|
29
|
+
h2,
|
|
30
|
+
xlabel=name,
|
|
31
|
+
ylabel="Entries",
|
|
32
|
+
h1_label=r"$\mathbfit{h}_1$",
|
|
33
|
+
h2_label=r"$\mathbfit{h}_2$",
|
|
34
|
+
comparison="asymmetry", # <--
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
fig.savefig("1d_comparison_asymmetry.svg", bbox_inches="tight")
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Difference
|
|
3
|
+
==========
|
|
4
|
+
|
|
5
|
+
Compare two 1D histograms using the difference [h1-h2].
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from plothist_utils import get_dummy_data
|
|
9
|
+
|
|
10
|
+
df = get_dummy_data()
|
|
11
|
+
|
|
12
|
+
name = "variable_1"
|
|
13
|
+
|
|
14
|
+
x1 = df[name][df["category"] == 2]
|
|
15
|
+
x2 = df[name][df["category"] == 3]
|
|
16
|
+
|
|
17
|
+
x_range = (min(*x1, *x2), max(*x1, *x2))
|
|
18
|
+
|
|
19
|
+
from plothist import make_hist
|
|
20
|
+
|
|
21
|
+
h1 = make_hist(x1, bins=50, range=x_range)
|
|
22
|
+
h2 = make_hist(x2, bins=50, range=x_range)
|
|
23
|
+
|
|
24
|
+
###
|
|
25
|
+
from plothist import add_text, plot_two_hist_comparison
|
|
26
|
+
|
|
27
|
+
fig, ax_main, ax_comparison = plot_two_hist_comparison(
|
|
28
|
+
h1,
|
|
29
|
+
h2,
|
|
30
|
+
xlabel=name,
|
|
31
|
+
ylabel="Entries",
|
|
32
|
+
h1_label=r"$\mathcal{H}_{1}$",
|
|
33
|
+
h2_label=r"$\mathcal{H}_{2}$",
|
|
34
|
+
comparison="difference", # <--
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
add_text("Comparison of two hist with difference plot", ax=ax_main)
|
|
38
|
+
add_text("Difference ax", x="right", ax=ax_comparison)
|
|
39
|
+
|
|
40
|
+
fig.savefig("1d_comparison_difference.svg", bbox_inches="tight")
|