lets-plot 4.8.1rc1__cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lets_plot/__init__.py +382 -0
- lets_plot/_global_settings.py +192 -0
- lets_plot/_kbridge.py +197 -0
- lets_plot/_type_utils.py +133 -0
- lets_plot/_version.py +6 -0
- lets_plot/bistro/__init__.py +16 -0
- lets_plot/bistro/_plot2d_common.py +106 -0
- lets_plot/bistro/corr.py +448 -0
- lets_plot/bistro/im.py +196 -0
- lets_plot/bistro/joint.py +192 -0
- lets_plot/bistro/qq.py +207 -0
- lets_plot/bistro/residual.py +341 -0
- lets_plot/bistro/waterfall.py +332 -0
- lets_plot/export/__init__.py +6 -0
- lets_plot/export/ggsave_.py +172 -0
- lets_plot/frontend_context/__init__.py +8 -0
- lets_plot/frontend_context/_configuration.py +140 -0
- lets_plot/frontend_context/_dynamic_configure_html.py +115 -0
- lets_plot/frontend_context/_frontend_ctx.py +16 -0
- lets_plot/frontend_context/_html_contexts.py +223 -0
- lets_plot/frontend_context/_intellij_python_json_ctx.py +38 -0
- lets_plot/frontend_context/_isolated_webview_panel_ctx.py +81 -0
- lets_plot/frontend_context/_json_contexts.py +39 -0
- lets_plot/frontend_context/_jupyter_notebook_ctx.py +82 -0
- lets_plot/frontend_context/_mime_types.py +7 -0
- lets_plot/frontend_context/_static_html_page_ctx.py +76 -0
- lets_plot/frontend_context/_static_svg_ctx.py +26 -0
- lets_plot/frontend_context/_webbr_html_page_ctx.py +29 -0
- lets_plot/frontend_context/sandbox.py +5 -0
- lets_plot/geo_data/__init__.py +19 -0
- lets_plot/geo_data/core.py +335 -0
- lets_plot/geo_data/geocoder.py +988 -0
- lets_plot/geo_data/geocodes.py +512 -0
- lets_plot/geo_data/gis/__init__.py +0 -0
- lets_plot/geo_data/gis/fluent_dict.py +201 -0
- lets_plot/geo_data/gis/geocoding_service.py +42 -0
- lets_plot/geo_data/gis/geometry.py +91 -0
- lets_plot/geo_data/gis/json_request.py +232 -0
- lets_plot/geo_data/gis/json_response.py +308 -0
- lets_plot/geo_data/gis/request.py +492 -0
- lets_plot/geo_data/gis/response.py +247 -0
- lets_plot/geo_data/livemap_helper.py +65 -0
- lets_plot/geo_data/to_geo_data_frame.py +141 -0
- lets_plot/geo_data/type_assertion.py +34 -0
- lets_plot/geo_data_internals/__init__.py +4 -0
- lets_plot/geo_data_internals/constants.py +13 -0
- lets_plot/geo_data_internals/utils.py +33 -0
- lets_plot/mapping.py +115 -0
- lets_plot/package_data/lets-plot.min.js +3 -0
- lets_plot/plot/__init__.py +64 -0
- lets_plot/plot/_global_theme.py +14 -0
- lets_plot/plot/annotation.py +290 -0
- lets_plot/plot/coord.py +242 -0
- lets_plot/plot/core.py +1071 -0
- lets_plot/plot/expand_limits_.py +78 -0
- lets_plot/plot/facet.py +210 -0
- lets_plot/plot/font_features.py +71 -0
- lets_plot/plot/geom.py +9146 -0
- lets_plot/plot/geom_extras.py +53 -0
- lets_plot/plot/geom_function_.py +219 -0
- lets_plot/plot/geom_imshow_.py +393 -0
- lets_plot/plot/geom_livemap_.py +343 -0
- lets_plot/plot/ggbunch_.py +96 -0
- lets_plot/plot/gggrid_.py +139 -0
- lets_plot/plot/ggtb_.py +81 -0
- lets_plot/plot/guide.py +231 -0
- lets_plot/plot/label.py +187 -0
- lets_plot/plot/marginal_layer.py +181 -0
- lets_plot/plot/plot.py +245 -0
- lets_plot/plot/pos.py +344 -0
- lets_plot/plot/sampling.py +338 -0
- lets_plot/plot/sandbox_.py +26 -0
- lets_plot/plot/scale.py +3580 -0
- lets_plot/plot/scale_colormap_mpl.py +300 -0
- lets_plot/plot/scale_convenience.py +155 -0
- lets_plot/plot/scale_identity_.py +653 -0
- lets_plot/plot/scale_position.py +1342 -0
- lets_plot/plot/series_meta.py +209 -0
- lets_plot/plot/stat.py +585 -0
- lets_plot/plot/subplots.py +331 -0
- lets_plot/plot/subplots_util.py +24 -0
- lets_plot/plot/theme_.py +790 -0
- lets_plot/plot/theme_set.py +418 -0
- lets_plot/plot/tooltip.py +486 -0
- lets_plot/plot/util.py +267 -0
- lets_plot/settings_utils.py +244 -0
- lets_plot/tilesets.py +429 -0
- lets_plot-4.8.1rc1.dist-info/METADATA +221 -0
- lets_plot-4.8.1rc1.dist-info/RECORD +97 -0
- lets_plot-4.8.1rc1.dist-info/WHEEL +6 -0
- lets_plot-4.8.1rc1.dist-info/licenses/LICENSE +21 -0
- lets_plot-4.8.1rc1.dist-info/licenses/licenses/LICENSE.FreeType +166 -0
- lets_plot-4.8.1rc1.dist-info/licenses/licenses/LICENSE.ImageMagick +106 -0
- lets_plot-4.8.1rc1.dist-info/licenses/licenses/LICENSE.expat +21 -0
- lets_plot-4.8.1rc1.dist-info/licenses/licenses/LICENSE.fontconfig +200 -0
- lets_plot-4.8.1rc1.dist-info/top_level.txt +2 -0
- lets_plot_kotlin_bridge.cpython-311-x86_64-linux-gnu.so +0 -0
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2022. JetBrains s.r.o.
|
|
3
|
+
# Use of this source code is governed by the MIT license that can be found in the LICENSE file.
|
|
4
|
+
#
|
|
5
|
+
try:
|
|
6
|
+
import numpy as np
|
|
7
|
+
except ImportError:
|
|
8
|
+
np = None
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
import pandas as pd
|
|
12
|
+
except ImportError:
|
|
13
|
+
pd = None
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
import polars as pl
|
|
17
|
+
except ImportError:
|
|
18
|
+
pl = None
|
|
19
|
+
|
|
20
|
+
from ._plot2d_common import _get_bin_params_2d, _get_geom2d_layer, _get_marginal_layers
|
|
21
|
+
from ..plot.core import DummySpec, aes
|
|
22
|
+
from ..plot.geom import geom_hline
|
|
23
|
+
from ..plot.label import ylab
|
|
24
|
+
from ..plot.plot import ggplot
|
|
25
|
+
from ..plot.theme_ import *
|
|
26
|
+
|
|
27
|
+
# Names exported by ``from lets_plot.bistro.residual import *``.
__all__ = ['residual_plot']

# Default values of the keyword parameters of `residual_plot()`.
_METHOD_DEF = 'lm'
_METHOD_LM_DEG_DEF = 1
_METHOD_LOESS_SPAN_DEF = .5
_GEOM_DEF = 'point'
_MARGINAL_DEF = "dens:r"
_HLINE_DEF = True

# Style of the horizontal zero line and the name of the computed residual column.
_HLINE_COLOR = "magenta"
_HLINE_LINETYPE = 'dashed'
_RESIDUAL_COL = "..residual.."
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _extract_data_series(df, x, y):
|
|
42
|
+
xs = np.array(df[x])
|
|
43
|
+
ys = np.array(df[y])
|
|
44
|
+
if xs.size != ys.size:
|
|
45
|
+
raise Exception("All data series in dataset must have equal size "
|
|
46
|
+
"{x_col} : {x_len} {y_col} : {y_len}".format(
|
|
47
|
+
x_col=x,
|
|
48
|
+
y_col=y,
|
|
49
|
+
x_len=xs.size,
|
|
50
|
+
y_len=ys.size
|
|
51
|
+
))
|
|
52
|
+
if xs.size == 1:
|
|
53
|
+
raise Exception("Data should have at least two points.")
|
|
54
|
+
|
|
55
|
+
return xs, ys
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _poly_transform(deg):
|
|
59
|
+
def _transform(X):
|
|
60
|
+
assert len(X.shape) > 1 and X.shape[1] == 1
|
|
61
|
+
return np.concatenate([np.power(X, d) for d in range(deg + 1)], axis=1).astype(float)
|
|
62
|
+
|
|
63
|
+
return _transform
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _get_lm_predictor(xs_train, ys_train, deg):
    """
    Fit an ordinary least squares polynomial model and return its predictor.

    Parameters
    ----------
    xs_train : numpy array
        Values of the independent variable.
    ys_train : numpy array
        Values of the dependent variable.
    deg : int
        Degree of the polynomial used to build the design matrix.

    Returns
    -------
    callable
        Function taking an array of x values and returning the model predictions.
    """
    import statsmodels.api as sm

    to_design_matrix = _poly_transform(deg)
    # The transform expects a column vector, hence the reshape.
    design_matrix = to_design_matrix(xs_train.reshape(-1, 1))
    fitted = sm.OLS(ys_train, design_matrix).fit()

    def _predict(xs):
        return fitted.predict(to_design_matrix(xs.reshape(-1, 1)))

    return _predict
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _get_loess_predictor(xs_train, ys_train, span, seed, max_n):
    """
    Fit a LOWESS curve and return a predictor that interpolates it.

    Parameters
    ----------
    xs_train : numpy array
        Values of the independent variable.
    ys_train : numpy array
        Values of the dependent variable.
    span : float
        Passed to ``lowess`` as the ``frac`` argument.
    seed : int or None
        Seed of the random generator used for sub-sampling.
    max_n : int or None
        Maximum number of points used for fitting. When the data holds more
        points than this, a random sample of ``max_n`` points (without
        replacement) is used. ``None`` disables sub-sampling.

    Returns
    -------
    callable
        Function taking an array of x values and returning the interpolated
        fitted values. ``interp1d`` is created with ``bounds_error=False``,
        so out-of-range inputs do not raise.
    """
    import statsmodels.api as sm
    from scipy.interpolate import interp1d

    # Sample only when the limit is really exceeded: requesting more items than
    # available with ``replace=False`` raises ValueError.
    if max_n is not None and xs_train.size > max_n:
        # A private generator avoids resetting the caller's global NumPy random state.
        rng = np.random.RandomState(seed)
        indices = rng.choice(xs_train.size, size=max_n, replace=False)
        xs_train = xs_train[indices]
        ys_train = ys_train[indices]
    lowess = sm.nonparametric.lowess(ys_train, xs_train, frac=span)
    # Column 0 holds the x values, column 1 the smoothed y values.
    model = interp1d(lowess[:, 0], lowess[:, 1], bounds_error=False)

    return lambda xs: np.asarray(model(xs))
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _get_predictor(xs_train, ys_train, method, deg, span, seed, max_n):
|
|
94
|
+
if method == 'lm':
|
|
95
|
+
return _get_lm_predictor(xs_train, ys_train, deg)
|
|
96
|
+
if method in ['loess', 'lowess']:
|
|
97
|
+
return _get_loess_predictor(xs_train, ys_train, span, seed, max_n)
|
|
98
|
+
if method == 'none':
|
|
99
|
+
return lambda xs: np.array([0] * xs.size)
|
|
100
|
+
else:
|
|
101
|
+
raise Exception("Unknown method '{0}'".format(method))
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _get_stat_data(data, x, y, group_by, method, deg, span, seed, max_n):
    """
    Convert the input to a pandas ``DataFrame`` and append the residual column.

    Rows where ``x`` or ``y`` is missing are dropped. When ``group_by`` is given,
    a separate model is fitted for every distinct value of that column.

    Parameters
    ----------
    data : dict or pandas ``DataFrame`` or polars ``DataFrame``
        Input data.
    x, y : str
        Names of the independent and dependent variables.
    group_by : str or None
        Name of the grouping column.
    method, deg, span, seed, max_n
        Fitting options forwarded to ``_get_predictor()``.

    Returns
    -------
    tuple
        ``(df, xs, ys)``: the data with the residual column added, and the
        x and y values as NumPy arrays.

    Raises
    ------
    Exception
        If ``data`` is of an unsupported type.
    """
    def _get_group_stat_data(group_df):
        xs, ys = _extract_data_series(group_df, x, y)
        if len(xs) == 0:
            # Nothing to fit: add an empty residual column.
            return group_df.assign(**{_RESIDUAL_COL: []}), xs, ys
        predictor = _get_predictor(xs, ys, method, deg, span, seed, max_n)
        return group_df.assign(**{_RESIDUAL_COL: ys - predictor(xs)}), xs, ys

    if isinstance(data, dict):
        df = pd.DataFrame(data)
    elif isinstance(data, pd.DataFrame):
        df = data.copy()
    elif pl is not None and isinstance(data, pl.DataFrame):
        df = pd.DataFrame(data.to_dict(as_series=False))
    else:
        raise Exception("Unsupported type of data: {0}".format(data))
    df = df[(df[x].notna()) & df[y].notna()]
    if group_by is None:
        return _get_group_stat_data(df)

    group_results = [
        _get_group_stat_data(df[df[group_by] == group_value])
        for group_value in df[group_by].unique()
    ]
    if not group_results:
        # No rows left, so there are no groups: unpacking ``zip()`` of an empty
        # list would fail, so handle the empty frame like the ungrouped case.
        return _get_group_stat_data(df)
    df_list, xs_list, ys_list = zip(*group_results)
    return pd.concat(df_list), np.concatenate(xs_list), np.concatenate(ys_list)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def residual_plot(data=None, x=None, y=None, *,
                  method=_METHOD_DEF,
                  deg=_METHOD_LM_DEG_DEF,
                  span=_METHOD_LOESS_SPAN_DEF, seed=None, max_n=None,
                  geom=_GEOM_DEF,
                  bins=None, binwidth=None,
                  color=None, size=None, alpha=None,
                  color_by=None,
                  show_legend=None,
                  hline=_HLINE_DEF, marginal=_MARGINAL_DEF):
    """
    Produce a residual plot that shows the difference between the observed response and the fitted response values.

    To use ``residual_plot()``, the `numpy` and `pandas` libraries are required.
    Also, `statsmodels` and `scipy` are required for 'lm' and 'loess' methods.

    Parameters
    ----------
    data : dict or Pandas or Polars ``DataFrame``
        The data to be displayed.
    x : str
        Name of independent variable.
    y : str
        Name of dependent variable that will be fitted.
    method : {'lm', 'loess', 'lowess', 'none'}, default='lm'
        Fitting method: 'lm' (Linear Model) or 'loess'/'lowess' (Locally Estimated Scatterplot Smoothing).
        If value of ``deg`` parameter is greater than 1 then linear model becomes polynomial of the given degree.
        If method is 'none' then data is left as is.
    deg : int, default=1
        Degree of polynomial for linear regression model.
    span : float, default=0.5
        Only for 'loess' method. The fraction of source points closest to the current point is taken into account
        for computing a least-squares regression. A sensible value is usually 0.25 to 0.5.
    seed : int
        Random seed for 'loess' sampling.
    max_n : int
        Maximum number of data-points for 'loess' method. If this quantity is exceeded, random sampling is applied to data.
    geom : {'point', 'tile', 'hex', 'density2d', 'density2df', 'pointdensity', 'none'}, default='point'
        The geometric object to use to display the data. No object will be used if ``geom='none'``.
    bins : int or list of int
        Number of bins in both directions, vertical and horizontal. Overridden by ``binwidth``.
        If only one value given - interpret it as list of two equal values.
        Applicable simultaneously for 'tile'/'hex' geom and 'histogram' marginal.
    binwidth : float or list of float
        The width of the bins in both directions, vertical and horizontal.
        Overrides ``bins``. The default is to use bin widths that cover the entire range of the data.
        If only one value given - interpret it as list of two equal values.
        Applicable simultaneously for 'tile'/'hex' geom and 'histogram' marginal.
    color : str
        Color of the geometry.
        For more info see `Color and Fill <https://lets-plot.org/python/pages/aesthetics.html#color-and-fill>`__.
    size : float
        Size of the geometry.
    alpha : float
        Transparency level of the geometry. Accepts values between 0 and 1.
    color_by : str
        Name of grouping variable.
    show_legend : bool, default=True
        False - do not show legend for the main layer.
    hline : bool, default=True
        False - do not show horizontal line passing through 0.
    marginal : str, default='dens:r'
        Description of marginal layers packed to string value.
        Different marginals are separated by the ',' char.
        Parameters of a marginal are separated by the ':' char.
        First parameter of a marginal is a geometry name.
        Possible values: 'dens'/'density', 'hist'/'histogram', 'box'/'boxplot'.
        Second parameter is a string specifying which sides of the plot the marginal layer will appear on.
        Possible values: 't' (top), 'b' (bottom), 'l' (left), 'r' (right).
        Third parameter (optional) is size of marginal.
        To suppress marginals use ``marginal='none'``.
        Examples:
        "hist:tr:0.3",
        "dens:tr,hist:bl",
        "box:tr:.05, hist:bl, dens:bl".

    Returns
    -------
    ``PlotSpec``
        Plot object specification.

    Notes
    -----
    When using 'lm' and 'loess' methods,
    this function requires the `statsmodels` and `scipy` libraries to be installed.

    ----

    To hide axis tooltips, set 'blank' or the result of `element_blank() <https://lets-plot.org/python/pages/api/lets_plot.element_blank.html>`__
    to the ``axis_tooltip``, ``axis_tooltip_x`` or ``axis_tooltip_y`` parameter of the `theme() <https://lets-plot.org/python/pages/api/lets_plot.theme.html>`__.

    Examples
    --------
    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 11

        import numpy as np
        from lets_plot import *
        from lets_plot.bistro.residual import *
        LetsPlot.setup_html()
        n = 100
        np.random.seed(42)
        data = {
            'x': np.random.uniform(size=n),
            'y': np.random.normal(size=n)
        }
        residual_plot(data, 'x', 'y')

    |

    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 9-11

        import numpy as np
        from lets_plot import *
        from lets_plot.bistro.residual import *
        LetsPlot.setup_html()
        n, m = 1000, 5
        np.random.seed(42)
        x = np.random.uniform(low=-m, high=m, size=n)
        y = x**2 + np.random.normal(size=n)
        residual_plot({'x': x, 'y': y}, 'x', 'y', \\
                      deg=2, geom='tile', binwidth=[1, .5], \\
                      hline=False, marginal="hist:tr")

    |

    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 10-13

        import numpy as np
        from lets_plot import *
        from lets_plot.bistro.residual import *
        LetsPlot.setup_html()
        n = 200
        np.random.seed(42)
        x = np.random.uniform(size=n)
        y = x * np.random.normal(size=n)
        g = np.random.choice(['A', 'B'], size=n)
        residual_plot({'x': x, 'y': y, 'g': g}, 'x', 'y', \\
                      method='none', bins=[30, 15], \\
                      size=5, alpha=.5, color_by='g', show_legend=False, \\
                      marginal="hist:t:.2, hist:r, dens:tr, box:bl:.05")

    |

    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 12

        import numpy as np
        from lets_plot import *
        from lets_plot.bistro.residual import *
        LetsPlot.setup_html()
        n = 100
        color, fill = "#bd0026", "#ffffb2"
        np.random.seed(42)
        data = {
            'x': np.random.uniform(size=n),
            'y': np.random.normal(size=n)
        }
        residual_plot(data, 'x', 'y', geom='none', hline=False, marginal='none') + \\
            geom_hline(yintercept=0, size=1, color=color) + \\
            geom_point(shape=21, size=3, color=color, fill=fill) + \\
            ggmarginal('r', layer=geom_area(stat='density', color=color, fill=fill))

    """
    # Both optional dependencies must be importable before any data handling.
    if np is None:
        raise ValueError("Module 'numpy' is required for residual plot")
    if pd is None:
        raise ValueError("Module 'pandas' is required for residual plot")

    # Compute residuals (grouped by `color_by`, if given) and the 2D binning parameters.
    stat_data, xs, ys = _get_stat_data(data, x, y, color_by, method, deg, span, seed, max_n)
    binwidth2d, bins2d = _get_bin_params_2d(xs, ys, binwidth, bins)

    # The y aesthetic always points at the computed residual column.
    mapping_dict = {'x': x, 'y': _RESIDUAL_COL}
    if color_by is not None:
        mapping_dict.update(color=color_by, fill=color_by)

    # With method 'none' the axis keeps the plain variable name.
    y_title = y if method == 'none' else "{0} residual".format(y)
    scales = ylab(y_title)

    # Collect layers: main geometry, zero line, marginals.
    layers = DummySpec()
    main_layer = _get_geom2d_layer(geom, binwidth2d, bins2d, color, color_by, size, alpha, show_legend)
    if main_layer is not None:
        layers += main_layer
    if hline:
        layers += geom_hline(yintercept=0, color=_HLINE_COLOR, linetype=_HLINE_LINETYPE)
    if marginal != 'none':
        layers += _get_marginal_layers(marginal, binwidth2d, bins2d, color, color_by, show_legend)

    # Blank the whole axis, then explicitly set the listed axis elements.
    theme_layer = theme(
        axis="blank",
        axis_text_x=element_text(),
        axis_title_x=element_text(),
        axis_line_y=element_line(),
        axis_ticks_y=element_line(),
        axis_text_y=element_text(),
        axis_title_y=element_text(),
    )

    return ggplot(stat_data, aes(**mapping_dict)) + layers + scales + theme_layer
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
# Copyright (c) 2024. JetBrains s.r.o.
|
|
2
|
+
# Use of this source code is governed by the MIT license that can be found in the LICENSE file.
|
|
3
|
+
|
|
4
|
+
from lets_plot.plot.core import PlotSpec, LayerSpec, FeatureSpecArray, aes
|
|
5
|
+
from lets_plot.plot.util import as_annotated_data
|
|
6
|
+
|
|
7
|
+
# Names exported by ``from lets_plot.bistro.waterfall import *``.
__all__ = ['waterfall_plot']
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def waterfall_plot(data, x, y, *,
                   measure=None, group=None,
                   color=None, fill=None, size=None, alpha=None, linetype=None,
                   width=None,
                   show_legend=None, relative_tooltips=None, absolute_tooltips=None,
                   sorted_value=None, threshold=None, max_values=None,
                   base=None,
                   calc_total=None, total_title=None,
                   hline=None, hline_ontop=None,
                   connector=None,
                   relative_labels=None, absolute_labels=None,
                   label=None, label_format=None,
                   background_layers=None) -> PlotSpec:
    """
    A waterfall plot shows the cumulative effect of sequentially introduced positive or negative values.

    Parameters
    ----------
    data : dict or Pandas or Polars ``DataFrame``
        The data to be displayed.
    x : str
        Name of a variable.
    y : str
        Name of a numeric variable.
    measure : str
        Kind of a calculation.
        It takes the name of a data column.
        The values in the column could be:

        'absolute' - the value is shown as is;
        'relative' - the value is shown as a difference from the previous value;
        'total' - the value is shown as a cumulative sum of all previous values.

    group : str
        Grouping variable. Each group calculates its own statistics.
    color : str
        Color of the box boundary lines.
        For more info see `Color and Fill <https://lets-plot.org/python/pages/aesthetics.html#color-and-fill>`__.
        Use 'flow_type' to color lines by the direction of the flow.
        Flow type names: "Absolute", "Increase", "Decrease" and "Total".
        You could use these names to change the default colors with the
        `scale_color_manual() <https://lets-plot.org/python/pages/api/lets_plot.scale_color_manual.html>`__ function.
    fill : str
        Fill color of the boxes.
        For more info see `Color and Fill <https://lets-plot.org/python/pages/aesthetics.html#color-and-fill>`__.
        Use 'flow_type' to color boxes by the direction of the flow.
        Flow type names: "Absolute", "Increase", "Decrease" and "Total".
        You could use these names to change the default colors with the
        `scale_fill_manual() <https://lets-plot.org/python/pages/api/lets_plot.scale_fill_manual.html>`__ function.
    size : float, default=0.0
        Line width of the box boundary lines.
    alpha : float
        Transparency level of the boxes. Accepts values between 0 and 1.
    linetype : int or str or list
        Type of the box boundary lines.
        Accepts codes or names (0 = 'blank', 1 = 'solid', 2 = 'dashed', 3 = 'dotted', 4 = 'dotdash', 5 = 'longdash', 6 = 'twodash'),
        a hex string (up to 8 digits for dash-gap lengths),
        or a list pattern [offset, [dash, gap, ...]] / [dash, gap, ...].
        For more info see `Line Types <https://lets-plot.org/python/pages/aesthetics.html#line-types>`__.
    width : float, default=0.9
        Width of the boxes. Typically ranges between 0 and 1.
        Values that are greater than 1 lead to overlapping of the boxes.
    show_legend : bool, default=False
        True - show the legend.
    relative_tooltips : ``layer_tooltips`` or str
        Tooltips for boxes with relative values.
        Result of the call to the `layer_tooltips() <https://lets-plot.org/python/pages/api/lets_plot.layer_tooltips.html>`__ function.
        Specify appearance, style and content.
        When 'none', tooltips are not shown.
        When 'detailed', a more detailed (compared to the default) version of the tooltips is shown.
    absolute_tooltips : ``layer_tooltips`` or str
        Tooltips for boxes with absolute values.
        Result of the call to the `layer_tooltips() <https://lets-plot.org/python/pages/api/lets_plot.layer_tooltips.html>`__ function.
        Specify appearance, style and content.
        When 'none', tooltips are not shown.
        When 'detailed', a more detailed (compared to the default) version of the tooltips is shown.
    sorted_value : bool, default=False
        Sorts categories by absolute value of the changes.
    threshold : float
        Groups all categories under a certain threshold value into "Other" category.
    max_values : int
        Groups all categories with the smallest changes, except the first ``max_values``, into "Other" category.
    base : float, default=0.0
        Values with measure 'absolute' or 'total' are relative to this value.
    calc_total : bool, default=True
        Setting the ``calc_total`` to True will put the final cumulative sum into a new separate box.
        Taken into account only if the 'measure' column isn't provided.
    total_title : str
        The header of the last box with the final cumulative sum, if 'measure' column isn't provided.
        Also used as a title in the legend for columns of type 'total'.
    hline : str or dict
        Horizontal line passing through 0.
        Set 'blank' or result of `element_blank() <https://lets-plot.org/python/pages/api/lets_plot.element_blank.html>`__ to draw nothing.
        Set `element_line() <https://lets-plot.org/python/pages/api/lets_plot.element_line.html>`__ to specify parameters.
    hline_ontop : bool, default=True
        Option to place horizontal line over the other layers.
    connector : str or dict
        Line between neighbouring boxes connecting the end of the previous box and the beginning of the next box.
        Set 'blank' or result of `element_blank() <https://lets-plot.org/python/pages/api/lets_plot.element_blank.html>`__ to draw nothing.
        Set `element_line() <https://lets-plot.org/python/pages/api/lets_plot.element_line.html>`__ to specify parameters.
    relative_labels : dict
        Result of the call to the `layer_labels() <https://lets-plot.org/python/pages/api/lets_plot.layer_labels.html>`__ function.
        Specify content and formatting of annotation labels on relative change bars.
        If specified, overrides ``label_format`` for relative bars.
    absolute_labels : dict
        Result of the call to the `layer_labels() <https://lets-plot.org/python/pages/api/lets_plot.layer_labels.html>`__ function.
        Specify content and formatting of annotation labels on absolute value bars.
        If specified, overrides ``label_format`` for absolute bars.
    label : str or dict
        Style configuration for labels on bars. Applied to default labels or to
        relative/absolute labels when ``relative_labels`` or ``absolute_labels`` are specified.
        Set 'blank' or result of `element_blank() <https://lets-plot.org/python/pages/api/lets_plot.element_blank.html>`__ to draw nothing.
        Set `element_text() <https://lets-plot.org/python/pages/api/lets_plot.element_text.html>`__ to specify style parameters.
        Use ``element_text(color='inherit')`` to make labels inherit the color of bar borders.
    label_format : str
        Format string used to transform label values to text. Applied to default labels or to
        relative/absolute labels when ``relative_labels`` or ``absolute_labels`` are specified.
        Can be overridden by formatting specified in ``relative_labels`` or ``absolute_labels``.
        Examples:

        - '.2f' -> '12.45'
        - 'Num {}' -> 'Num 12.456789'
        - 'TTL: {.2f}$' -> 'TTL: 12.45$'

        For more info see `Formatting <https://lets-plot.org/python/pages/formats.html>`__.
    background_layers : LayerSpec or FeatureSpecArray
        Background layers to be added to the plot.

    Returns
    -------
    ``PlotSpec``
        Plot object specification.

    Notes
    -----
    Computed variables:

    - ..x.. : category id.
    - ..xlabel.. : category name.
    - ..ymin.. : lower value of the change.
    - ..ymax.. : upper value of the change.
    - ..measure.. : kind of a calculation: absolute, relative or total.
    - ..flow_type.. : direction of the flow: increasing, decreasing, or the result (total).
    - ..initial.. : initial value of the change.
    - ..value.. : current cumsum (result of the change) or absolute value (depending on the 'measure' column).
    - ..dy.. : value of the change.

    Examples
    --------
    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 11

        import numpy as np
        from lets_plot import *
        from lets_plot.bistro.waterfall import *
        LetsPlot.setup_html()
        categories = list("ABCDEF")
        np.random.seed(42)
        data = {
            'x': categories,
            'y': np.random.normal(size=len(categories))
        }
        waterfall_plot(data, 'x', 'y')

    |

    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 21-25

        import numpy as np
        from lets_plot import *
        from lets_plot.bistro.waterfall import *
        LetsPlot.setup_html()
        categories = list("ABCDEF")
        np.random.seed(42)
        data = {
            'x': categories,
            'y': np.random.normal(size=len(categories))
        }
        band_data = {
            'xmin': [-0.5, 2.5],
            'xmax': [2.5, 5.5],
            'name': ['Q1', 'Q2']
        }
        text_data = {
            'x': [0, 3],
            'y': [2.7, 2.7],
            'name': ['Q1', 'Q2']
        }
        waterfall_plot(data, 'x', 'y', label_format='.2f',
                       background_layers=geom_band(
                           aes(xmin='xmin', xmax='xmax', fill='name', color='name'),
                           data=band_data, alpha=0.2
                       )) + \\
            geom_text(aes(x='x', y='y', label='name'), data=text_data, size=10) + \\
            ggtitle("Waterfall with background layers")

    |

    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 12-18

        import numpy as np
        from lets_plot import *
        from lets_plot.bistro.waterfall import *
        LetsPlot.setup_html()
        n, m = 10, 5
        categories = list(range(n))
        np.random.seed(42)
        data = {
            'x': categories,
            'y': np.random.randint(2 * m + 1, size=len(categories)) - m
        }
        waterfall_plot(data, 'x', 'y', \\
                       threshold=2, \\
                       width=.7, size=1, fill="white", color='flow_type', \\
                       hline=element_line(linetype='solid'), hline_ontop=False, \\
                       connector=element_line(linetype='dotted'), \\
                       label=element_text(color='inherit'), \\
                       total_title="Result", show_legend=True)

    |

    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 11-20

        import numpy as np
        from lets_plot import *
        from lets_plot.bistro.waterfall import *
        LetsPlot.setup_html()
        categories = list("ABCDEFGHIJKLMNOP")
        np.random.seed(42)
        data = {
            'x': categories,
            'y': np.random.uniform(-1, 1, size=len(categories))
        }
        waterfall_plot(data, 'x', 'y', sorted_value=True, max_values=5, calc_total=False, \\
                       relative_tooltips=layer_tooltips().title("Category: @..xlabel..")
                                                         .format("@..initial..", ".2~f")
                                                         .format("@..value..", ".2~f")
                                                         .format("@..dy..", ".2~f")
                                                         .line("@{..flow_type..}d from @..initial.. to @..value..")
                                                         .line("Difference: @..dy..")
                                                         .disable_splitting(), \\
                       size=1, alpha=.5, \\
                       label=element_text(color="black"), label_format=".4f")

    |

    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 17-18

        from lets_plot import *
        from lets_plot.bistro.waterfall import *
        LetsPlot.setup_html()
        data = {
            'company': ["Badgersoft"] * 7 + ["AIlien Co."] * 7,
            'accounts': ["initial", "revenue", "costs", "Q1", "revenue", "costs", "Q2"] * 2,
            'values': [200, 200, -100, None, 250, -100, None,
                       150, 50, -100, None, 100, -100, None],
            'measure': ['absolute', 'relative', 'relative', 'total', 'relative', 'relative', 'total'] * 2,
        }
        colors = {
            "Absolute": "darkseagreen",
            "Increase": "palegoldenrod",
            "Decrease": "paleturquoise",
            "Total": "palegreen",
        }
        waterfall_plot(data, 'accounts', 'values', measure='measure', group='company',
                       size=.75, label=element_text(color="black")) + \\
            scale_fill_manual(values=colors) + \\
            facet_wrap(facets='company', scales='free_x')

    """
    # Only the data and its meta information are used; the returned mapping is discarded.
    data, _, data_meta = as_annotated_data(data, aes(x=x, y=y))

    # Normalize `background_layers` to a list of LayerSpec objects.
    if background_layers is None:
        layers = []
    elif isinstance(background_layers, LayerSpec):
        layers = [background_layers]
    elif not isinstance(background_layers, FeatureSpecArray):
        raise TypeError("Invalid 'layer' type: {}".format(type(background_layers)))
    else:
        # Every element of the array must itself be a layer.
        for sublayer in background_layers.elements():
            if not isinstance(sublayer, LayerSpec):
                raise TypeError("Invalid 'layer' type: {}".format(type(sublayer)))
        layers = background_layers.elements()

    # All waterfall options are passed through unchanged in the 'bistro' spec.
    bistro_spec = dict(
        name='waterfall',
        x=x,
        y=y,
        measure=measure,
        group=group,
        color=color,
        fill=fill,
        size=size,
        alpha=alpha,
        linetype=linetype,
        width=width,
        show_legend=show_legend,
        relative_tooltips=relative_tooltips,
        absolute_tooltips=absolute_tooltips,
        sorted_value=sorted_value,
        threshold=threshold,
        max_values=max_values,
        base=base,
        calc_total=calc_total,
        total_title=total_title,
        hline=hline,
        hline_ontop=hline_ontop,
        connector=connector,
        relative_labels=relative_labels,
        absolute_labels=absolute_labels,
        label=label,
        label_format=label_format,
        background_layers=[layer.as_dict() for layer in layers],
    )

    return PlotSpec(data=data, mapping=None, scales=[], layers=[], bistro=bistro_spec, **data_meta)
|