lets-plot 4.6.1-cp310-cp310-macosx_11_0_arm64.whl → 4.7.0rc1-cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lets-plot might be problematic. Click here for more details.

Files changed (35)
  1. lets_plot/_global_settings.py +5 -0
  2. lets_plot/_kbridge.py +7 -0
  3. lets_plot/_type_utils.py +29 -6
  4. lets_plot/_version.py +1 -1
  5. lets_plot/bistro/im.py +2 -2
  6. lets_plot/bistro/waterfall.py +93 -12
  7. lets_plot/export/ggsave_.py +23 -15
  8. lets_plot/frontend_context/_configuration.py +8 -1
  9. lets_plot/geo_data/__init__.py +2 -1
  10. lets_plot/package_data/lets-plot.min.js +2 -1
  11. lets_plot/plot/annotation.py +75 -18
  12. lets_plot/plot/core.py +147 -30
  13. lets_plot/plot/geom.py +730 -89
  14. lets_plot/plot/geom_function_.py +1 -1
  15. lets_plot/plot/geom_imshow_.py +42 -51
  16. lets_plot/plot/geom_livemap_.py +2 -22
  17. lets_plot/plot/ggtb_.py +0 -1
  18. lets_plot/plot/pos.py +13 -44
  19. lets_plot/plot/scale_position.py +9 -3
  20. lets_plot/plot/series_meta.py +179 -105
  21. lets_plot/plot/stat.py +4 -4
  22. lets_plot/plot/subplots.py +4 -4
  23. lets_plot/plot/theme_.py +55 -52
  24. lets_plot/plot/util.py +15 -4
  25. lets_plot/tilesets.py +69 -4
  26. {lets_plot-4.6.1.dist-info → lets_plot-4.7.0rc1.dist-info}/METADATA +28 -20
  27. {lets_plot-4.6.1.dist-info → lets_plot-4.7.0rc1.dist-info}/RECORD +35 -31
  28. {lets_plot-4.6.1.dist-info → lets_plot-4.7.0rc1.dist-info}/WHEEL +1 -1
  29. lets_plot-4.7.0rc1.dist-info/licenses/licenses/LICENSE.FreeType +166 -0
  30. lets_plot-4.7.0rc1.dist-info/licenses/licenses/LICENSE.ImageMagick +106 -0
  31. lets_plot-4.7.0rc1.dist-info/licenses/licenses/LICENSE.expat +21 -0
  32. lets_plot-4.7.0rc1.dist-info/licenses/licenses/LICENSE.fontconfig +200 -0
  33. lets_plot_kotlin_bridge.cpython-310-darwin.so +0 -0
  34. {lets_plot-4.6.1.dist-info → lets_plot-4.7.0rc1.dist-info/licenses}/LICENSE +0 -0
  35. {lets_plot-4.6.1.dist-info → lets_plot-4.7.0rc1.dist-info}/top_level.txt +0 -0
@@ -102,7 +102,7 @@ def geom_function(mapping=None, *, data=None, stat=None, geom=None, position=Non
102
102
  The geometry to display the function, as a string.
103
103
  position : str or `FeatureSpec`, default='identity'
104
104
  Position adjustment.
105
- Either a position adjustment name: 'dodge', 'dodgev', 'jitter', 'nudge', 'jitterdodge', 'fill',
105
+ Either a position adjustment name: 'dodge', 'jitter', 'nudge', 'jitterdodge', 'fill',
106
106
  'stack' or 'identity', or the result of calling a position adjustment function (e.g., `position_dodge()` etc.).
107
107
  show_legend : bool, default=True
108
108
  False - do not show legend for this layer.
@@ -234,29 +234,44 @@ def geom_imshow(image_data, cmap=None, *,
234
234
 
235
235
  (image_data, greyscale_data_min, greyscale_data_max) = _normalize_2D(image_data, norm, vmin, vmax, min_lum)
236
236
  height, width = image_data.shape
237
- nchannels = 1
238
-
239
237
  has_nan = numpy.isnan(image_data.max())
240
- if has_nan and not cmap:
241
- # add alpha-channel (LA)
238
+
239
+ if cmap:
240
+ # colormap via palettable
241
+ if not palettable:
242
+ raise ValueError(
243
+ "Can't process `cmap`: please install 'Palettable' (https://pypi.org/project/palettable/) to your "
244
+ "Python environment. "
245
+ )
246
+
247
+ # prepare palette
248
+ palette = None
249
+ if not has_nan:
250
+ alpha_ch_val = 255 if alpha is None else 255 * alpha
251
+ cmap_256 = palettable.get_map(cmap + "_256")
252
+ palette = [_hex2rgb_arr_uint8(c, alpha_ch_val) for c in cmap_256.hex_colors]
253
+ else:
254
+ alpha_ch_val = 255 if alpha is None else 255 * alpha
255
+ cmap_255 = palettable.get_map(cmap + "_255")
256
+ # transparent color at index 0
257
+ palette = [numpy.array([0, 0, 0, 0], dtype=numpy.uint8)] \
258
+ + [_hex2rgb_arr_uint8(c, alpha_ch_val) for c in cmap_255.hex_colors]
259
+
260
+ # replace indexes with palette colors
261
+ if has_nan:
262
+ # replace all NaN-s with 0 (index 0 for transparent color)
263
+ numpy.nan_to_num(image_data, copy=False, nan=0)
264
+ image_data = numpy.take(palette, numpy.round(image_data).astype(numpy.int32), axis=0)
265
+ else:
266
+ # Greyscale
242
267
  alpha_ch_scaler = 1 if alpha is None else alpha
243
268
  is_nan = numpy.isnan(image_data)
244
269
  im_shape = numpy.shape(image_data)
245
270
  alpha_ch = numpy.zeros(im_shape, dtype=image_data.dtype)
246
271
  alpha_ch[is_nan == False] = 255 * alpha_ch_scaler
247
272
  image_data[is_nan] = 0
248
- image_data = numpy.dstack((image_data, alpha_ch))
249
- nchannels = 2
250
- elif has_nan and cmap:
251
- # replace all NaN-s with 0 (index 0 for transparent color)
252
- numpy.nan_to_num(image_data, copy=False, nan=0)
253
- elif not cmap and alpha is not None:
254
- # add alpha-channel (LA)
255
- im_shape = numpy.shape(image_data)
256
- alpha_ch = numpy.full(im_shape, 255 * alpha, dtype=image_data.dtype)
257
- image_data = numpy.dstack((image_data, alpha_ch))
258
- nchannels = 2
259
-
273
+ image_data = numpy.repeat(image_data[:, :, numpy.newaxis], 3, axis=2) # convert to RGB
274
+ image_data = numpy.dstack((image_data, alpha_ch)) # convert to RGBA
260
275
  else:
261
276
  # Color RGB/RGBA image
262
277
  # Make a copy:
@@ -268,15 +283,14 @@ def geom_imshow(image_data, cmap=None, *,
268
283
 
269
284
  height, width, nchannels = image_data.shape
270
285
 
271
- if alpha is not None:
272
- if nchannels == 3:
273
- # RGB image: add alpha channel (RGBA)
274
- alpha_ch = numpy.full((height, width, 1), 255 * alpha, dtype=image_data.dtype)
275
- image_data = numpy.dstack((image_data, alpha_ch))
276
- nchannels = 4
277
- elif nchannels == 4:
278
- # RGBA image: apply alpha scaling
279
- image_data[:, :, 3] *= alpha
286
+ if nchannels == 3:
287
+ alpha_ch_scaler = 1 if alpha is None else alpha
288
+ # RGB image: add alpha channel (RGBA)
289
+ alpha_ch = numpy.full((height, width, 1), 255 * alpha_ch_scaler, dtype=image_data.dtype)
290
+ image_data = numpy.dstack((image_data, alpha_ch))
291
+ elif nchannels == 4 and alpha is not None:
292
+ # RGBA image: apply alpha scaling
293
+ image_data[:, :, 3] *= alpha
280
294
 
281
295
  # Make sure all values are ints in range 0-255.
282
296
  image_data.clip(0, 255, out=image_data)
@@ -312,39 +326,16 @@ def geom_imshow(image_data, cmap=None, *,
312
326
  image_data = image_data.astype(numpy.int8)
313
327
 
314
328
  # Reshape to 2d-array:
315
- # from [[[R, G, B], [R, G, B]], ...] to [[R, G, B, R, G, B],..] for RGB(A)
316
- # or from [[[L, A], [L, A]], ...] to [[L, A, L, A],..] for greyscale–alpha (LA)
317
- # or pypng will fail
318
- image_2d = image_data.reshape(-1, width * nchannels)
329
+ image_2d = image_data.reshape(-1, width * 4) # always 4 channels (RGBA)
319
330
 
320
331
  # PNG writer
321
- palette = None
322
- if cmap and greyscale:
323
- # colormap via palettable
324
- if not palettable:
325
- raise ValueError(
326
- "Can't process `cmap`: please install 'Palettable' (https://pypi.org/project/palettable/) to your "
327
- "Python environment. "
328
- )
329
- if not has_nan:
330
- alpha_ch_val = None if alpha is None else 255 * alpha
331
- cmap_256 = palettable.get_map(cmap + "_256")
332
- palette = [_hex2rgb_arr_uint8(c, alpha_ch_val) for c in cmap_256.hex_colors]
333
- else:
334
- alpha_ch_val = 255 if alpha is None else 255 * alpha
335
- cmap_255 = palettable.get_map(cmap + "_255")
336
- # transparent color at index 0
337
- palette = [numpy.array([0, 0, 0, 0], dtype=numpy.uint8)] + [_hex2rgb_arr_uint8(c, alpha_ch_val) for c in
338
- cmap_255.hex_colors]
339
-
340
332
  png_bytes = io.BytesIO()
341
333
  png.Writer(
342
334
  width=width,
343
335
  height=height,
344
- greyscale=greyscale and not cmap,
345
- alpha=(nchannels == 4 or nchannels == 2), # RGBA or LA
336
+ greyscale=False,
337
+ alpha=True,
346
338
  bitdepth=8,
347
- palette=palette,
348
339
  compression=compression
349
340
  ).write(png_bytes, image_2d)
350
341
 
@@ -272,33 +272,13 @@ def _prepare_tiles(tiles: Optional[Union[str, dict]]) -> Optional[dict]:
272
272
 
273
273
 
274
274
  def _warn_deprecated_tiles(tiles: Union[dict, None]):
275
- # TODO: Remove this warning in future releases.
276
-
277
275
  if tiles is None:
278
276
  maptiles_url = get_global_val(MAPTILES_URL)
279
277
  else:
280
278
  maptiles_url = tiles[MAPTILES_URL]
281
279
 
282
- if not isinstance(maptiles_url, str):
283
- return
284
- if not maptiles_url.startswith("https://cartocdn_[abc].global.ssl.fastly.net/"):
285
- return
286
- if 'base-midnight' not in maptiles_url and 'base-antique' not in maptiles_url and 'base-flatblue' not in maptiles_url:
287
- return
288
-
289
- if tiles is None:
290
- if not has_global_value(MAPTILES_ATTRIBUTION):
291
- return
292
- maptiles_attribution = get_global_val(MAPTILES_ATTRIBUTION)
293
- else:
294
- maptiles_attribution = tiles[MAPTILES_ATTRIBUTION]
295
-
296
- if not isinstance(maptiles_attribution, str):
297
- return
298
- if not maptiles_attribution.endswith('map data: <a href="https://www.openstreetmap.org/copyright">© OpenStreetMap contributors</a> <a href="https://carto.com/attributions#basemaps">© CARTO</a>, <a href="https://carto.com/attributions">© CARTO</a>'):
299
- return
300
-
301
- print(f"WARN: The tileset is no longer available and the corresponding constant will be removed in future releases.")
280
+ # Check if the current tiles should be deprecated and print a deprecation message. Otherwise, return.
281
+ return
302
282
 
303
283
 
304
284
  def _prepare_location(location: Union[str, List[float]]) -> Optional[dict]:
lets_plot/plot/ggtb_.py CHANGED
@@ -29,7 +29,6 @@ def ggtb() -> FeatureSpec:
29
29
  Limitations:
30
30
 
31
31
  - The toolbar does not work with interactive maps.
32
- - The toolbar is not compatible with GGBunch.
33
32
  - The toolbar cannot be used with plots using a polar coordinate system.
34
33
 
35
34
  Returns
lets_plot/plot/pos.py CHANGED
@@ -22,6 +22,7 @@ def position_dodge(width=None):
22
22
  This is useful when you want to align narrow geoms with wider geoms.
23
23
  The value of width is relative and typically ranges between 0 and 1.
24
24
  Values that are greater than 1 lead to overlapping of the objects.
25
+ The default value is taken from the width aesthetics of the data point.
25
26
 
26
27
  Returns
27
28
  -------
@@ -55,44 +56,11 @@ def position_dodge(width=None):
55
56
 
56
57
  def position_dodgev(height=None):
57
58
  """
58
- Adjust position by dodging overlaps to the side.
59
-
60
- Parameters
61
- ----------
62
- height : float
63
- Dodging height, when different to the height of the individual elements.
64
- This is useful when you want to align narrow geoms with taller geoms.
65
- The value of height is relative and typically ranges between 0 and 1.
66
- Values that are greater than 1 lead to overlapping of the objects.
67
-
68
- Returns
69
- -------
70
- `FeatureSpec`
71
- Geom object position specification.
72
-
73
- Notes
74
- -----
75
- Adjust position by dodging overlaps to the side.
76
-
77
- Examples
78
- --------
79
- .. jupyter-execute::
80
- :linenos:
81
- :emphasize-lines: 11
82
-
83
- from lets_plot import *
84
- LetsPlot.setup_html()
85
- data = {
86
- 'xmin': [0.2, 4.6, 1.6, 3.5],
87
- 'xmax': [1.5, 5.3, 3.0, 4.4],
88
- 'y': ['a', 'a', 'b', 'b'],
89
- 'c': ['gr1', 'gr2', 'gr1', 'gr2']
90
- }
91
- ggplot(data, aes(y='y', color='c')) + \\
92
- geom_errorbar(aes(xmin='xmin', xmax='xmax'), height=0.1, size=2, \\
93
- position=position_dodgev(height=0.2))
59
+ Function `position_dodgev()` is deprecated and will be removed in future releases.
94
60
 
95
61
  """
62
+ print("WARN: The function position_dodgev() is deprecated and will be removed in future releases.")
63
+
96
64
  return _pos('dodgev', height=height)
97
65
 
98
66
 
@@ -102,11 +70,11 @@ def position_jitter(width=None, height=None, seed=None):
102
70
 
103
71
  Parameters
104
72
  ----------
105
- width : float
73
+ width : float, default=.4
106
74
  Jittering width.
107
75
  The value of width is relative and typically ranges between 0 and 0.5.
108
76
  Values that are greater than 0.5 lead to overlapping of the points.
109
- height : float
77
+ height : float, default=.4
110
78
  Jittering height.
111
79
  The value of height is relative and typically ranges between 0 and 0.5.
112
80
  Values that are greater than 0.5 lead to overlapping of the points.
@@ -152,9 +120,9 @@ def position_nudge(x=None, y=None, unit=None):
152
120
 
153
121
  Parameters
154
122
  ----------
155
- x : float
123
+ x : float, default=0.0
156
124
  Nudging width.
157
- y : float
125
+ y : float, default=0.0
158
126
  Nudging height.
159
127
  unit : {'identity', 'size', 'px'}, default='identity'
160
128
  Units for x and y nudging.
@@ -206,11 +174,12 @@ def position_jitterdodge(dodge_width=None, jitter_width=None, jitter_height=None
206
174
  Bin width.
207
175
  The value of `dodge_width` is relative and typically ranges between 0 and 1.
208
176
  Values that are greater than 1 lead to overlapping of the boxes.
209
- jitter_width : float
177
+ The default value is taken from the width aesthetics of the data point.
178
+ jitter_width : float, default=.4
210
179
  Jittering width.
211
180
  The value of `jitter_width` is relative and typically ranges between 0 and 0.5.
212
181
  Values that are greater than 0.5 lead to overlapping of the points.
213
- jitter_height : float
182
+ jitter_height : float, default=.4
214
183
  Jittering height.
215
184
  The value of `jitter_height` is relative and typically ranges between 0 and 0.5.
216
185
  Values that are greater than 0.5 lead to overlapping of the points.
@@ -259,7 +228,7 @@ def position_stack(vjust=None, mode=None):
259
228
 
260
229
  Parameters
261
230
  ----------
262
- vjust : float
231
+ vjust : float, default=1.0
263
232
  Vertical adjustment for geoms that have a position (like points or lines),
264
233
  not a dimension (like bars or areas).
265
234
  Set to 0 to align with the bottom, 0.5 for the middle, and 1 for the top.
@@ -305,7 +274,7 @@ def position_fill(vjust=None, mode=None):
305
274
 
306
275
  Parameters
307
276
  ----------
308
- vjust : float
277
+ vjust : float, default=1.0
309
278
  Vertical adjustment for geoms that have a position (like points or lines),
310
279
  not a dimension (like bars or areas).
311
280
  Set to 0 to align with the bottom, 0.5 for the middle, and 1 for the top.
@@ -722,7 +722,7 @@ def scale_x_discrete(name=None, *,
722
722
  LetsPlot.setup_html()
723
723
  np.random.seed(43)
724
724
  scores = {'rating': np.random.randint(3, 6, size=10)}
725
- ggplot(scores, aes(x='rating')) + geom_bar() + \\
725
+ ggplot(scores, aes(x=as_discrete('rating'))) + geom_bar() + \\
726
726
  scale_x_discrete(name='rating', format='.1f')
727
727
 
728
728
  """
@@ -1012,7 +1012,10 @@ def scale_x_datetime(name=None, *,
1012
1012
  position=None
1013
1013
  ):
1014
1014
  """
1015
- Position scale x for date/time data.
1015
+ Position scale for the x-axis with date/time data.
1016
+ The input is expected to be either a series of integers representing milliseconds since the Unix epoch, or Python datetime objects.
1017
+ Assumes UTC timezone if no timezone information is present in the data (naive datetime).
1018
+ For timezone-aware datetime objects, the timezone information from the data is preserved.
1016
1019
 
1017
1020
  Parameters
1018
1021
  ----------
@@ -1099,7 +1102,10 @@ def scale_y_datetime(name=None, *,
1099
1102
  position=None
1100
1103
  ):
1101
1104
  """
1102
- Position scale y for date/time data.
1105
+ Position scale for the y-axis with date/time data.
1106
+ The input is expected to be either a series of integers representing milliseconds since the Unix epoch, or Python datetime objects.
1107
+ Assumes UTC timezone if no timezone information is present in the data (naive datetime).
1108
+ For timezone-aware datetime objects, the timezone information from the data is preserved.
1103
1109
 
1104
1110
  Parameters
1105
1111
  ----------
@@ -1,129 +1,203 @@
1
1
  # Copyright (c) 2024. JetBrains s.r.o.
2
2
  # Use of this source code is governed by the MIT license that can be found in the LICENSE file.
3
- from datetime import datetime
4
- from typing import Union, Dict, Iterable
3
+ from datetime import datetime, date, time
4
+ from typing import Union, Dict, Iterable, Optional
5
5
 
6
6
  from lets_plot._type_utils import is_polars_dataframe
7
7
  from lets_plot.plot.util import is_pandas_data_frame
8
8
 
9
+ try:
10
+ import numpy
11
+ except ImportError:
12
+ numpy = None
13
+
14
+ try:
15
+ import pandas
16
+ except ImportError:
17
+ pass
18
+
19
+ try:
20
+ import polars as pl
21
+ from polars.datatypes.group import INTEGER_DTYPES as PL_INTEGER_DTYPES
22
+ from polars.datatypes.group import FLOAT_DTYPES as PL_FLOAT_DTYPES
23
+ except ImportError:
24
+ pass
25
+
9
26
  TYPE_INTEGER = 'int'
10
27
  TYPE_FLOATING = 'float'
11
28
  TYPE_STRING = 'str'
12
29
  TYPE_BOOLEAN = 'bool'
13
30
  TYPE_DATE_TIME = 'datetime'
31
+ TYPE_DATE = 'date' # Local date (no time zone)
32
+ TYPE_TIME = 'time' # Local time (we ignore time zone even if it is present)
14
33
  TYPE_UNKNOWN = 'unknown'
15
34
 
16
35
 
17
- def infer_type(data: Union[Dict, 'pandas.DataFrame', 'polars.DataFrame']) -> Dict[str, str]:
36
+ def _infer_type(data: Union[Dict, 'pandas.DataFrame', 'polars.DataFrame']) -> Dict[str, str]:
18
37
  type_info = {}
19
38
 
20
39
  if is_pandas_data_frame(data):
21
- import pandas as pd
22
- import numpy as np # np is a dependency of pandas, we can import it without checking
23
-
24
40
  for var_name, var_content in data.items():
25
- if data.empty:
26
- type_info[var_name] = TYPE_UNKNOWN
27
- continue
28
-
29
- inferred_type = pd.api.types.infer_dtype(var_content.values, skipna=True)
30
- if inferred_type == "categorical":
31
- dtype = var_content.cat.categories.dtype
32
-
33
- if np.issubdtype(dtype, np.integer):
34
- type_info[var_name] = TYPE_INTEGER
35
- elif np.issubdtype(dtype, np.floating):
36
- type_info[var_name] = TYPE_FLOATING
37
- elif np.issubdtype(dtype, np.object_):
38
- # Check if all elements are strings
39
- if all(isinstance(x, str) for x in var_content.cat.categories):
40
- type_info[var_name] = TYPE_STRING
41
- else:
42
- type_info[var_name] = TYPE_UNKNOWN
43
- else:
44
- type_info[var_name] = TYPE_UNKNOWN
45
- else:
46
- # see https://pandas.pydata.org/docs/reference/api/pandas.api.types.infer_dtype.html
47
- if inferred_type == 'string':
48
- type_info[var_name] = TYPE_STRING
49
- elif inferred_type == 'floating':
50
- type_info[var_name] = TYPE_FLOATING
51
- elif inferred_type == 'integer':
52
- type_info[var_name] = TYPE_INTEGER
53
- elif inferred_type == 'boolean':
54
- type_info[var_name] = TYPE_BOOLEAN
55
- elif inferred_type == 'datetime64' or inferred_type == 'datetime':
56
- type_info[var_name] = TYPE_DATE_TIME
57
- elif inferred_type == "date":
58
- type_info[var_name] = TYPE_DATE_TIME
59
- elif inferred_type == 'empty': # for columns with all None values
60
- type_info[var_name] = TYPE_UNKNOWN
61
- else:
62
- type_info[var_name] = 'unknown(pandas:' + inferred_type + ')'
41
+ type_info[var_name] = _infer_type_pandas_dataframe(var_name, var_content)
63
42
  elif is_polars_dataframe(data):
64
- import polars as pl
65
- from polars.datatypes.group import INTEGER_DTYPES, FLOAT_DTYPES
66
43
  for var_name, var_type in data.schema.items():
67
-
68
- # https://docs.pola.rs/api/python/stable/reference/datatypes.html
69
- if var_type in FLOAT_DTYPES:
70
- type_info[var_name] = TYPE_FLOATING
71
- elif var_type in INTEGER_DTYPES:
72
- type_info[var_name] = TYPE_INTEGER
73
- elif var_type == pl.datatypes.String:
74
- type_info[var_name] = TYPE_STRING
75
- elif var_type == pl.datatypes.Boolean:
76
- type_info[var_name] = TYPE_BOOLEAN
77
- elif var_type == pl.datatypes.Date or var_type == pl.datatypes.Datetime:
78
- type_info[var_name] = TYPE_DATE_TIME
79
- else:
80
- type_info[var_name] = 'unknown(polars:' + str(var_type) + ')'
44
+ type_info[var_name] = _infer_type_polars_dataframe(var_name, var_type)
81
45
  elif isinstance(data, dict):
82
46
  for var_name, var_content in data.items():
83
- if isinstance(var_content, Iterable):
84
- if not any(True for _ in var_content): # empty
85
- type_info[var_name] = TYPE_UNKNOWN
86
- continue
87
-
88
- type_set = set(type(val) for val in var_content)
89
- if type(None) in type_set:
90
- type_set.remove(type(None))
91
-
92
- if len(type_set) == 0:
93
- continue
94
-
95
- if len(type_set) > 1:
96
- if all(issubclass(type_obj, int) or issubclass(type_obj, float) for type_obj in type_set):
97
- type_info[var_name] = TYPE_FLOATING
98
- else:
99
- type_info[var_name] = 'unknown(mixed types)'
100
- continue
101
-
102
- try:
103
- import numpy
104
- except ImportError:
105
- numpy = None
106
-
107
- type_obj = list(type_set)[0]
108
- if type_obj == bool:
109
- type_info[var_name] = TYPE_BOOLEAN
110
- elif issubclass(type_obj, int):
111
- type_info[var_name] = TYPE_INTEGER
112
- elif issubclass(type_obj, float):
113
- type_info[var_name] = TYPE_FLOATING
114
- elif issubclass(type_obj, str):
115
- type_info[var_name] = TYPE_STRING
116
- elif issubclass(type_obj, datetime):
117
- type_info[var_name] = TYPE_DATE_TIME
118
- elif numpy and issubclass(type_obj, numpy.datetime64):
119
- type_info[var_name] = TYPE_DATE_TIME
120
- elif numpy and issubclass(type_obj, numpy.timedelta64):
121
- type_info[var_name] = TYPE_DATE_TIME
122
- elif numpy and issubclass(type_obj, numpy.integer):
123
- type_info[var_name] = TYPE_INTEGER
124
- elif numpy and issubclass(type_obj, numpy.floating):
125
- type_info[var_name] = TYPE_FLOATING
126
- else:
127
- type_info[var_name] = 'unknown(python:' + str(type_obj) + ')'
47
+ type_info[var_name] = _infer_type_dict(var_name, var_content)
128
48
 
129
49
  return type_info
50
+
51
+
52
+ def _infer_type_pandas_dataframe(var_name: str, var_content) -> str:
53
+ if var_content.empty:
54
+ return TYPE_UNKNOWN
55
+ elif var_content.isna().all():
56
+ return TYPE_UNKNOWN
57
+
58
+ lp_dtype = TYPE_UNKNOWN
59
+ time_zone = None
60
+ pandas_dtype = pandas.api.types.infer_dtype(var_content.values, skipna=True)
61
+
62
+ if pandas_dtype == "categorical":
63
+ dtype = var_content.cat.categories.dtype
64
+
65
+ if numpy.issubdtype(dtype, numpy.integer):
66
+ lp_dtype = TYPE_INTEGER
67
+ elif numpy.issubdtype(dtype, numpy.floating):
68
+ lp_dtype = TYPE_FLOATING
69
+ elif numpy.issubdtype(dtype, numpy.object_):
70
+ # Check if all elements are strings
71
+ if all(isinstance(x, str) for x in var_content.cat.categories):
72
+ lp_dtype = TYPE_STRING
73
+ else:
74
+ # see https://pandas.pydata.org/docs/reference/api/pandas.api.types.infer_dtype.html
75
+ if pandas_dtype == 'string':
76
+ lp_dtype = TYPE_STRING
77
+ elif pandas_dtype == 'floating':
78
+ lp_dtype = TYPE_FLOATING
79
+ elif pandas_dtype == 'integer':
80
+ lp_dtype = TYPE_INTEGER
81
+ elif pandas_dtype == 'boolean':
82
+ lp_dtype = TYPE_BOOLEAN
83
+
84
+ elif pandas_dtype == 'datetime64' or pandas_dtype == 'datetime':
85
+ lp_dtype = TYPE_DATE_TIME
86
+ elif pandas_dtype == "date":
87
+ lp_dtype = TYPE_DATE
88
+ elif pandas_dtype == "time":
89
+ lp_dtype = TYPE_TIME
90
+
91
+ elif pandas_dtype == 'empty': # for columns with all None values
92
+ lp_dtype = TYPE_UNKNOWN
93
+ else:
94
+ lp_dtype = 'unknown(pandas:' + pandas_dtype + ')'
95
+
96
+ return lp_dtype
97
+
98
+
99
+ def _infer_type_polars_dataframe(var_name: str, var_type) -> str:
100
+ lp_dtype = TYPE_UNKNOWN
101
+
102
+ # https://docs.pola.rs/api/python/stable/reference/datatypes.html
103
+ if var_type in PL_FLOAT_DTYPES:
104
+ lp_dtype = TYPE_FLOATING
105
+ elif var_type in PL_INTEGER_DTYPES:
106
+ lp_dtype = TYPE_INTEGER
107
+ elif var_type == pl.datatypes.String:
108
+ lp_dtype = TYPE_STRING
109
+ elif var_type == pl.datatypes.Boolean:
110
+ lp_dtype = TYPE_BOOLEAN
111
+
112
+ elif var_type == pl.datatypes.Datetime:
113
+ lp_dtype = TYPE_DATE_TIME
114
+ elif var_type == pl.datatypes.Date:
115
+ lp_dtype = TYPE_DATE
116
+ elif var_type == pl.datatypes.Time:
117
+ lp_dtype = TYPE_TIME
118
+
119
+ else:
120
+ lp_dtype = 'unknown(polars:' + str(var_type) + ')'
121
+
122
+ return lp_dtype
123
+
124
+
125
+ def _infer_type_dict(var_name: str, var_content) -> str:
126
+ if isinstance(var_content, Iterable):
127
+ if not any(True for _ in var_content): # empty
128
+ return TYPE_UNKNOWN
129
+ else:
130
+ return TYPE_UNKNOWN
131
+
132
+ type_set = set(type(val) for val in var_content)
133
+ if type(None) in type_set:
134
+ type_set.remove(type(None))
135
+
136
+ if len(type_set) == 0:
137
+ return TYPE_UNKNOWN
138
+
139
+ if len(type_set) > 1:
140
+ if all(issubclass(type_obj, int) or issubclass(type_obj, float) for type_obj in type_set):
141
+ return TYPE_FLOATING
142
+ else:
143
+ return 'unknown(mixed types)'
144
+
145
+ lp_dtype = TYPE_UNKNOWN
146
+ type_obj = list(type_set)[0]
147
+ if type_obj == bool:
148
+ lp_dtype = TYPE_BOOLEAN
149
+ elif issubclass(type_obj, int):
150
+ lp_dtype = TYPE_INTEGER
151
+ elif issubclass(type_obj, float):
152
+ lp_dtype = TYPE_FLOATING
153
+ elif issubclass(type_obj, str):
154
+ lp_dtype = TYPE_STRING
155
+
156
+ elif issubclass(type_obj, datetime):
157
+ lp_dtype = TYPE_DATE_TIME
158
+ elif issubclass(type_obj, date) and not issubclass(type_obj, datetime):
159
+ lp_dtype = TYPE_DATE
160
+ elif issubclass(type_obj, time):
161
+ lp_dtype = TYPE_TIME
162
+
163
+ elif numpy and issubclass(type_obj, numpy.datetime64):
164
+ lp_dtype = TYPE_DATE_TIME
165
+ elif numpy and issubclass(type_obj, numpy.timedelta64):
166
+ # ToDo: time delta?
167
+ # lp_dtype = TYPE_DATE_TIME
168
+ lp_dtype = 'unknown(python:' + str(type_obj) + ')'
169
+
170
+ elif numpy and issubclass(type_obj, numpy.integer):
171
+ lp_dtype = TYPE_INTEGER
172
+ elif numpy and issubclass(type_obj, numpy.floating):
173
+ lp_dtype = TYPE_FLOATING
174
+ else:
175
+ lp_dtype = 'unknown(python:' + str(type_obj) + ')'
176
+
177
+ return lp_dtype
178
+
179
+
180
+ def _detect_time_zone(var_name: str, data: Union[Dict, 'pandas.DataFrame', 'polars.DataFrame']) -> Optional[str]:
181
+ if is_pandas_data_frame(data):
182
+ if var_name in data:
183
+ var_content = data[var_name]
184
+ if hasattr(var_content, 'dt') and hasattr(var_content.dt, 'tz') and var_content.dt.tz is not None:
185
+ return str(var_content.dt.tz)
186
+ elif is_polars_dataframe(data):
187
+ if var_name in data.columns:
188
+ col_dtype = data[var_name].dtype
189
+ if hasattr(col_dtype, 'time_zone'):
190
+ if col_dtype.time_zone is not None:
191
+ return str(col_dtype.time_zone)
192
+ elif isinstance(data, dict):
193
+ if var_name in data:
194
+ var_content = data[var_name]
195
+ if isinstance(var_content, Iterable):
196
+ for val in var_content:
197
+ if isinstance(val, datetime) and val.tzinfo is not None:
198
+ return str(val.tzinfo)
199
+
200
+ # NumPy datetime64 objects don't store timezone information,
201
+ # so we can't extract it from them.
202
+
203
+ return None
lets_plot/plot/stat.py CHANGED
@@ -32,7 +32,7 @@ def stat_summary(mapping=None, *, data=None, geom=None,
32
32
  The geometry to display the summary stat for this layer, as a string.
33
33
  position : str or `FeatureSpec`, default='identity'
34
34
  Position adjustment.
35
- Either a position adjustment name: 'dodge', 'dodgev', 'jitter', 'nudge', 'jitterdodge', 'fill',
35
+ Either a position adjustment name: 'dodge', 'jitter', 'nudge', 'jitterdodge', 'fill',
36
36
  'stack' or 'identity', or the result of calling a position adjustment function (e.g., `position_dodge()` etc.).
37
37
  show_legend : bool, default=True
38
38
  False - do not show legend for this layer.
@@ -184,7 +184,7 @@ def stat_summary_bin(mapping=None, *, data=None, geom=None,
184
184
  The geometry to display the summary stat for this layer, as a string.
185
185
  position : str or `FeatureSpec`, default='identity'
186
186
  Position adjustment.
187
- Either a position adjustment name: 'dodge', 'dodgev', 'jitter', 'nudge', 'jitterdodge', 'fill',
187
+ Either a position adjustment name: 'dodge', 'jitter', 'nudge', 'jitterdodge', 'fill',
188
188
  'stack' or 'identity', or the result of calling a position adjustment function (e.g., `position_dodge()` etc.).
189
189
  show_legend : bool, default=True
190
190
  False - do not show legend for this layer.
@@ -344,7 +344,7 @@ def stat_ecdf(mapping=None, *, data=None, geom=None,
344
344
  The geometry to display the ecdf stat for this layer, as a string.
345
345
  position : str or `FeatureSpec`, default='identity'
346
346
  Position adjustment.
347
- Either a position adjustment name: 'dodge', 'dodgev', 'jitter', 'nudge', 'jitterdodge', 'fill',
347
+ Either a position adjustment name: 'dodge', 'jitter', 'nudge', 'jitterdodge', 'fill',
348
348
  'stack' or 'identity', or the result of calling a position adjustment function (e.g., `position_dodge()` etc.).
349
349
  show_legend : bool, default=True
350
350
  False - do not show legend for this layer.
@@ -485,7 +485,7 @@ def stat_sum(mapping=None, *, data=None, geom=None, position=None, show_legend=N
485
485
  The geometry to display the sum stat for this layer, as a string.
486
486
  position : str or `FeatureSpec`, default='identity'
487
487
  Position adjustment.
488
- Either a position adjustment name: 'dodge', 'dodgev', 'jitter', 'nudge', 'jitterdodge', 'fill',
488
+ Either a position adjustment name: 'dodge', 'jitter', 'nudge', 'jitterdodge', 'fill',
489
489
  'stack' or 'identity', or the result of calling a position adjustment function (e.g., `position_dodge()` etc.).
490
490
  show_legend : bool, default=True
491
491
  False - do not show legend for this layer.